Garbd - errors in init script stop/start functions

Bug #1367956 reported by Przemek
8
This bug affects 1 person
Affects Status Importance Assigned to Milestone
Percona XtraDB Cluster moved to https://jira.percona.com/projects/PXC
Status tracked in 5.6
5.5
Fix Released
Undecided
Unassigned
5.6
Fix Released
Undecided
Unassigned

Bug Description

The /etc/init.d/garb init script fails to stop the daemon, and it often reports the start as failed when the daemon has actually started just fine.
Checked with both Percona-XtraDB-Cluster-garbd-2-2.11-1.2675.rhel6 and Percona-XtraDB-Cluster-garbd-3-3.7-1.3254.rhel6 on CentOS 6.5. I haven't checked Debian/Ubuntu yet.

[root@percona33 ~]# cat /etc/sysconfig/garb
# Copyright (C) 2012 Coedership Oy
# This config file is to be sourced by garb service script.

# A space-separated list of node addresses (address[:port]) in the cluster
GALERA_NODES="192.168.4.20:4567"

# Galera cluster name, should be the same as on the rest of the nodes.
GALERA_GROUP="PrzemekPXC"

# Optional Galera internal options string (e.g. SSL settings)
# see http://www.codership.com/wiki/doku.php?id=galera_parameters
GALERA_OPTIONS="gmcast.listen_addr=tcp://192.168.4.40:4777"

# Log file for garbd. Optional, by default logs to syslog
LOG_FILE="/var/log/garbd.log"

[root@percona33 ~]# /etc/init.d/garb status
garbd is stopped

[root@percona33 ~]# /etc/init.d/garb start
Starting /usr/bin/garbd: [FAILED]

[root@percona33 ~]# echo $?
0

[root@percona33 ~]# /etc/init.d/garb status
garbd (pid 11154) is running...

[root@percona33 ~]# ps -ef |grep garb
nobody 11154 1 0 22:09 ? 00:00:00 /usr/bin/garbd -d -a gcomm://192.168.4.20:4567 -g PrzemekPXC -o gmcast.listen_addr=tcp://192.168.4.40:4777 -l /var/log/garbd.log

[root@percona33 ~]# /etc/init.d/garb stop
[root@percona33 ~]# in/garbd: [ OK ]
[root@percona33 ~]# /etc/init.d/garb status
garbd (pid 11154) is running...

[root@percona33 ~]# bash -x /etc/init.d/garb stop
+ '[' -f /etc/redhat-release ']'
+ . /etc/init.d/functions
++ TEXTDOMAIN=initscripts
++ umask 022
++ PATH=/sbin:/usr/sbin:/bin:/usr/bin
++ export PATH
++ '[' -z '' ']'
++ COLUMNS=80
++ '[' -z '' ']'
+++ /sbin/consoletype
++ CONSOLETYPE=pty
++ '[' -f /etc/sysconfig/i18n -a -z '' -a -z '' ']'
++ . /etc/profile.d/lang.sh
++ unset LANGSH_SOURCED
++ '[' -z '' ']'
++ '[' -f /etc/sysconfig/init ']'
++ . /etc/sysconfig/init
+++ BOOTUP=color
+++ RES_COL=60
+++ MOVE_TO_COL='echo -en \033[60G'
+++ SETCOLOR_SUCCESS='echo -en \033[0;32m'
+++ SETCOLOR_FAILURE='echo -en \033[0;31m'
+++ SETCOLOR_WARNING='echo -en \033[0;33m'
+++ SETCOLOR_NORMAL='echo -en \033[0;39m'
+++ PROMPT=yes
+++ AUTOSWAP=no
+++ ACTIVE_CONSOLES='/dev/tty[1-6]'
+++ SINGLE=/sbin/sushell
++ '[' pty = serial ']'
++ __sed_discard_ignored_files='/\(~\|\.bak\|\.orig\|\.rpmnew\|\.rpmorig\|\.rpmsave\)$/d'
+ . /etc/sysconfig/network
++ NETWORKING=yes
++ HOSTNAME=percona33
+ config=/etc/sysconfig/garb
+ PIDFILE=/var/run/garbd
++ which garbd
+ prog=/usr/bin/garbd
+ case "$1" in
+ stop
+ '[' 0 '!=' 0 ']'
+ '[' -r /var/run/garbd ']'
+ return 3
+ exit 3

[root@percona33 ~]# lsof -p 11154|grep garb
garbd 11154 nobody cwd DIR 8,1 4096 2 /
garbd 11154 nobody rtd DIR 8,1 4096 2 /
garbd 11154 nobody txt REG 8,1 1413080 3067 /usr/bin/garbd
garbd 11154 nobody mem REG 8,1 122040 3593 /lib64/libselinux.so.1
garbd 11154 nobody mem REG 8,1 110960 3084 /lib64/libresolv-2.12.so
garbd 11154 nobody mem REG 8,1 10192 5175 /lib64/libkeyutils.so.1.3
garbd 11154 nobody mem REG 8,1 43728 5184 /lib64/libkrb5support.so.0.1
garbd 11154 nobody mem REG 8,1 88600 3484 /lib64/libz.so.1.2.3
garbd 11154 nobody mem REG 8,1 19536 3064 /lib64/libdl-2.12.so
garbd 11154 nobody mem REG 8,1 174840 5180 /lib64/libk5crypto.so.3.1
garbd 11154 nobody mem REG 8,1 14664 3565 /lib64/libcom_err.so.2.1
garbd 11154 nobody mem REG 8,1 941920 5182 /lib64/libkrb5.so.3.3
garbd 11154 nobody mem REG 8,1 277704 5176 /lib64/libgssapi_krb5.so.2.2
garbd 11154 nobody mem REG 8,1 1921096 3058 /lib64/libc-2.12.so
garbd 11154 nobody mem REG 8,1 90880 22 /lib64/libgcc_s-4.4.7-20120601.so.1
garbd 11154 nobody mem REG 8,1 596264 3066 /lib64/libm-2.12.so
garbd 11154 nobody mem REG 8,1 987096 3759 /usr/lib64/libstdc++.so.6.0.13
garbd 11154 nobody mem REG 8,1 1950976 5215 /usr/lib64/libcrypto.so.1.0.1e
garbd 11154 nobody mem REG 8,1 441112 11293 /usr/lib64/libssl.so.1.0.1e
garbd 11154 nobody mem REG 8,1 43832 3086 /lib64/librt-2.12.so
garbd 11154 nobody mem REG 8,1 142640 3082 /lib64/libpthread-2.12.so
garbd 11154 nobody mem REG 8,1 154520 3050 /lib64/ld-2.12.so
garbd 11154 nobody 0r CHR 1,3 0t0 3782 /dev/null
garbd 11154 nobody 1r CHR 1,3 0t0 3782 /dev/null
garbd 11154 nobody 2r CHR 1,3 0t0 3782 /dev/null
garbd 11154 nobody 3w REG 8,1 4021 130819 /var/log/garbd.log
garbd 11154 nobody 4u REG 0,9 0 3780 [eventpoll]
garbd 11154 nobody 5u REG 0,9 0 3780 [timerfd]
garbd 11154 nobody 6u REG 0,9 0 3780 [eventfd]
garbd 11154 nobody 7u IPv4 109491 0t0 TCP percona33:4777 (LISTEN)
garbd 11154 nobody 8u IPv4 109494 0t0 TCP percona33:53714->percona11:tram (ESTABLISHED)
garbd 11154 nobody 9u IPv4 109495 0t0 TCP percona33:4777->percona22:45835 (ESTABLISHED)
garbd 11154 nobody 10u IPv4 109499 0t0 TCP percona33:4777->percona33:54248 (ESTABLISHED)

[root@percona33 ~]# kill 11154
[root@percona33 ~]# /etc/init.d/garb status
garbd is stopped

It seems that the script is checking for a PID file that does not exist.

Revision history for this message
Nilnandan Joshi (nilnandan-joshi) wrote :

I was able to reproduce the same issue with Percona-XtraDB-Cluster-garbd-3.x86_64 0:3.7-1.3254.rhel6. The script is not working properly.

[root@percona-pxc55-1 mysql]#
[root@percona-pxc55-1 mysql]# /etc/init.d/garb start
Starting /usr/bin/garbd: [FAILED]
[root@percona-pxc55-1 mysql]# /etc/init.d/garb status
garbd (pid 1971) is running...
[root@percona-pxc55-1 mysql]#
[root@percona-pxc55-1 mysql]# ps -ef | grep -i garbd
nobody 1971 1 0 22:40 ? 00:00:00 /usr/bin/garbd -d -a gcomm://192.168.43.60:4567 -g my_centos_cluster -o gmcast.listen_addr=tcp://192.168.43.60:4777 -l /var/lib/mysql/garbd.log
root 1981 1586 0 22:40 pts/2 00:00:00 grep -i garbd
[root@percona-pxc55-1 mysql]#
[root@percona-pxc55-1 mysql]# /etc/init.d/garb stop
[root@percona-pxc55-1 mysql]# [ OK ]
[root@percona-pxc55-1 mysql]# /etc/init.d/garb status
garbd (pid 1971) is running...
[root@percona-pxc55-1 mysql]#

Changed in percona-xtradb-cluster:
status: New → Confirmed
Revision history for this message
Przemek (pmalkowski) wrote :

The same issue with Garb 2 in PXC 5.5.39:

[root@percona3 ~]# rpm -qa|grep -i garb
Percona-XtraDB-Cluster-garbd-2-2.11-1.2675.rhel6.x86_64

[root@percona3 ~]# ps -ef|grep garb
root 8868 8413 0 14:56 pts/0 00:00:00 grep garb
[root@percona3 ~]# service garb status
garbd is stopped
[root@percona3 ~]# echo $?
3
[root@percona3 ~]# service garb start
Starting /usr/bin/garbd: [FAILED]
[root@percona3 ~]# echo $?
0
[root@percona3 ~]# service garb status
garbd (pid 8905) is running...
[root@percona3 ~]# service garb start
/usr/bin/garbd is already running with PID [FAILED]
[root@percona3 ~]# echo $?
3
[root@percona3 ~]# ps -ef|grep garb
nobody 8905 1 0 14:56 ? 00:00:00 /usr/bin/garbd -d -a gcomm://192.168.90.3:4567 -g PXC1 -o gmcast.listen_addr=tcp://0.0.0.0:4500

Revision history for this message
Raghavendra D Prabhu (raghavendra-prabhu) wrote :

The question is: why is the PID file not created? Can you obtain an xtrace (bash -x) of "service garb start"? The stop exits early because the PID file doesn't exist or has been removed.

Revision history for this message
Marco Schirrmeister (marcoschirrmeister) wrote :
Download full text (4.4 KiB)

In my case the PIDFILE is created, but it is empty. The bash -x output for a start is below.
I only see this behaviour on one arbiter. On the other arbiters that I have, it always works.
If I add a sleep of, for example, 1 second before the PID is gathered and written to the PID file, it also always works on this problematic node.

garb version is Percona-XtraDB-Cluster-garbd-2-2.11-1.2675.rhel6.x86_64

[root@arbiter40 ~]# bash -x /etc/init.d/garb start
+ '[' -f /etc/redhat-release ']'
+ . /etc/init.d/functions
++ TEXTDOMAIN=initscripts
++ umask 022
++ PATH=/sbin:/usr/sbin:/bin:/usr/bin
++ export PATH
++ '[' -z '' ']'
++ COLUMNS=80
++ '[' -z '' ']'
+++ /sbin/consoletype
++ CONSOLETYPE=pty
++ '[' -f /etc/sysconfig/i18n -a -z '' -a -z '' ']'
++ . /etc/profile.d/lang.sh
++ unset LANGSH_SOURCED
++ '[' -z '' ']'
++ '[' -f /etc/sysconfig/init ']'
++ . /etc/sysconfig/init
+++ BOOTUP=color
+++ RES_COL=60
+++ MOVE_TO_COL='echo -en \033[60G'
+++ SETCOLOR_SUCCESS='echo -en \033[0;32m'
+++ SETCOLOR_FAILURE='echo -en \033[0;31m'
+++ SETCOLOR_WARNING='echo -en \033[0;33m'
+++ SETCOLOR_NORMAL='echo -en \033[0;39m'
+++ PROMPT=yes
+++ AUTOSWAP=no
+++ ACTIVE_CONSOLES='/dev/tty[1-6]'
+++ SINGLE=/sbin/sushell
++ '[' pty = serial ']'
++ __sed_discard_ignored_files='/\(~\|\.bak\|\.orig\|\.rpmnew\|\.rpmorig\|\.rpmsave\)$/d'
+ . /etc/sysconfig/network
++ NETWORKING=yes
++ NETWORKING_IPV6=no
++ HOSTNAME=arbiter40.exmaple.com
++ GATEWAY=10.28.33.250
+ config=/etc/sysconfig/garb
+ PIDFILE=/var/run/garbd
++ which garbd
+ prog=/usr/bin/garbd
+ case "$1" in
+ start
+ '[' 0 '!=' 0 ']'
+ '[' yes = no ']'
+ grep -q -E '^# REMOVE' /etc/sysconfig/garb
+ '[' -r /var/run/garbd ']'
+ '[' -x /usr/bin/garbd ']'
+ '[' -f /etc/sysconfig/garb ']'
+ . /etc/sysconfig/garb
++ GALERA_NODES='10.6.166.235:4010 10.6.166.221:4010 10.115.20.166:4010 10.115.20.167:4010'
++ GALERA_GROUP=OProdLRCluster
++ GALERA_OPTIONS='evs.keepalive_period=PT3S;evs.suspect_timeout=PT30S;evs.inactive_timeout=PT1M;evs.install_timeout=PT1M;evs.send_window=64;evs.user_send_window=32;evs.debug_log_mask=EVS_DEBUG'
++ LOG_FILE=/var/log/garbd.log
++ GALERA_PORT=4010
+ '[' -z '10.6.166.235:4010 10.6.166.221:4010 10.115.20.166:4010 10.115.20.167:4010' ']'
+ '[' -z OProdLRCluster ']'
+ GALERA_PORT=4010
+ for ADDRESS in '${GALERA_NODES}' 0
++ cut -d : -f 1
++ echo 10.6.166.235:4010
+ HOST=10.6.166.235
++ cut -d : -f 2
++ echo 10.6.166.235:4010
+ PORT=4010
+ PORT=4010
++ which nc
+ [[ -x /usr/bin/nc ]]
+ grep -q -- -z
+ nc -h
+ nc -z 10.6.166.235 4010
+ break
+ '[' 10.6.166.235:4010 == 0 ']'
+ OPTIONS='-d -a gcomm://10.6.166.235:4010'
+ '[' -n OProdLRCluster ']'
+ OPTIONS='-d -a gcomm://10.6.166.235:4010 -g OProdLRCluster'
+ '[' -n 'evs.keepalive_period=PT3S;evs.suspect_timeout=PT30S;evs.inactive_timeout=PT1M;evs.install_timeout=PT1M;evs.send_window=64;evs.user_send_window=32;evs.debug_log_mask=EVS_DEBUG' ']'
+ OPTIONS='-d -a gcomm://10.6.166.235:4010 -g OProdLRCluster -o evs.keepalive_period=PT3S;evs.suspect_timeout=PT30S;evs.inactive_timeout=PT1M;evs.install_timeout=PT1M;evs.send_window=64;evs.user_send_window=32;evs.debug_log_mask=EVS_DEBUG'
+ '[' -n /var/log/garbd.log ']'
+ OPTIONS='-d -a gcomm://10.6.166.235:4010 -g OProdLRCluster -...

Read more...

Revision history for this message
Shahriyar Rzayev (rzayev-sehriyar) wrote :

Percona now uses JIRA for bug reports, so this bug report has been migrated to: https://jira.percona.com/browse/PXC-1735

To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.