Comment 11 for bug 1584902

Revision history for this message
Bert JW Regeer (bertjwregeer) wrote :

I grabbed the patch and tried to deploy just a single instance. That single instance came up without issues.

Then, using `juju add-unit`, I started another two nodes. One joined the cluster successfully; the other seems to be hanging on:

Clustering with remote rabbit host (rabbit@juju-machine-17-lxc-0).

Tail end of the Juju logs:

Reading package lists... Done
Building dependency tree
Reading state information... Done
2016-05-24 14:40:37 INFO worker.uniter.jujuc server.go:173 running hook tool "status-set" ["maintenance" "Clustering with remote rabbit host (rabbit@juju-machine-17-lxc-0)."]
2016-05-24 14:40:37 DEBUG worker.uniter.jujuc server.go:174 hook context id "rabbitmq/3-cluster-relation-changed-383633683969241899"; dir "/var/lib/juju/agents/unit-rabbitmq-3/charm"
2016-05-24 14:40:37 INFO worker.uniter.jujuc server.go:173 running hook tool "juju-log" ["-l" "DEBUG" "Running ['/usr/sbin/rabbitmqctl', 'stop_app']"]
2016-05-24 14:40:37 DEBUG worker.uniter.jujuc server.go:174 hook context id "rabbitmq/3-cluster-relation-changed-383633683969241899"; dir "/var/lib/juju/agents/unit-rabbitmq-3/charm"
2016-05-24 14:40:37 DEBUG juju-log cluster:65: Running ['/usr/sbin/rabbitmqctl', 'stop_app']
2016-05-24 14:40:37 INFO cluster-relation-changed Stopping node 'rabbit@juju-machine-13-lxc-3' ...

The node never seems to stop.

Full process list:

root@juju-machine-13-lxc-3:/var/log/juju# ps auxwww
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 1 0.0 0.0 37540 5676 ? Ss 14:36 0:01 /sbin/init
root 48 0.0 0.0 35276 8056 ? Ss 14:36 0:00 /lib/systemd/systemd-journald
root 86 0.0 0.0 274488 6208 ? Ssl 14:36 0:00 /usr/lib/accountsservice/accounts-daemon
syslog 87 0.0 0.0 260632 3244 ? Ssl 14:36 0:00 /usr/sbin/rsyslogd -n
root 88 0.0 0.0 27728 2972 ? Ss 14:36 0:00 /usr/sbin/cron -f
root 92 0.0 0.0 28548 3024 ? Ss 14:36 0:00 /lib/systemd/systemd-logind
daemon 97 0.0 0.0 26044 2284 ? Ss 14:36 0:00 /usr/sbin/atd -f
message+ 98 0.0 0.0 42948 4016 ? Ss 14:36 0:00 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation
root 103 0.0 0.0 277180 6120 ? Ssl 14:36 0:00 /usr/lib/policykit-1/polkitd --no-debug
root 274 0.0 0.0 65612 6244 ? Ss 14:36 0:00 /usr/sbin/sshd -D
root 278 0.0 0.0 5224 160 ? Ss 14:36 0:00 /sbin/iscsid
root 279 0.0 0.0 5724 3536 ? S<Ls 14:36 0:00 /sbin/iscsid
root 303 0.0 0.0 14476 2180 pts/3 Ss+ 14:36 0:00 /sbin/agetty --noclear --keep-baud pts/3 115200 38400 9600 vt220
root 305 0.0 0.0 14476 2284 lxc/console Ss+ 14:36 0:00 /sbin/agetty --noclear --keep-baud console 115200 38400 9600 vt220
root 308 0.0 0.0 14476 2140 pts/0 Ss+ 14:36 0:00 /sbin/agetty --noclear --keep-baud pts/0 115200 38400 9600 vt220
root 310 0.0 0.0 14476 2180 pts/2 Ss+ 14:36 0:00 /sbin/agetty --noclear --keep-baud pts/2 115200 38400 9600 vt220
root 311 0.0 0.0 14476 2124 pts/1 Ss+ 14:36 0:00 /sbin/agetty --noclear --keep-baud pts/1 115200 38400 9600 vt220
root 681 0.0 0.0 19696 3304 ? Ss 14:36 0:00 bash /var/lib/juju/init/jujud-machine-13-lxc-3/exec-start.sh
root 685 0.0 0.0 1649380 58932 ? Sl 14:36 0:00 /var/lib/juju/tools/machine-13-lxc-3/jujud machine --data-dir /var/lib/juju --machine-id 13/lxc/3 --debug
root 770 0.0 0.0 19696 3280 ? Ss 14:36 0:00 bash /var/lib/juju/init/jujud-unit-rabbitmq-3/exec-start.sh
root 774 0.0 0.0 1568768 61784 ? Sl 14:36 0:02 /var/lib/juju/tools/unit-rabbitmq-3/jujud unit --data-dir /var/lib/juju --unit-name rabbitmq/3 --debug
rabbitmq 16455 0.0 0.0 4508 716 ? Ss 14:39 0:00 /bin/sh /usr/sbin/rabbitmq-server
rabbitmq 16463 0.0 0.0 4508 1756 ? S 14:39 0:00 /bin/sh -e /usr/lib/rabbitmq/bin/rabbitmq-server
rabbitmq 16570 0.0 0.0 26304 2124 ? S 14:39 0:00 /usr/lib/erlang/erts-7.3/bin/epmd -daemon
rabbitmq 16773 0.0 0.0 5258856 106724 ? Sl 14:39 1:52 /usr/lib/erlang/erts-7.3/bin/beam.smp -W w -A 64 -P 1048576 -K true -B i -- -root /usr/lib/erlang -progname erl -- -home /var/lib/rabbitmq -- -pa /usr/lib/rabbitmq/lib/rabbitmq_server-3.5.7/sbin/../ebin -noshell -noinput -s rabbit boot -sname rabbit@juju-machine-13-lxc-3 -boot start_sasl -config /etc/rabbitmq/rabbitmq -kernel inet_default_connect_options [{nodelay,true}] -sasl errlog_type error -sasl sasl_error_logger false -rabbit error_logger {file,"/<email address hidden>"} -rabbit sasl_error_logger {file,"/<email address hidden>"} -rabbit enabled_plugins_file "/etc/rabbitmq/enabled_plugins" -rabbit plugins_dir "/usr/lib/rabbitmq/lib/rabbitmq_server-3.5.7/sbin/../plugins" -rabbit plugins_expand_dir "/var/lib/rabbitmq/mnesia/rabbit@juju-machine-13-lxc-3-plugins-expand" -os_mon start_cpu_sup false -os_mon start_disksup false -os_mon start_memsup false -mnesia dir "/var/lib/rabbitmq/mnesia/rabbit@juju-machine-13-lxc-3" -kernel inet_dist_listen_min 25672 -kernel inet_dist_listen_max 25672
rabbitmq 17005 0.0 0.0 7504 912 ? Ss 14:39 0:11 inet_gethost 4
rabbitmq 17006 0.0 0.0 13856 1992 ? S 14:39 0:23 inet_gethost 4
root 26590 0.0 0.0 129484 60504 ? S 14:40 0:01 /usr/bin/python /var/lib/juju/agents/unit-rabbitmq-3/charm/hooks/cluster-relation-changed
root 27360 0.0 0.0 4508 1576 ? S 14:40 0:00 /bin/sh /usr/sbin/rabbitmqctl join_cluster rabbit@juju-machine-17-lxc-0
root 27369 0.0 0.0 49344 3160 ? S 14:40 0:00 su rabbitmq -s /bin/sh -c /usr/lib/rabbitmq/bin/rabbitmqctl "join_cluster" "rabbit@juju-machine-17-lxc-0"
rabbitmq 27370 0.0 0.0 4508 712 ? Ss 14:40 0:00 sh -c /usr/lib/rabbitmq/bin/rabbitmqctl "join_cluster" "rabbit@juju-machine-17-lxc-0"
rabbitmq 27371 0.0 0.0 1276856 71508 ? Sl 14:40 0:00 /usr/lib/erlang/erts-7.3/bin/beam.smp -- -root /usr/lib/erlang -progname erl -- -home /var/lib/rabbitmq -- -pa /usr/lib/rabbitmq/lib/rabbitmq_server-3.5.7/sbin/../ebin -noshell -noinput -hidden -boot start_clean -sasl errlog_type error -mnesia dir "/var/lib/rabbitmq/mnesia/rabbit@juju-machine-13-lxc-3" -s rabbit_control_main -nodename rabbit@juju-machine-13-lxc-3 -extra join_cluster rabbit@juju-machine-17-lxc-0
rabbitmq 27592 0.0 0.0 7504 896 ? Ss 14:40 0:00 inet_gethost 4
rabbitmq 27593 0.0 0.0 13856 1728 ? S 14:40 0:00 inet_gethost 4
root 28048 0.0 0.0 95460 7108 ? Ss 14:48 0:00 sshd: ubuntu [priv]
ubuntu 28050 0.0 0.0 45108 4872 ? Ss 14:48 0:00 /lib/systemd/systemd --user
ubuntu 28051 0.0 0.0 60992 1664 ? S 14:48 0:00 (sd-pam)
ubuntu 28084 0.0 0.0 95460 4072 ? R 14:48 0:00 sshd: ubuntu@pts/4
ubuntu 28085 0.0 0.0 21176 5028 pts/4 Ss 14:48 0:00 -bash
root 28118 0.0 0.0 55756 3952 pts/4 S 14:48 0:00 sudo su
root 28119 0.0 0.0 51004 3436 pts/4 S 14:48 0:00 su
root 28120 0.0 0.0 19920 3692 pts/4 S 14:48 0:00 bash
root 28391 0.0 0.0 36084 3224 pts/4 R+ 14:50 0:00 ps auxwww