Comment 53 for bug 722201

Revision history for this message
Rafael David Tinoco (rafaeldtinoco) wrote :

Following the guide at https://discourse.ubuntu.com/t/ctdb-create-a-3-node-nfs-ha-backed-by-a-clustered-filesystem/11608:

(c)inaddy@ctdb01:~$ cat /etc/ctdb/public_addresses
192.168.0.1/24 fakeinternal01
192.168.0.2/24 fakeinternal01
192.168.0.3/24 fakeinternal01

(c)inaddy@ctdb01:~$ cat /etc/ctdb/nodes
172.16.0.200
172.16.0.201
172.16.0.202

(c)inaddy@ctdb01:~$ onnode -p all systemctl stop ctdb

(c)inaddy@ctdb01:~$ onnode -p all "ctdb event script enable legacy 60.nfs"
(c)inaddy@ctdb01:~$ onnode -p all "ctdb event script enable legacy 06.nfs"

(c)inaddy@ctdb01:~$ onnode -p 0 "systemctl start ctdb"

(c)inaddy@ctdb01:~$ ctdb status
Number of nodes:3
pnn:0 172.16.0.200 UNHEALTHY (THIS NODE)
pnn:1 172.16.0.201 DISCONNECTED|UNHEALTHY|INACTIVE
pnn:2 172.16.0.202 DISCONNECTED|UNHEALTHY|INACTIVE
Generation:126780233
Size:1
hash:0 lmaster:0
Recovery mode:NORMAL (0)
Recovery master:0

(c)inaddy@ctdb01:~$ sudo tail -f /var/log/ctdb/log.ctdb
2019/08/30 18:35:54.015358 ctdb-recoverd[3872]: Unassigned IP 192.168.0.3 can be served by this node
2019/08/30 18:35:54.015421 ctdb-recoverd[3872]: Unassigned IP 192.168.0.2 can be served by this node
2019/08/30 18:35:54.015438 ctdb-recoverd[3872]: Unassigned IP 192.168.0.1 can be served by this node
2019/08/30 18:35:54.015518 ctdb-recoverd[3872]: Trigger takeoverrun
2019/08/30 18:35:54.015618 ctdb-recoverd[3872]: Takeover run starting
2019/08/30 18:35:54.018475 ctdbd[3817]: Takeover of IP 192.168.0.3/24 on interface fakeinternal01
2019/08/30 18:35:54.018556 ctdbd[3817]: Takeover of IP 192.168.0.2/24 on interface fakeinternal01
2019/08/30 18:35:54.018597 ctdbd[3817]: Takeover of IP 192.168.0.1/24 on interface fakeinternal01
2019/08/30 18:35:54.133116 ctdb-eventd[3819]: 60.nfs: Reconfiguring service "nfs-kernel-server"...
2019/08/30 18:35:54.133351 ctdb-recoverd[3872]: Takeover run completed successfully

(c)inaddy@ctdb01:~$ ctdb status
Number of nodes:3
pnn:0 172.16.0.200 OK (THIS NODE)
pnn:1 172.16.0.201 DISCONNECTED|UNHEALTHY|INACTIVE
pnn:2 172.16.0.202 DISCONNECTED|UNHEALTHY|INACTIVE
Generation:126780233
Size:1
hash:0 lmaster:0
Recovery mode:NORMAL (0)
Recovery master:0

(c)inaddy@ctdb01:~$ onnode -p 1 "systemctl start ctdb"
(c)inaddy@ctdb01:~$ onnode -p 2 "systemctl start ctdb"

(c)inaddy@ctdb01:~$ ctdb status
Number of nodes:3
pnn:0 172.16.0.200 OK (THIS NODE)
pnn:1 172.16.0.201 OK
pnn:2 172.16.0.202 OK
Generation:1337933514
Size:3
hash:0 lmaster:0
hash:1 lmaster:1
hash:2 lmaster:2
Recovery mode:NORMAL (0)
Recovery master:0

(c)inaddy@ctdb01:~$ onnode -p all "systemctl status ctdb" | grep Active
[172.16.0.200] Active: active (running) since Fri 2019-08-30 18:35:31 UTC; 6min ago
[172.16.0.201] Active: active (running) since Fri 2019-08-30 18:40:35 UTC; 1min 26s ago
[172.16.0.202] Active: active (running) since Fri 2019-08-30 18:40:39 UTC; 1min 22s ago

(c)inaddy@ctdb01:~$ onnode -p all "ip addr show fakeinternal01 | grep 192.168.0"
[172.16.0.201] inet 192.168.0.2/24 brd 192.168.0.255 scope global fakeinternal01
[172.16.0.202] inet 192.168.0.3/24 brd 192.168.0.255 scope global fakeinternal01
[172.16.0.200] inet 192.168.0.1/24 brd 192.168.0.255 scope global fakeinternal01

(c)inaddy@ctdb01:~$ sudo mount -t nfs -o vers=3 ctdb01.public:/home/inaddy/work /mnt
(c)inaddy@ctdb01:~$ sudo umount /mnt
(c)inaddy@ctdb01:~$ sudo mount -t nfs -o vers=3 ctdb02.public:/home/inaddy/work /mnt
(c)inaddy@ctdb01:~$ sudo umount /mnt
(c)inaddy@ctdb01:~$ sudo mount -t nfs -o vers=3 ctdb03.public:/home/inaddy/work /mnt
(c)inaddy@ctdb01:~$ sudo umount /mnt

# Failover test: check which node holds each public IP, then mount the export via ctdb03's public address:

(c)inaddy@ctdb01:~$ onnode -p all "hostname ; ip addr show fakeinternal01 | egrep -E '(ctdb0|192.168.0)'"
[172.16.0.201] ctdb02
[172.16.0.201] inet 192.168.0.2/24 brd 192.168.0.255 scope global fakeinternal01
[172.16.0.200] ctdb01
[172.16.0.200] inet 192.168.0.1/24 brd 192.168.0.255 scope global fakeinternal01
[172.16.0.202] ctdb03
[172.16.0.202] inet 192.168.0.3/24 brd 192.168.0.255 scope global fakeinternal01

(c)inaddy@ctdb01:~$ sudo mount -t nfs -o vers=3 ctdb03.public:/home/inaddy/work /mnt

(c)inaddy@ctdb01:~$ ping -c 1 ctdb03.public
PING ctdb03.public (192.168.0.3) 56(84) bytes of data.
64 bytes from ctdb03.public (192.168.0.3): icmp_seq=1 ttl=64 time=0.030 ms

# In parallel, on ctdb03, stop ctdb to trigger the failover:

(c)inaddy@ctdb03:~$ systemctl stop ctdb

# HA works: writes to the NFS mount keep succeeding, and 192.168.0.3 now appears on ctdb01 (172.16.0.200):

(c)inaddy@ctdb01:/mnt$ while true; do sleep 2; dd if=/dev/zero of=./file bs=4k count=1; done
1+0 records in
1+0 records out
4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00330109 s, 1.2 MB/s
1+0 records in
1+0 records out
4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00335382 s, 1.2 MB/s
1+0 records in
1+0 records out
4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00434941 s, 942 kB/s
1+0 records in
1+0 records out

(c)inaddy@ctdb01:/mnt$ onnode -p all "ip addr show fakeinternal01 | grep 192.168.0"
[172.16.0.200] inet 192.168.0.1/24 brd 192.168.0.255 scope global fakeinternal01
[172.16.0.200] inet 192.168.0.3/24 brd 192.168.0.255 scope global secondary fakeinternal01
[172.16.0.201] inet 192.168.0.2/24 brd 192.168.0.255 scope global fakeinternal01