Logical router load-balancer issue with distributed gateway ports
| Affects | Status | Importance | Assigned to | Milestone |
|---|---|---|---|---|
| ovn (Ubuntu) | New | Undecided | Unassigned | |
Bug Description
Hi,
We noticed incorrect behavior when creating DNAT rules made by the OVN load-balancer engine, and applied to a logical router that has multiple distributed gateway ports (DGP).
The load-balancer creates a lr_in_dnat rule using only the first entry in the DGP structure to set the port in the 'is_chassis_resident()' match.
For example:
router f657da90-
port r1_s1
mac: "00:de:ad:fe:00:02"
networks: ["172.16.0.1/24"]
port r1_public
mac: "00:de:ad:ff:00:01"
networks: ["173.16.0.1/16"]
gateway chassis: [hv1]
port r1-ts2
mac: "00:00:01:02:03:05"
networks: ["172.2.0.1/24"]
gateway chassis: [hv3]
port r1-ts1
mac: "00:00:01:02:03:04"
networks: ["172.1.0.1/24"]
gateway chassis: [hv2]
If we have a LR "r1" with 3 DGPs, the load-balancer engine creates the lr_in_dnat rule using only the first gateway port created in the NB database.
root@ubuntu:~# ovn-sbctl dump-flows | grep 30.0.0.1
  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[2] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;)
  table=12(ls_in_lb           ), priority=110  , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[1] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=5 (lr_in_defrag       ), priority=100  , match=(ip && ip4.dst == 30.0.0.1), action=(reg0 = 30.0.0.1; ct_dnat;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.est && !ct.rel && ip4 && reg0 == 30.0.0.1 && ct_mark.natted == 1 && is_chassis_resident("cr-r1-ts1")), action=(next;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && reg0 == 30.0.0.1 && is_chassis_resident("cr-r1-ts1")), action=(ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=13(lr_in_ip_routing   ), priority=97   , match=(reg7 == 0 && ip4.dst == 30.0.0.1/32), action=(ip.ttl--; reg8[0..15] = 0; reg0 = 173.16.0.1; reg1 = 173.16.0.2; eth.src = 00:de:ad:ff:00:02; outport = "r2_public"; flags.loopback = 1; next;)
  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[2] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;)
  table=12(ls_in_lb           ), priority=110  , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[1] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
We noticed the following warning message in the northd log:
2024-02-
However, since the inclusion of DGP support, the load-balancer applies the rule to gateway ports incompletely, taking LB usage to an invalid state - "making it impossible to use in this scenario".
Steps to reproduce:
vm4 == s2 == r2 == public (LRP's: r1-ts1, r1-ts2, r1_public) == r1 == s1 == vm1
vm4 => test case
vm1, vm2, vm3 => OVN LB backends
ovn-nbctl ls-add public
uuid1=`ovn-nbctl create load_balancer vips:30.0.0.1="172.16.0.103,172.16.0.102,172.16.0.101"`
lbg=$(ovn-nbctl create load_balancer_group name=lbg -- \
add load_balancer_group lbg load_balancer $uuid1)
ovn-nbctl --wait=sb add logical_switch public load_balancer_group $lbg
# Router r1
i=1
ovn-nbctl lr-add r1
# Add DGP port TS1 first
ovn-nbctl ls-add ts1
ovn-nbctl lrp-add r1 r1-ts1 00:00:01:02:03:04 172.1.0.1/24
ovn-nbctl lrp-set-gateway-chassis r1-ts1 hv2
# Add DGP port TS2 second
ovn-nbctl ls-add ts2
ovn-nbctl lrp-add r1 r1-ts2 00:00:01:02:03:05 172.2.0.1/24
ovn-nbctl lrp-set-gateway-chassis r1-ts2 hv3
# Add public port
ovn-nbctl lrp-add r1 r1_public 00:de:ad:ff:00:01 173.16.0.1/16
ovn-nbctl lrp-add r1 r1_s1 00:de:ad:fe:00:02 172.16.0.1/24
ovn-nbctl lrp-set-gateway-chassis r1_public hv1
ovn-nbctl --wait=sb add logical_router r1 load_balancer_group $lbg
# s1
ovn-nbctl ls-add s1
ovn-nbctl --wait=sb add logical_switch s1 load_balancer_group $lbg
# s1 - r1
ovn-nbctl lsp-add s1 s1_r1
ovn-nbctl lsp-set-type s1_r1 router
ovn-nbctl lsp-set-addresses s1_r1 "00:de:ad:fe:00:02 172.16.0.1"
ovn-nbctl lsp-set-options s1_r1 router-port=r1_s1
# s1 - vm1
ovn-nbctl lsp-add s1 vm1
ovn-nbctl lsp-set-addresses vm1 "00:de:ad:01:00:01 172.16.0.101"
ovn-nbctl lsp-add s1 vm2
ovn-nbctl lsp-set-addresses vm2 "00:de:ad:01:00:02 172.16.0.102"
ovn-nbctl lsp-add s1 vm3
ovn-nbctl lsp-set-addresses vm3 "00:de:ad:01:00:03 172.16.0.103"
ovn-nbctl lsp-add public public_r1_ts1
ovn-nbctl lsp-set-type public_r1_ts1 router
ovn-nbctl lsp-set-addresses public_r1_ts1 router
ovn-nbctl lsp-set-options public_r1_ts1 router-port=r1-ts1 nat-addresses=router
ovn-nbctl lsp-add public public_r1_ts2
ovn-nbctl lsp-set-type public_r1_ts2 router
ovn-nbctl lsp-set-addresses public_r1_ts2 router
ovn-nbctl lsp-set-options public_r1_ts2 router-port=r1-ts2 nat-addresses=router
ovn-nbctl lsp-add public public_r1
ovn-nbctl lsp-set-type public_r1 router
ovn-nbctl lsp-set-addresses public_r1 router
ovn-nbctl lsp-set-options public_r1 router-port=r1_public nat-addresses=router
ovn-nbctl lsp-add public ln_p1
ovn-nbctl lsp-set-addresses ln_p1 unknown
ovn-nbctl lsp-set-type ln_p1 localnet
ovn-nbctl lsp-set-options ln_p1 network_
#add host vm1
ip netns add vm1
ovs-vsctl add-port br-int vm1 -- set interface vm1 type=internal
ip link set vm1 netns vm1
ip netns exec vm1 ip link set vm1 address 00:de:ad:01:00:01
ip netns exec vm1 ip addr add 172.16.0.101/24 dev vm1
ip netns exec vm1 ip link set vm1 up
ovs-vsctl set Interface vm1 external_ids:iface-id=vm1
ip netns add vm2
ovs-vsctl add-port br-int vm2 -- set interface vm2 type=internal
ip link set vm2 netns vm2
ip netns exec vm2 ip link set vm2 address 00:de:ad:01:00:02
ip netns exec vm2 ip addr add 172.16.0.102/24 dev vm2
ip netns exec vm2 ip link set vm2 up
ovs-vsctl set Interface vm2 external_ids:iface-id=vm2
ip netns add vm3
ovs-vsctl add-port br-int vm3 -- set interface vm3 type=internal
ip link set vm3 netns vm3
ip netns exec vm3 ip link set vm3 address 00:de:ad:01:00:03
ip netns exec vm3 ip addr add 172.16.0.103/24 dev vm3
ip netns exec vm3 ip link set vm3 up
ovs-vsctl set Interface vm3 external_ids:iface-id=vm3
ip netns exec vm1 ip route add default via 172.16.0.1
ip netns exec vm2 ip route add default via 172.16.0.1
ip netns exec vm3 ip route add default via 172.16.0.1
# Router r2
ovn-nbctl lr-add r2
ovn-nbctl lrp-add r2 r2_public 00:de:ad:ff:00:02 173.16.0.2/24
ovn-nbctl lrp-add r2 r2_s2 00:de:ad:fe:99:02 10.1.0.1/24
ovn-nbctl lrp-set-
ovn-nbctl ls-add s2
# s2 - r2
ovn-nbctl lsp-add s2 s2_r2
ovn-nbctl lsp-set-type s2_r2 router
ovn-nbctl lsp-set-addresses s2_r2 "00:de:ad:fe:99:02 10.1.0.1"
ovn-nbctl lsp-set-options s2_r2 router-port=r2_s2
# s2 - vm4
ovn-nbctl lsp-add s2 vm4
ovn-nbctl lsp-set-addresses vm4 "00:de:ad:01:99:04 10.1.0.101"
ovn-nbctl lsp-add public public_r2
ovn-nbctl lsp-set-type public_r2 router
ovn-nbctl lsp-set-addresses public_r2 router
ovn-nbctl lsp-set-options public_r2 router-port=r2_public
ip netns add vm4
ovs-vsctl add-port br-int vm4 -- set interface vm4 type=internal
ip link set vm4 netns vm4
ip netns exec vm4 ip link set vm4 address 00:de:ad:01:99:04
ip netns exec vm4 ip addr add 10.1.0.101/24 dev vm4
ip netns exec vm4 ip link set vm4 up
ovs-vsctl set Interface vm4 external_ids:iface-id=vm4
ovn-nbctl lr-nat-add r2 snat 173.16.0.2 10.1.0.0/24
ovn-nbctl lr-route-add r2 30.0.0.1/32 173.16.0.1
ip netns exec vm4 ip route add default via 10.1.0.1
root@ubuntu:~# ip netns exec vm4 ping 30.0.0.1
PING 30.0.0.1 (30.0.0.1) 56(84) bytes of data.
^C
--- 30.0.0.1 ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 1017ms
root@ubuntu:~#
root@ubuntu:~# ovn-sbctl dump-flows | grep 30.0.0.1
  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[2] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;)
  table=12(ls_in_lb           ), priority=110  , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[1] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=5 (lr_in_defrag       ), priority=100  , match=(ip && ip4.dst == 30.0.0.1), action=(reg0 = 30.0.0.1; ct_dnat;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.est && !ct.rel && ip4 && reg0 == 30.0.0.1 && ct_mark.natted == 1 && is_chassis_resident("cr-r1-ts1")), action=(next;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && reg0 == 30.0.0.1 && is_chassis_resident("cr-r1-ts1")), action=(ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=13(lr_in_ip_routing   ), priority=97   , match=(reg7 == 0 && ip4.dst == 30.0.0.1/32), action=(ip.ttl--; reg8[0..15] = 0; reg0 = 173.16.0.1; reg1 = 173.16.0.2; eth.src = 00:de:ad:ff:00:02; outport = "r2_public"; flags.loopback = 1; next;)
  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[2] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;)
  table=12(ls_in_lb           ), priority=110  , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[1] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
Update: Once we apply the proposed fix [1], the ingress traffic to load-balancer works as expected.
[1] https://mail.openvswitch.org/pipermail/ovs-dev/2024-February/411981.html
root@ubuntu:~# ip netns exec vm4 ping 30.0.0.1
PING 30.0.0.1 (30.0.0.1) 56(84) bytes of data.
64 bytes from 30.0.0.1: icmp_seq=1 ttl=62 time=0.117 ms
64 bytes from 30.0.0.1: icmp_seq=2 ttl=62 time=0.063 ms
64 bytes from 30.0.0.1: icmp_seq=3 ttl=62 time=0.064 ms
^C
--- 30.0.0.1 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2025ms
rtt min/avg/max/mdev = 0.063/0.081/0.117/0.025 ms
root@ubuntu:~# ovn-sbctl dump-flows | grep 30.0.0.1
  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[2] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;)
  table=12(ls_in_lb           ), priority=110  , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[1] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=5 (lr_in_defrag       ), priority=100  , match=(ip && ip4.dst == 30.0.0.1), action=(reg0 = 30.0.0.1; ct_dnat;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.est && !ct.rel && ip4 && reg0 == 30.0.0.1 && ct_mark.natted == 1 && is_chassis_resident("cr-r1-ts1")), action=(next;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.est && !ct.rel && ip4 && reg0 == 30.0.0.1 && ct_mark.natted == 1 && is_chassis_resident("cr-r1-ts2")), action=(next;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.est && !ct.rel && ip4 && reg0 == 30.0.0.1 && ct_mark.natted == 1 && is_chassis_resident("cr-r1_public")), action=(next;)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && reg0 == 30.0.0.1 && is_chassis_resident("cr-r1-ts1")), action=(ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && reg0 == 30.0.0.1 && is_chassis_resident("cr-r1-ts2")), action=(ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=7 (lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && reg0 == 30.0.0.1 && is_chassis_resident("cr-r1_public")), action=(ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
  table=13(lr_in_ip_routing   ), priority=97   , match=(reg7 == 0 && ip4.dst == 30.0.0.1/32), action=(ip.ttl--; reg8[0..15] = 0; reg0 = 173.16.0.1; reg1 = 173.16.0.2; eth.src = 00:de:ad:ff:00:02; outport = "r2_public"; flags.loopback = 1; next;)
  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[2] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;)
  table=12(ls_in_lb           ), priority=110  , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[1] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
root@ubuntu:~#