From 5e9a748678e9cc5fce5d43d0c6663ff1463db07c Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Thu, 29 Nov 2018 03:48:52 -0500 Subject: [PATCH] net_failover: delay taking over primary device to accommodate udevd renaming There is an inherent race with udev rename in userspace due to the exposure of two lower slave devices while kernel attempts to manage the creation for failover bonding itself automatically. The existing userspace naming logic in udevd was not specifically written for this in-kernel automagic. The clean fix for the problem is either to update the udevd to not try rename the 3-netdev (ideally rename the device in a coordinated manner), or to fix the kernel to hide the 2 lower devices which does not have to be shown to userspace unless needed (1-netdev model). However, our pursuance of 1-netdev model has not been acknowledged by upstream, and there's no motivation in the systemd/udevd community at this point to refactor the rename logic and make it work well with 3-netdev. Hyper-V's netvsc mitigated this by postponing the VF's dev_open() to allow a userspace thread to rename the device within a 100ms worth of window. For the interim, we follow the same as done by netvsc to avoid the renaming failure, until we move to the point where a clean solution is available in upstream. OraBug: 28938696 Signed-off-by: Si-Wei Liu --- drivers/net/net_failover.c | 71 ++++++++++++++++++++++++++++++++++++++++------ include/net/net_failover.h | 6 ++++ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 0774525..8ce2e47 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -28,6 +28,10 @@ #include #include +#define TAKEOVER_DELAY_DEFAULT 100 +static unsigned long takeover_delay = TAKEOVER_DELAY_DEFAULT; +module_param(takeover_delay, ulong, 0); + static bool net_failover_xmit_ready(struct net_device *dev) { return netif_running(dev) && netif_carrier_ok(dev); @@ -500,6 +504,7 @@ static int net_failover_slave_register(struct net_device *slave_dev, { struct net_device *standby_dev, *primary_dev; struct net_failover_info *nfo_info; + bool work_scheduled = false; bool slave_is_standby; u32 orig_mtu; int err; @@ -515,12 +520,20 @@ static int net_failover_slave_register(struct net_device *slave_dev, dev_hold(slave_dev); + slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; + nfo_info = netdev_priv(failover_dev); + if (netif_running(failover_dev)) { - err = dev_open(slave_dev); - if (err && (err != -EBUSY)) { - netdev_err(failover_dev, "Opening slave %s failed err:%d\n", - slave_dev->name, err); - goto err_dev_open; + if (takeover_delay && !slave_is_standby) { + schedule_delayed_work(&nfo_info->takeover, takeover_delay * HZ / 1000); + work_scheduled = true; + } else { + err = dev_open(slave_dev); + if (err && (err != -EBUSY)) { + netdev_err(failover_dev, "Opening slave %s failed err:%d\n", + slave_dev->name, err); + goto err_dev_open; + } } } @@ -533,13 +546,13 @@ static int net_failover_slave_register(struct net_device *slave_dev, if (err) { netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n", slave_dev->name, err); + if (work_scheduled) + cancel_delayed_work(&nfo_info->takeover); goto err_vlan_add; } - nfo_info = netdev_priv(failover_dev); standby_dev = rtnl_dereference(nfo_info->standby_dev); primary_dev = rtnl_dereference(nfo_info->primary_dev); - slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; if (slave_is_standby) { rcu_assign_pointer(nfo_info->standby_dev, slave_dev); @@ -676,11 +689,47 @@ static int net_failover_slave_name_change(struct net_device *slave_dev, /* We need to bring up the slave after the rename by udev in case * open failed with EBUSY when it was registered. */ - dev_open(slave_dev); + if (netif_running(failover_dev)) { + dev_open(slave_dev); + + net_failover_lower_state_changed(slave_dev, + primary_dev, standby_dev); + } return 0; } +static void net_failover_takeover_primary(struct work_struct *w) +{ + struct net_failover_info *nfo_info + = container_of(w, struct net_failover_info, takeover.work); + struct net_device *primary_dev, *standby_dev; + struct net_device *failover_dev; + int err; + + if (!rtnl_trylock()) { + schedule_delayed_work(&nfo_info->takeover, 0); + return; + } + + failover_dev = nfo_info->failover_dev; + primary_dev = rtnl_dereference(nfo_info->primary_dev); + standby_dev = rtnl_dereference(nfo_info->standby_dev); + + if (primary_dev && netif_running(failover_dev)) { + err = dev_open(primary_dev); + if (err) { + netdev_err(failover_dev, "Opening primary %s failed err:%d\n", + primary_dev->name, err); + } else { + net_failover_lower_state_changed(primary_dev, + primary_dev, standby_dev); + } + } + + rtnl_unlock(); +} + static struct failover_ops net_failover_ops = { .slave_pre_register = net_failover_slave_pre_register, .slave_register = net_failover_slave_register, @@ -707,6 +756,7 @@ static int net_failover_slave_name_change(struct net_device *slave_dev, struct failover *net_failover_create(struct net_device *standby_dev) { struct device *dev = standby_dev->dev.parent; + struct net_failover_info *nfo_info; struct net_device *failover_dev; struct failover *failover; int err; @@ -758,6 +808,9 @@ struct failover *net_failover_create(struct net_device *standby_dev) } netif_carrier_off(failover_dev); + nfo_info = netdev_priv(failover_dev); + nfo_info->failover_dev = failover_dev; + INIT_DELAYED_WORK(&nfo_info->takeover, net_failover_takeover_primary); failover = failover_register(failover_dev, &net_failover_ops); if (IS_ERR(failover)) @@ -797,6 +850,8 @@ void net_failover_destroy(struct failover *failover) failover_dev = rcu_dereference(failover->failover_dev); nfo_info = netdev_priv(failover_dev); + cancel_delayed_work_sync(&nfo_info->takeover); + netif_device_detach(failover_dev); rtnl_lock(); diff --git a/include/net/net_failover.h b/include/net/net_failover.h index b12a1c4..3cd0a61 100644 --- a/include/net/net_failover.h +++ b/include/net/net_failover.h @@ -25,6 +25,12 @@ struct net_failover_info { /* spinlock while updating stats */ spinlock_t stats_lock; + + /* back reference to associated net_device */ + struct net_device *failover_dev; + + /* delayed work to take over primary netdev */ + struct delayed_work takeover; }; struct failover *net_failover_create(struct net_device *standby_dev); -- 1.8.3.1