dhcpd not working when only 1 out of 2 controllers have an ipv6 address

Bug #2049950 reported by maasuser1
10
This bug affects 2 people
Affects Status Importance Assigned to Milestone
MAAS
Triaged
High
Unassigned

Bug Description

MAAS stopped providing DHCP service for the default VLAN (DHCP HA enabled with MAAS Regional Controller (main) and MAAS Rack Controller (Secondary)).

On the regional controller, rackd.log keeps showing

```
[TIMESTAMP] maasserver.rack_controller: [critical] Failed configuring DHCP on rack controller 'id:1'.
 Traceback (most recent call last):
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1750, in gotResult
     current_context.run(_inlineCallbacks, r, gen, status)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1740, in _inlineCallbacks
     status.deferred.errback()
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 700, in errback
     self._startRunCallbacks(fail)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
     self._runCallbacks()
 --- <exception caught here> ---
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 857, in _runCallbacks
     current.result = callback( # type: ignore[misc]
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/rack_controller.py", line 281, in <lambda>
     d.addErrback(lambda f: f.trap(NoConnectionsAvailable))
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 451, in trap
     self.raiseException()
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 475, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 857, in _runCallbacks
     current.result = callback( # type: ignore[misc]
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/rack_controller.py", line 300, in unwatch_if_does_not_exist
     f.trap(RackController.DoesNotExist)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 451, in trap
     self.raiseException()
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 475, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1656, in _inlineCallbacks
     result = current_context.run(
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 489, in throwExceptionIntoGenerator
     return g.throw(self.type, self.value, self.tb)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 867, in configure_dhcp
     config = yield deferToDatabase(get_dhcp_configuration, rack_controller)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/threadpool.py", line 244, in inContext
     result = inContext.theWork() # type: ignore[attr-defined]
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/threadpool.py", line 260, in <lambda>
     inContext.theWork = lambda: context.call( # type: ignore[attr-defined]
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/context.py", line 117, in callWithContext
     return self.currentContext().callWithContext(ctx, func, *args, **kw)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/context.py", line 82, in callWithContext
     return func(*args, **kw)
   File "/snap/maas/32469/lib/python3.10/site-packages/provisioningserver/utils/twisted.py", line 856, in callInContext
     return func(*args, **kwargs)
   File "/snap/maas/32469/lib/python3.10/site-packages/provisioningserver/utils/twisted.py", line 203, in wrapper
     result = func(*args, **kwargs)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/utils/orm.py", line 771, in call_within_transaction
     return func_outside_txn(*args, **kwargs)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/utils/orm.py", line 574, in retrier
     return func(*args, **kwargs)
   File "/usr/lib/python3.10/contextlib.py", line 79, in inner
     return func(*args, **kwds)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 786, in get_dhcp_configuration
     config = get_dhcp_configure_for(
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 639, in get_dhcp_configure_for
     peer_name, peer_config, peer_rack = make_failover_peer_config(
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 515, in make_failover_peer_config
     peer_address = get_ip_address_for_rack_controller(
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 258, in get_ip_address_for_rack_controller
     return get_ip_address_for_interface(interface, vlan, ip_version)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 232, in get_ip_address_for_interface
     for ip_address in interface.ip_addresses.all():
 builtins.AttributeError: 'NoneType' object has no attribute 'ip_addresses'

[TIMESTAMP] maasserver.rack_controller: [critical] Failed configuring DHCP on rack controller 'id:102'.
 Traceback (most recent call last):
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1750, in gotResult
     current_context.run(_inlineCallbacks, r, gen, status)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1740, in _inlineCallbacks
     status.deferred.errback()
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 700, in errback
     self._startRunCallbacks(fail)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
     self._runCallbacks()
 --- <exception caught here> ---
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 857, in _runCallbacks
     current.result = callback( # type: ignore[misc]
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/rack_controller.py", line 281, in <lambda>
     d.addErrback(lambda f: f.trap(NoConnectionsAvailable))
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 451, in trap
     self.raiseException()
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 475, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 857, in _runCallbacks
     current.result = callback( # type: ignore[misc]
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/rack_controller.py", line 300, in unwatch_if_does_not_exist
     f.trap(RackController.DoesNotExist)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 451, in trap
     self.raiseException()
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 475, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1656, in _inlineCallbacks
     result = current_context.run(
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/failure.py", line 489, in throwExceptionIntoGenerator
     return g.throw(self.type, self.value, self.tb)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 867, in configure_dhcp
     config = yield deferToDatabase(get_dhcp_configuration, rack_controller)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/threadpool.py", line 244, in inContext
     result = inContext.theWork() # type: ignore[attr-defined]
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/threadpool.py", line 260, in <lambda>
     inContext.theWork = lambda: context.call( # type: ignore[attr-defined]
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/context.py", line 117, in callWithContext
     return self.currentContext().callWithContext(ctx, func, *args, **kw)
   File "/snap/maas/32469/usr/lib/python3/dist-packages/twisted/python/context.py", line 82, in callWithContext
     return func(*args, **kw)
   File "/snap/maas/32469/lib/python3.10/site-packages/provisioningserver/utils/twisted.py", line 856, in callInContext
     return func(*args, **kwargs)
   File "/snap/maas/32469/lib/python3.10/site-packages/provisioningserver/utils/twisted.py", line 203, in wrapper
     result = func(*args, **kwargs)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/utils/orm.py", line 771, in call_within_transaction
     return func_outside_txn(*args, **kwargs)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/utils/orm.py", line 574, in retrier
     return func(*args, **kwargs)
   File "/usr/lib/python3.10/contextlib.py", line 79, in inner
     return func(*args, **kwds)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 786, in get_dhcp_configuration
     config = get_dhcp_configure_for(
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 639, in get_dhcp_configure_for
     peer_name, peer_config, peer_rack = make_failover_peer_config(
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 508, in make_failover_peer_config
     interface_ip_address = get_ip_address_for_rack_controller(
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 258, in get_ip_address_for_rack_controller
     return get_ip_address_for_interface(interface, vlan, ip_version)
   File "/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py", line 232, in get_ip_address_for_interface
     for ip_address in interface.ip_addresses.all():
 builtins.AttributeError: 'NoneType' object has no attribute 'ip_addresses'
```

On the secondary controller, `rackd.log` doesn't produce any errors.

Additional information:

- Currently, we don't have an Internet connection for another reason, but it should not affect the DHCP service.
- NTP servers in MAAS were configured in domains: `time.cloudflare.com`, `time.apple.com` and `pool.ntp.org`.

Revision history for this message
maasuser1 (maasuser1) wrote :

Workaround: go to the default VLAN settings and change the DHCP duty to the main controller only. After that, the main controller stops producing these error logs, and the DHCP service works as expected.

Revision history for this message
Javier Fuentes (javier-fs) wrote :

Hello @maasuser1

Thanks for bringing this topic and providing the logs.

What is the MAAS version that you are using?

Did DHCP stop working suddenly or due to some changes in the setup, such as adding the secondary rack? If possible, are you able to reproduce this error in a fresh installation?

Revision history for this message
maasuser1 (maasuser1) wrote :

Hello Javier,

Both controllers run 3.4.0-14321-g.1027c7664 installed from Snap.

The problem happened after we lost Internet access from the upstream, but I'm not sure whether it happened suddenly or not. Unfortunately, I don't have available nodes for reproducing this.

I guess the problem might be that the resolution of the NTP domains failed due to the Internet outage, but I am not sure whether this is reproducible.

Revision history for this message
Javier Fuentes (javier-fs) wrote :

Could you please provide the full logs of the region and rack controllers?

Revision history for this message
maasuser1 (maasuser1) wrote :

Hi Javier,

Sure. Can I send it to you privately instead of attaching it here, due to privacy/sensitive information?
Many thanks!

Revision history for this message
Javier Fuentes (javier-fs) wrote :

In case it helps, here is a link to the way of sharing documents with the team that r00ta described previously on Discourse :)
https://discourse.maas.io/t/cannot-deploy-official-ubuntu-22-04-and-18-04-only-20-04-works-on-multiple-maas-versions/7657/7

Basically:
- if you are a Canonical customer, you should open a case on the customer portal
- if you are a community user since there is no formal agreement it’s up to you to share your logs/data with Canonical and/or the community (email, google drive, dropbox...).

Changed in maas:
status: New → Incomplete
Revision history for this message
maasuser1 (maasuser1) wrote :

Our Internet connection has been recovered, and the problem still exists. I've reproduced the problem and attached the log of `regiond` on the main controller (region + rack).

Revision history for this message
Javier Fuentes (javier-fs) wrote :

Could you run the following command and post back the output?

> maas $PROFILE rack-controllers read

where $PROFILE represents the administrative MAAS username created during the installation of MAAS.

Revision history for this message
maasuser1 (maasuser1) wrote :

Attached is the output.

Revision history for this message
maasuser1 (maasuser1) wrote :

By digging into the source code of `/snap/maas/32469/lib/python3.10/site-packages/maasserver/dhcp.py`, I've identified the problem:

1. The target `fabric-0` has two subnets, both are handled by MAAS's `dhcpd` service.
     - `192.168.0.0/24`
     - `fdc3:a1b1:9f28::/64`
2. The main controller (id:1, regiond + rackd) has two IP addresses:
     - `192.168.0.2/24`
     - `fdc3:a1b1:9f28:0:feaa:14ff:feda:e3ec/64`
3. The secondary controller (id: 102, rackd) has only one IP address:
     - `192.168.0.147/24`
4. `get_ip_address_for_rack_controller(rack_controller, vlan, ip_version: int)` traverses all interfaces of the given controller according to the given IP version (IPv4 or IPv6), and then returns the `best` IP address.
5. Since the secondary controller has no IPv6 address on this fabric, `get_best_interface(interfaces)` returns `None`, and finally causes the "'NoneType' object has no attribute 'ip_addresses'" error on line 232, in `get_ip_address_for_interface()`.

Proposed solution:

1. Check whether all MAAS controllers have IP addresses on both IPv4 and IPv6 according to the fabric. If not, highlight this on Web UI.
2. Handle the exception in `dhcp.py`, which allows the controller to serve DHCP service for a single IP version (IPv4 or IPv6).

summary: - dhcpd not working
+ dhcpd not working when only 1 out of 2 controllers have an ipv6 address
Changed in maas:
status: Incomplete → Triaged
milestone: none → 3.5.x
importance: Undecided → High
Revision history for this message
Alan Baghumian (alanbach) wrote (last edit ):
Download full text (5.5 KiB)

This started happening to me out of the blue yesterday. I am not using IPv6 with any of my subnets; however, even with network discovery being off, the IPv6 address on the Rack Controllers (I have two) was added under fabric-0, and this was causing the Region Controllers (again two, on separate machines) to go completely haywire!

The initial workaround was to configure the untagged subnet to only use one Rack Controller for DHCP services and then I went ahead and deleted the IPv6 auto-added subnet from fabric-0.

After that, I re-added the second Rack Controller to restore HA, and everything seems to be normal so far.

The trace looks the same as what was originally reported:

2024-02-19 03:16:46 maasserver.dhcp: [info] Successfully configured DHCPv4 on rack controller 'maas-rack-2 (6ks83s)'.
2024-02-19 03:16:46 maasserver.rack_controller: [critical] Failed configuring DHCP on rack controller 'id:2'.
 Traceback (most recent call last):
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1750, in gotResult
     current_context.run(_inlineCallbacks, r, gen, status)
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1740, in _inlineCallbacks
     status.deferred.errback()
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 700, in errback
     self._startRunCallbacks(fail)
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
     self._runCallbacks()
 --- <exception caught here> ---
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 857, in _runCallbacks
     current.result = callback( # type: ignore[misc]
   File "/snap/maas/33524/lib/python3.10/site-packages/maasserver/rack_controller.py", line 281, in <lambda>
     d.addErrback(lambda f: f.trap(NoConnectionsAvailable))
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/python/failure.py", line 451, in trap
     self.raiseException()
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/python/failure.py", line 475, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 857, in _runCallbacks
     current.result = callback( # type: ignore[misc]
   File "/snap/maas/33524/lib/python3.10/site-packages/maasserver/rack_controller.py", line 300, in unwatch_if_does_not_exist
     f.trap(RackController.DoesNotExist)
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/python/failure.py", line 451, in trap
     self.raiseException()
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/python/failure.py", line 475, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1656, in _inlineCallbacks
     result = current_context.run(
   File "/snap/maas/33524/usr/lib/python3/dist-packages/twisted/python/failure.py", line 489, in throwExceptionIntoGenerator
     return g.throw(self.type, self.value, self.tb)
   File "/snap/maas/33524/lib/python3.10/site-packages/m...

Read more...

To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.