what is working to get rid of this issue , add some checks after agent revived in "fetch_and_sync_all_routers" method @neutron/agent/l3/agent.py if r.get('_ha_state') == 'unknown': LOG.debug("Processing router with l3_agent in " "unknown state") self._process_added_router(r) and entire method looks like this : def fetch_and_sync_all_routers(self, context, ns_manager): prev_router_ids = set(self.router_info) curr_router_ids = set() timestamp = timeutils.utcnow() router_ids = [] chunk = [] is_snat_agent = (self.conf.agent_mode == lib_const.L3_AGENT_MODE_DVR_SNAT) try: router_ids = self.plugin_rpc.get_router_ids(context) LOG.debug("Router IDs from Neutron API: %s", router_ids) self._remove_orphan_routers_config( ha_confs_path=self.ha_confs_path, router_ids=set(router_ids), ) # fetch routers by chunks to reduce the load on server and to # start router processing earlier for i in range(0, len(router_ids), self.sync_routers_chunk_size): chunk = router_ids[i:i + self.sync_routers_chunk_size] routers = self.plugin_rpc.get_routers(context, chunk) LOG.debug('Processing :%r', routers) for r in routers: curr_router_ids.add(r['id']) ns_manager.keep_router(r['id']) if r.get('distributed'): # need to keep fip namespaces as well ext_net_id = (r['external_gateway_info'] or {}).get( 'network_id') if ext_net_id: ns_manager.keep_ext_net(ext_net_id) elif is_snat_agent and not r.get('ha'): ns_manager.ensure_snat_cleanup(r['id']) if r.get('_ha_state') == 'unknown': LOG.debug("Processing router with l3_agent in " "unknown state") self._process_added_router(r) update = queue.ResourceUpdate( r['id'], PRIORITY_SYNC_ROUTERS_TASK, resource=r, action=ADD_UPDATE_ROUTER, timestamp=timestamp) self._queue.add(update) except oslo_messaging.MessagingTimeout: if self.sync_routers_chunk_size > SYNC_ROUTERS_MIN_CHUNK_SIZE: self.sync_routers_chunk_size = max( self.sync_routers_chunk_size // 2, SYNC_ROUTERS_MIN_CHUNK_SIZE) LOG.error('Server failed to return info for routers in ' 'required time, decreasing chunk size to: %s', self.sync_routers_chunk_size) else: LOG.error('Server failed to return info for routers in ' 'required time even with min chunk size: %s. ' 'It might be under very high load or ' 'just inoperable', self.sync_routers_chunk_size) raise except oslo_messaging.MessagingException: failed_routers = chunk or router_ids LOG.exception("Failed synchronizing routers '%s' " "due to RPC error", failed_routers) raise l3_exc.AbortSyncRouters() self.fullsync = False LOG.debug("periodic_sync_routers_task successfully completed") # adjust chunk size after successful sync if self.sync_routers_chunk_size < SYNC_ROUTERS_MAX_CHUNK_SIZE: self.sync_routers_chunk_size = min( self.sync_routers_chunk_size + SYNC_ROUTERS_MIN_CHUNK_SIZE, SYNC_ROUTERS_MAX_CHUNK_SIZE) # Delete routers that have disappeared since the last sync for router_id in prev_router_ids - curr_router_ids: ns_manager.keep_router(router_id) update = queue.ResourceUpdate(router_id, PRIORITY_SYNC_ROUTERS_TASK, timestamp=timestamp, action=DELETE_ROUTER) self._queue.add(update)