From 675561504daecabc39a5e4307e392c4a1e8ef023 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Tue, 29 May 2012 16:04:16 -0400 Subject: [PATCH] Loop around on an rpc timeout during startup. Potential fix for bug 999698. nova-compute requests network info for each instance on startup via rpc. If all services get (re)started at the same time, nova-network may not be available to take this request, resulting in a lost request. To combat this issue, set the request timeout to smaller than usual (10 seconds) and try again after a timeout instead of failing and stopping the compute service. Change-Id: I0bbd475e078ac2a67c99c2be4711e86d617c609a --- nova/compute/manager.py | 27 ++++++++++++++++++--------- nova/network/api.py | 4 ++-- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 2e56bc5..96c8aff 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -68,6 +68,7 @@ from nova.openstack.common import excutils from nova.openstack.common import importutils from nova.openstack.common import jsonutils from nova import rpc +from nova.rpc import common as rpc_common from nova import utils from nova.virt import driver from nova import volume @@ -292,13 +293,21 @@ class ComputeManager(manager.SchedulerDependentManager): self.reboot_instance(context, instance['uuid']) elif drv_state == power_state.RUNNING: # VMWareAPI drivers will raise an exception - try: - net_info = self._get_instance_nw_info(context, instance) - self.driver.ensure_filtering_rules_for_instance(instance, - self._legacy_nw_info(net_info)) - except NotImplementedError: - LOG.warning(_('Hypervisor driver does not support ' - 'firewall rules'), instance=instance) + while True: + try: + net_info = self._get_instance_nw_info(context, + instance, timeout=10) + self.driver.ensure_filtering_rules_for_instance(instance, + self._legacy_nw_info(net_info)) + except rpc_common.Timeout: + LOG.warning(_('Timed out waiting for instance network ' + 'info, Retrying. (Is the nova-network ' + 'service running?)'), instance=instance) + continue + except NotImplementedError: + LOG.warning(_('Hypervisor driver does not support ' + 'firewall rules'), instance=instance) + break def _get_power_state(self, context, instance): """Retrieve the power state for the given instance.""" @@ -347,7 +356,7 @@ class ComputeManager(manager.SchedulerDependentManager): """This call passes straight through to the virtualization driver.""" return self.driver.refresh_provider_fw_rules(**kwargs) - def _get_instance_nw_info(self, context, instance): + def _get_instance_nw_info(self, context, instance, timeout=None): """Get a list of dictionaries of network data of an instance. Returns an empty list if stub_network flag is set.""" if FLAGS.stub_network: @@ -355,7 +364,7 @@ class ComputeManager(manager.SchedulerDependentManager): # get the network info from network network_info = self.network_api.get_instance_nw_info(context, - instance) + instance, timeout) return network_info def _legacy_nw_info(self, network_info): diff --git a/nova/network/api.py b/nova/network/api.py index fa95674..ff041c3 100644 --- a/nova/network/api.py +++ b/nova/network/api.py @@ -206,7 +206,7 @@ class API(base.Base): {'method': 'add_network_to_project', 'args': {'project_id': project_id}}) - def get_instance_nw_info(self, context, instance): + def get_instance_nw_info(self, context, instance, timeout=None): """Returns all network info related to an instance.""" args = {'instance_id': instance['id'], 'instance_uuid': instance['uuid'], @@ -215,7 +215,7 @@ class API(base.Base): 'project_id': instance['project_id']} nw_info = rpc.call(context, FLAGS.network_topic, {'method': 'get_instance_nw_info', - 'args': args}) + 'args': args}, timeout=timeout) return network_model.NetworkInfo.hydrate(nw_info) def validate_networks(self, context, requested_networks): -- 1.7.10.2