diff -Nru oslo.messaging-1.4.1/debian/changelog oslo.messaging-1.4.1/debian/changelog --- oslo.messaging-1.4.1/debian/changelog 2015-09-17 19:55:37.000000000 +0800 +++ oslo.messaging-1.4.1/debian/changelog 2015-12-17 16:12:37.000000000 +0800 @@ -1,3 +1,11 @@ +oslo.messaging (1.4.1-0ubuntu1.2~cloud0ubuntu1) trusty-juno; urgency=medium + + * Backport of upstream release. (LP: #1318721): + - d/p/0007-fix-reconnect-race-condition-with-rabbitmq-cluster.patch: + Redeclare if exception is caught after self.queue.declare() failed. + + -- Hui Xiang Thu, 17 Dec 2015 16:09:57 +0800 + oslo.messaging (1.4.1-0ubuntu1.2~cloud0) trusty-juno; urgency=medium * Backport of upstream fix for LP: #1338732. diff -Nru oslo.messaging-1.4.1/debian/patches/0007-fix-reconnect-race-condition-with-rabbitmq-cluster.patch oslo.messaging-1.4.1/debian/patches/0007-fix-reconnect-race-condition-with-rabbitmq-cluster.patch --- oslo.messaging-1.4.1/debian/patches/0007-fix-reconnect-race-condition-with-rabbitmq-cluster.patch 1970-01-01 08:00:00.000000000 +0800 +++ oslo.messaging-1.4.1/debian/patches/0007-fix-reconnect-race-condition-with-rabbitmq-cluster.patch 2016-01-07 13:41:44.000000000 +0800 @@ -0,0 +1,76 @@ +Description: Fix reconnect race condition with RabbitMQ cluster + + commit 7ad0d7eaf9cb095a14b07a08c814d9f1f9c8ff12 + Author: Jens Rosenboom + Date: Fri Jun 27 16:46:47 2014 +0200 + + Retry Queue creation to workaround race condition + that may happen when both the client and broker race over + exchange creation and deletion respectively which happen only + when the Queue/Exchange were created with auto-delete flag. + + Queues/Exchange declared with auto-delete instruct the Broker to + delete the Queue when the last Consumer disconnects from it, and + the Exchange when the last Queue is deleted from this Exchange. + + Now in a RabbitMQ cluster setup, if the cluster node that we are + connected to goes down, 2 things will happen: + + 1. 
From RabbitMQ side, the Queues w/ auto-delete will be deleted + from the other cluster nodes and then the Exchanges that the + Queues are bound to if they were also created w/ auto-delete. + 2. From client side, client will reconnect to another cluster + node and call queue.declare() which creates Exchanges then + Queues then Binding in that order. + + Now in a happy path the queues/exchanges will be deleted from the + broker before the client starts re-creating them again, but it is also + possible that the client first starts by creating queues/exchange + as part of the queue.declare() call, which are no-op operations + b/c they already exist, but before it could bind Queue to + Exchange, RabbitMQ nodes just received the 'signal' that the + queue doesn't have any consumer so it should be deleted, and the + same with exchanges, which will lead to the binding failing with + NotFound error. + + Illustration of the time line from Client and RabbitMQ cluster + respectively when the race condition happens: + + + e-declare(E) q-declare(Q) q-bind(Q, E) + -----+------------------+----------------+-----------> + e-delete(E) + ------------------------------+----------------------> + + Change-Id: Ideb73af6f246a8282780cdb204d675d5d4555bf0 + Closes-Bug: #1318721 + +Author: Jens Rosenboom +Origin: backport, https://review.openstack.org/#/c/103157/ +Bug: https://bugs.launchpad.net/neutron/+bug/1318721 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/oslo/messaging/_drivers/impl_rabbit.py ++++ b/oslo/messaging/_drivers/impl_rabbit.py +@@ -159,7 +159,20 @@ + self.channel = channel + self.kwargs['channel'] = channel + self.queue = kombu.entity.Queue(**self.kwargs) +- self.queue.declare() ++ try: ++ self.queue.declare() ++ except Exception as e: ++ # NOTE: This exception may be triggered by a race condition. ++ # Simply retrying will solve the error most of the time and ++ # should work well enough as a workaround until the race condition ++ # itself can be fixed. 
++ # TODO(jrosenboom): In order to be able to match the Exception ++ # more specifically, we have to refactor ConsumerBase to use ++ # 'channel_errors' of the kombu connection object that ++ # has created the channel. ++ # See https://bugs.launchpad.net/neutron/+bug/1318721 for details. ++ LOG.exception(_("Declaring queue failed with (%s), retrying"), e) ++ self.queue.declare() + + def _callback_handler(self, message, callback): + """Call callback with deserialized message. diff -Nru oslo.messaging-1.4.1/debian/patches/series oslo.messaging-1.4.1/debian/patches/series --- oslo.messaging-1.4.1/debian/patches/series 2015-09-17 19:55:06.000000000 +0800 +++ oslo.messaging-1.4.1/debian/patches/series 2015-12-17 16:07:40.000000000 +0800 @@ -5,3 +5,4 @@ 0004-rabbit-redeclare-consumers-when-ack-requeue-fail.patch 0005-Fix-possible-usage-of-undefined-variable.patch 0006-Declare-DirectPublisher-exchanges-with-passive-True.patch +0007-fix-reconnect-race-condition-with-rabbitmq-cluster.patch