diff -Nru masakari-8.0.0/debian/changelog masakari-8.0.0/debian/changelog --- masakari-8.0.0/debian/changelog 2019-10-16 10:31:44.000000000 -0700 +++ masakari-8.0.0/debian/changelog 2020-09-23 20:27:13.000000000 -0700 @@ -1,3 +1,12 @@ +masakari (8.0.0-0ubuntu1~cloud0+lp1773765) bionic-train; urgency=medium + + * Check expired notifications and clean running notifications past an expiration + to allow for a host to become manageable again (LP: #1773765). + - d/p/check-expired-notifications.patch: Adds a periodic task to check for unfinished + notifications that have exceeded a threshold and marks them as failed. + + -- Billy Olsen Wed, 23 Sep 2020 20:27:13 -0700 + masakari (8.0.0-0ubuntu1~cloud0) bionic-train; urgency=medium * New upstream release for the Ubuntu Cloud Archive. diff -Nru masakari-8.0.0/debian/patches/check-expired-notifications.patch masakari-8.0.0/debian/patches/check-expired-notifications.patch --- masakari-8.0.0/debian/patches/check-expired-notifications.patch 1969-12-31 17:00:00.000000000 -0700 +++ masakari-8.0.0/debian/patches/check-expired-notifications.patch 2020-09-23 20:27:13.000000000 -0700 @@ -0,0 +1,139 @@ +From: suzhengwei +Subject: [PATCH 1/1] check expired notifications + +Occasionally, some notifications remain in 'new', +'error' or 'running' status indefinitely and are never processed again. +Due to this, the operator cannot update the segment or host. + +This patch adds a task to periodically check unfinished notifications. +If an unfinished notification has expired, its status is set to +'failed'.
+ +Conflicts: + masakari/tests/unit/engine/test_engine_mgr.py + - Update test for difference of unittest.mock and mock + +Closes-Bug: #1773765 +Change-Id: If49635639dd976aeec3ea73e702ad2636fcf1e0a +(cherry picked from commit 3a4f782441f6bdcfb8ee49a393937267fc246c56) +--- + masakari/conf/engine.py | 7 +++++ + masakari/engine/manager.py | 28 +++++++++++++++++++ + masakari/tests/unit/engine/test_engine_mgr.py | 17 +++++++++-- + 3 files changed, 50 insertions(+), 2 deletions(-) + +Origin: upstream, https://git.openstack.org/cgit/openstack/masakari/commit/?id=e0423906a7aabb4ea02b754e041f725c16535be5 +Bug: https://bugs.launchpad.net/masakari/+bug/1773765 + +diff --git a/masakari/conf/engine.py b/masakari/conf/engine.py +index cd19227..d8ecf4d 100644 +--- a/masakari/conf/engine.py ++++ b/masakari/conf/engine.py +@@ -97,6 +97,13 @@ notification_opts = [ + "generated_time, then it is considered that notification " + "is ignored by the messaging queue and will be processed " + "by 'process_unfinished_notifications' periodic task."), ++ cfg.IntOpt('check_expired_notifications_interval', ++ default=600, ++ help='Interval in seconds for checking running notifications.'), ++ cfg.IntOpt('notifications_expired_interval', ++ default=86400, ++ help='Interval in seconds for identifying running ' ++ 'notifications expired.'), + cfg.IntOpt('host_failure_recovery_threads', + default=3, + min=1, +diff --git a/masakari/engine/manager.py b/masakari/engine/manager.py +index 5a7bccf..a48a01e 100644 +--- a/masakari/engine/manager.py ++++ b/masakari/engine/manager.py +@@ -358,6 +358,34 @@ class MasakariManager(manager.Manager): + {'notification_uuid': notification.notification_uuid, + 'status': notification_status}) + ++ @periodic_task.periodic_task( ++ spacing=CONF.check_expired_notifications_interval) ++ def _check_expired_notifications(self, context): ++ filters = { ++ 'status': [fields.NotificationStatus.RUNNING, ++ fields.NotificationStatus.ERROR, ++ fields.NotificationStatus.NEW] ++ } ++ 
notifications_list = objects.NotificationList.get_all(context, ++ filters=filters) ++ ++ for notification in notifications_list: ++ if timeutils.is_older_than( ++ notification.generated_time, ++ CONF.notifications_expired_interval): ++ # update running expired notification status as failed ++ notification_status = fields.NotificationStatus.FAILED ++ update_data = { ++ 'status': notification_status ++ } ++ ++ notification.update(update_data) ++ notification.save() ++ LOG.error( ++ "Periodic task 'check_expired_notifications': " ++ "Notification %(notification_uuid)s is expired.", ++ {'notification_uuid': notification.notification_uuid}) ++ + def get_notification_recovery_workflow_details(self, context, + notification): + """Retrieve recovery workflow details of the notification""" +diff --git a/masakari/tests/unit/engine/test_engine_mgr.py b/masakari/tests/unit/engine/test_engine_mgr.py +index 1066835..c5d25a5 100644 +--- a/masakari/tests/unit/engine/test_engine_mgr.py ++++ b/masakari/tests/unit/engine/test_engine_mgr.py +@@ -12,6 +12,7 @@ + # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + # License for the specific language governing permissions and limitations + # under the License. 
++import datetime + import mock + from oslo_utils import importutils + from oslo_utils import timeutils +@@ -33,6 +34,8 @@ from masakari.tests import uuidsentinel + CONF = masakari.conf.CONF + + NOW = timeutils.utcnow().replace(microsecond=0) ++EXPIRED_TIME = timeutils.utcnow().replace(microsecond=0) \ ++ - datetime.timedelta(seconds=CONF.notifications_expired_interval) + + + def _get_vm_type_notification(status="new"): +@@ -68,14 +71,15 @@ class EngineManagerUnitTestCase(test.NoDBTestCase): + generated_time=NOW, status="new", + notification_uuid=uuidsentinel.fake_notification) + +- def _get_compute_host_type_notification(self): ++ def _get_compute_host_type_notification(self, expired=False): + return fakes.create_fake_notification( + type="COMPUTE_HOST", id=1, payload={ + 'event': 'stopped', 'host_status': 'NORMAL', + 'cluster_status': 'ONLINE' + }, + source_host_uuid=uuidsentinel.fake_host, +- generated_time=NOW, status="new", ++ generated_time=EXPIRED_TIME if expired else NOW, ++ status="new", + notification_uuid=uuidsentinel.fake_notification) + + @mock.patch("masakari.engine.drivers.taskflow." 
+@@ -1116,3 +1120,12 @@ class EngineManagerUnitTestCase(test.NoDBTestCase): + + mock_progress_details.assert_called_once_with( + self.context, notification) ++ ++ @mock.patch.object(notification_obj.Notification, "save") ++ @mock.patch.object(notification_obj.NotificationList, "get_all") ++ def test_check_expired_notifications(self, mock_get_all, mock_save, ++ mock_notification_get): ++ notification = self._get_compute_host_type_notification(expired=True) ++ mock_get_all.return_value = [notification] ++ self.engine._check_expired_notifications(self.context) ++ self.assertEqual("failed", notification.status) +-- +2.25.1 + diff -Nru masakari-8.0.0/debian/patches/series masakari-8.0.0/debian/patches/series --- masakari-8.0.0/debian/patches/series 1969-12-31 17:00:00.000000000 -0700 +++ masakari-8.0.0/debian/patches/series 2020-09-23 20:27:08.000000000 -0700 @@ -0,0 +1 @@ +check-expired-notifications.patch