diff -Nru masakari-10.0.0~b3~git2020091407.ee48dc2/debian/changelog masakari-10.0.0~b3~git2020091407.ee48dc2/debian/changelog --- masakari-10.0.0~b3~git2020091407.ee48dc2/debian/changelog 2020-09-14 00:18:38.000000000 -0700 +++ masakari-10.0.0~b3~git2020091407.ee48dc2/debian/changelog 2020-09-21 20:06:20.000000000 -0700 @@ -1,3 +1,12 @@ +masakari (10.0.0~b3~git2020091407.ee48dc2-0ubuntu1+lp1773765) groovy; urgency=medium + + * Check for expired notifications and mark those past their expiration as failed + to allow for a host to become manageable again (LP: #1773765). + - d/p/check-expired-notifications.patch: Adds a periodic task to check for unfinished + notifications that have exceeded an expiration threshold and marks them as failed. + + -- Billy Olsen Mon, 21 Sep 2020 20:06:20 -0700 + masakari (10.0.0~b3~git2020091407.ee48dc2-0ubuntu1) groovy; urgency=medium * New upstream snapshot for OpenStack Victoria. diff -Nru masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/check-expired-notifications.patch masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/check-expired-notifications.patch --- masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/check-expired-notifications.patch 1969-12-31 17:00:00.000000000 -0700 +++ masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/check-expired-notifications.patch 2020-09-21 20:06:20.000000000 -0700 @@ -0,0 +1,135 @@ +From: suzhengwei +Subject: [PATCH 1/1] check expired notifications + +Occasionally, some notifications remain in 'new', +'error' or 'running' status indefinitely and are never processed again. +Because of this, operators cannot update the segment or host. + +This patch adds a task to periodically check unfinished notifications. +If an unfinished notification has expired, its status is set to +'failed'. 
+ +Close-Bug: #1773765 +Change-Id: If49635639dd976aeec3ea73e702ad2636fcf1e0a +--- + masakari/conf/engine.py | 7 +++++ + masakari/engine/manager.py | 28 +++++++++++++++++++ + masakari/tests/unit/engine/test_engine_mgr.py | 18 ++++++++++-- + 3 files changed, 51 insertions(+), 2 deletions(-) + +Origin: upstream, https://git.openstack.org/cgit/openstack/masakari/commit/?id=3a4f782441f6bdcfb8ee49a393937267fc246c56 +Bug: https://bugs.launchpad.net/masakari/+bug/1773765 + +diff --git a/masakari/conf/engine.py b/masakari/conf/engine.py +index cd19227..d8ecf4d 100644 +--- a/masakari/conf/engine.py ++++ b/masakari/conf/engine.py +@@ -97,6 +97,13 @@ notification_opts = [ + "generated_time, then it is considered that notification " + "is ignored by the messaging queue and will be processed " + "by 'process_unfinished_notifications' periodic task."), ++ cfg.IntOpt('check_expired_notifications_interval', ++ default=600, ++ help='Interval in seconds for checking running notifications.'), ++ cfg.IntOpt('notifications_expired_interval', ++ default=86400, ++ help='Interval in seconds for identifying running ' ++ 'notifications expired.'), + cfg.IntOpt('host_failure_recovery_threads', + default=3, + min=1, +diff --git a/masakari/engine/manager.py b/masakari/engine/manager.py +index ac5cdfa..ef59cfd 100644 +--- a/masakari/engine/manager.py ++++ b/masakari/engine/manager.py +@@ -368,6 +368,34 @@ class MasakariManager(manager.Manager): + {'notification_uuid': notification.notification_uuid, + 'status': notification_status}) + ++ @periodic_task.periodic_task( ++ spacing=CONF.check_expired_notifications_interval) ++ def _check_expired_notifications(self, context): ++ filters = { ++ 'status': [fields.NotificationStatus.RUNNING, ++ fields.NotificationStatus.ERROR, ++ fields.NotificationStatus.NEW] ++ } ++ notifications_list = objects.NotificationList.get_all(context, ++ filters=filters) ++ ++ for notification in notifications_list: ++ if timeutils.is_older_than( ++ 
notification.generated_time, ++ CONF.notifications_expired_interval): ++ # update running expired notification status as failed ++ notification_status = fields.NotificationStatus.FAILED ++ update_data = { ++ 'status': notification_status ++ } ++ ++ notification.update(update_data) ++ notification.save() ++ LOG.error( ++ "Periodic task 'check_expired_notifications': " ++ "Notification %(notification_uuid)s is expired.", ++ {'notification_uuid': notification.notification_uuid}) ++ + def get_notification_recovery_workflow_details(self, context, + notification): + """Retrieve recovery workflow details of the notification""" +diff --git a/masakari/tests/unit/engine/test_engine_mgr.py b/masakari/tests/unit/engine/test_engine_mgr.py +index 22cf286..4edc8d0 100644 +--- a/masakari/tests/unit/engine/test_engine_mgr.py ++++ b/masakari/tests/unit/engine/test_engine_mgr.py +@@ -12,6 +12,8 @@ + # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + # License for the specific language governing permissions and limitations + # under the License. 
++ ++import datetime + from unittest import mock + + from oslo_utils import importutils +@@ -34,6 +36,8 @@ from masakari.tests import uuidsentinel + CONF = masakari.conf.CONF + + NOW = timeutils.utcnow().replace(microsecond=0) ++EXPIRED_TIME = timeutils.utcnow().replace(microsecond=0) \ ++ - datetime.timedelta(seconds=CONF.notifications_expired_interval) + + + def _get_vm_type_notification(status="new"): +@@ -69,14 +73,15 @@ class EngineManagerUnitTestCase(test.NoDBTestCase): + generated_time=NOW, status="new", + notification_uuid=uuidsentinel.fake_notification) + +- def _get_compute_host_type_notification(self): ++ def _get_compute_host_type_notification(self, expired=False): + return fakes.create_fake_notification( + type="COMPUTE_HOST", id=1, payload={ + 'event': 'stopped', 'host_status': 'NORMAL', + 'cluster_status': 'ONLINE' + }, + source_host_uuid=uuidsentinel.fake_host, +- generated_time=NOW, status="new", ++ generated_time=EXPIRED_TIME if expired else NOW, ++ status="new", + notification_uuid=uuidsentinel.fake_notification) + + @mock.patch("masakari.engine.drivers.taskflow." 
+@@ -1147,3 +1152,12 @@ class EngineManagerUnitTestCase(test.NoDBTestCase): + + mock_progress_details.assert_called_once_with( + self.context, notification) ++ ++ @mock.patch.object(notification_obj.Notification, "save") ++ @mock.patch.object(notification_obj.NotificationList, "get_all") ++ def test_check_expired_notifications(self, mock_get_all, mock_save, ++ mock_notification_get): ++ notification = self._get_compute_host_type_notification(expired=True) ++ mock_get_all.return_value = [notification] ++ self.engine._check_expired_notifications(self.context) ++ self.assertEqual("failed", notification.status) +-- +2.25.1 + diff -Nru masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/series masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/series --- masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/series 2020-09-14 00:18:38.000000000 -0700 +++ masakari-10.0.0~b3~git2020091407.ee48dc2/debian/patches/series 2020-09-21 20:05:57.000000000 -0700 @@ -1,2 +1,3 @@ allow-bare-hostnames.patch monkey-patch-original-current-thread.patch +check-expired-notifications.patch