From d4fa11d81fd871bdcd3fbdb95e66b46e3923a0f9 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Fri, 26 Jul 2019 10:53:02 -0400 Subject: [PATCH] WIP: Obfuscate non-nova server fault message TODO: fix test fallout and add a release note The server fault "message" is always shown in the API server response, regardless of policy or user role. The fault "details" are only shown to users with the admin role when the fault code is 500. The problem with this is for non-nova exceptions, the fault message is a string-ified version of the exception (see nova.compute.utils.exception_to_dict) which can contain sensitive information which the non-admin owner of the server can see. This change adds a functional test to recreate the issue and a change to exception_to_dict which for the non-nova case obfuscates the fault message by simply storing the exception type class name. Admins can still see the fault traceback in the "details" key of the fault dict in the server API response. Note that nova exceptions with a %(reason)s replacement variable could potentially be leaking sensitive details as well but those would need to be cleaned up on a case-by-case basis since we don't want to obfuscate all fault messages otherwise users might not see information like NoValidHost when their server goes to ERROR status. Change-Id: I5e0a43ec59341c9ac62f89105ddf82c4a014df81 Closes-Bug: #1837877 --- nova/compute/utils.py | 41 +++++-- nova/tests/functional/test_server_faults.py | 114 ++++++++++++++++++++ nova/tests/unit/compute/test_compute.py | 5 +- 3 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 nova/tests/functional/test_server_faults.py diff --git a/nova/compute/utils.py b/nova/compute/utils.py index 74873d258a..98a1ae9194 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -59,7 +59,20 @@ LOG = log.getLogger(__name__) def exception_to_dict(fault, message=None): - """Converts exceptions to a dict for use in notifications.""" + """Converts exceptions to a dict for use in notifications. + + :param fault: Exception that occurred + :param message: Optional fault message, otherwise the message is derived + from the fault itself. + :returns: dict with the following items: + + - exception: the fault itself + - message: one of (in priority order): + - the provided message to this method + - a formatted NovaException message + - the fault class name + - code: integer code for the fault (defaults to 500) + """ # TODO(johngarbutt) move to nova/exception.py to share with wrap_exception code = 500 @@ -74,11 +87,17 @@ def exception_to_dict(fault, message=None): # These exception handlers are broad so we don't fail to log the fault # just because there is an unexpected error retrieving the message except Exception: - try: - message = six.text_type(fault) - except Exception: - message = None - if not message: + # In this case either we have a NovaException which failed to format + # the message or we have a non-nova exception which could contain + # sensitive details. Since we're not sure, be safe and set the message + # to the exception class name. Note that we don't guard on + # context.is_admin here because the message is always shown in the API, + # even to non-admin users (e.g. NoValidHost) but only the traceback + # details are shown to users with the admin role. Checking for admin + # context here is also not helpful because admins can perform + # operations on a tenant user's server (migrations, reboot, etc) and + # service startup and periodic tasks could take actions on a server + # and those use an admin context. message = fault.__class__.__name__ # NOTE(dripton) The message field in the database is limited to 255 chars. # MySQL silently truncates overly long messages, but PostgreSQL throws an @@ -92,10 +111,14 @@ def exception_to_dict(fault, message=None): def _get_fault_details(exc_info, error_code): details = '' + # TODO(mriedem): Why do we only include the details if the code is 500? + # Though for non-nova exceptions the code will probably be 500. if exc_info and error_code == 500: - tb = exc_info[2] - if tb: - details = ''.join(traceback.format_tb(tb)) + # We get the full exception details including the value since + # the fault message may not contain that information for non-nova + # exceptions (see exception_to_dict). + details = ''.join(traceback.format_exception( + exc_info[0], exc_info[1], exc_info[2])) return six.text_type(details) diff --git a/nova/tests/functional/test_server_faults.py b/nova/tests/functional/test_server_faults.py new file mode 100644 index 0000000000..67853ed4af --- /dev/null +++ b/nova/tests/functional/test_server_faults.py @@ -0,0 +1,114 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock + +from nova import test +from nova.tests import fixtures as nova_fixtures +from nova.tests.functional import fixtures as func_fixtures +from nova.tests.functional import integrated_helpers +from nova.tests.unit.image import fake as fake_image +from nova.tests.unit import policy_fixture + + +class HypervisorError(Exception): + """This is just used to make sure the exception type is in the fault.""" + pass + + +class ServerFaultTestCase(test.TestCase, + integrated_helpers.InstanceHelperMixin): + """Tests for the server faults reporting from the API.""" + + def setUp(self): + super(ServerFaultTestCase, self).setUp() + # Setup the standard fixtures. + fake_image.stub_out_image_service(self) + self.addCleanup(fake_image.FakeImageService_reset) + self.useFixture(nova_fixtures.NeutronFixture(self)) + self.useFixture(func_fixtures.PlacementFixture()) + self.useFixture(policy_fixture.RealPolicyFixture()) + + # Start the compute services. + self.start_service('conductor') + self.start_service('scheduler') + self.compute = self.start_service('compute') + api_fixture = self.useFixture(nova_fixtures.OSAPIFixture( + api_version='v2.1')) + self.api = api_fixture.api + self.admin_api = api_fixture.admin_api + + def test_server_fault_non_nova_exception(self): + """Creates a server using the non-admin user, then reboots it which + will generate a non-NovaException fault and put the instance into + ERROR status. Then checks that fault details are only visible to the + admin user. + """ + # Create the server with the non-admin user. + server = self._build_minimal_create_server_request( + self.api, 'test_server_fault_non_nova_exception', + image_uuid=fake_image.get_valid_image_id(), + networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}]) + server = self.api.post_server({'server': server}) + server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + + # Stop the server before rebooting it so that after the driver.reboot + # method raises an exception, the fake driver does not report the + # instance power state as running - that will make the compute manager + # set the instance vm_state to error. + self.api.post_server_action(server['id'], {'os-stop': None}) + server = self._wait_for_state_change(self.admin_api, server, 'SHUTOFF') + + # Stub out the compute driver reboot method to raise a non-nova + # exception to simulate some error from the underlying hypervisor + # which in this case we are going to say has sensitive content. + error_msg = 'sensitive info' + with mock.patch.object( + self.compute.manager.driver, 'reboot', + side_effect=HypervisorError(error_msg)) as mock_reboot: + reboot_request = {'reboot': {'type': 'HARD'}} + self.api.post_server_action(server['id'], reboot_request) + # In this case we wait for the status to change to ERROR using + # the non-admin user so we can assert the fault details. We also + # wait for the task_state to be None since the wrap_instance_fault + # decorator runs before the reverts_task_state decorator so we will + # be sure the fault is set on the server. + server = self._wait_for_server_parameter( + self.api, server, {'status': 'ERROR', + 'OS-EXT-STS:task_state': None}) + mock_reboot.assert_called_once() + # The server fault from the non-admin user API response should not + # have details in it. + self.assertIn('fault', server) + fault = server['fault'] + self.assertNotIn('details', fault) + # And the sensitive details from the non-nova exception should not be + # in the message. + self.assertIn('message', fault) + self.assertNotIn(error_msg, fault['message']) + # The exception type class name should be in the message. + self.assertIn('HypervisorError', fault['message']) + + # Get the server fault details for the admin user. + server = self.admin_api.get_server(server['id']) + fault = server['fault'] + # The admin can see the fault details which includes the traceback. + self.assertIn('details', fault) + # The details also contain the exception message (which is not in the + # fault message). + self.assertIn(error_msg, fault['details']) + # Make sure the traceback is there by looking for part of it. + self.assertIn('in reboot_instance', fault['details']) + # The exception type class name should be in the message for the admin + # user as well since the fault handling code cannot distinguish who + # is going to see the message so it only sets class name. + self.assertIn('HypervisorError', fault['message']) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 29a80bf6d4..4b2c4119d0 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -4425,7 +4425,10 @@ class ComputeTestCase(BaseTestCase, self.assertEqual('ERROR', msg.priority) payload = msg.payload message = payload['message'] - self.assertNotEqual(-1, message.find("i'm dying")) + # The fault message does not contain the exception value, only the + # class name. + self.assertEqual(-1, message.find("i'm dying")) + self.assertIn('TestingException', message) def test_terminate_usage_notification(self): # Ensure terminate_instance generates correct usage notification. -- 2.17.1