From 1f57d25b6b42533436811f191fdfda1fb5460821 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Fri, 26 Jul 2019 10:53:02 -0400 Subject: [PATCH] WIP: Obfuscate non-nova server fault message TODO: need to update reno with CVE/OSSA info The server fault "message" is always shown in the API server response, regardless of policy or user role. The fault "details" are only shown to users with the admin role when the fault code is 500. The problem with this is for non-nova exceptions, the fault message is a string-ified version of the exception (see nova.compute.utils.exception_to_dict) which can contain sensitive information which the non-admin owner of the server can see. This change adds a functional test to recreate the issue and a change to exception_to_dict which for the non-nova case obfuscates the fault message by simply storing the exception type class name. Admins can still see the fault traceback in the "details" key of the fault dict in the server API response. Note that _get_fault_details is changed so that the details also includes the exception value which is what used to be in the fault message for non-nova exceptions. This is necessary so admins can still get the exception message with the traceback details. Note that nova exceptions with a %(reason)s replacement variable could potentially be leaking sensitive details as well but those would need to be cleaned up on a case-by-case basis since we don't want to obfuscate all fault messages otherwise users might not see information like NoValidHost when their server goes to ERROR status. Change-Id: I5e0a43ec59341c9ac62f89105ddf82c4a014df81 Closes-Bug: #1837877 --- nova/compute/utils.py | 41 +++++-- nova/tests/functional/test_server_faults.py | 114 ++++++++++++++++++ nova/tests/unit/compute/test_compute.py | 22 ++-- ...ult-message-exposure-5360d794f4976b7c.yaml | 21 ++++ 4 files changed, 182 insertions(+), 16 deletions(-) create mode 100644 nova/tests/functional/test_server_faults.py create mode 100644 releasenotes/notes/bug-1837877-cve-fault-message-exposure-5360d794f4976b7c.yaml diff --git a/nova/compute/utils.py b/nova/compute/utils.py index 74873d258a..98a1ae9194 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -37,87 +37,110 @@ from nova import notifications from nova.notifications.objects import aggregate as aggregate_notification from nova.notifications.objects import base as notification_base from nova.notifications.objects import compute_task as task_notification from nova.notifications.objects import exception as notification_exception from nova.notifications.objects import flavor as flavor_notification from nova.notifications.objects import instance as instance_notification from nova.notifications.objects import keypair as keypair_notification from nova.notifications.objects import libvirt as libvirt_notification from nova.notifications.objects import metrics as metrics_notification from nova.notifications.objects import request_spec as reqspec_notification from nova.notifications.objects import scheduler as scheduler_notification from nova.notifications.objects import server_group as sg_notification from nova.notifications.objects import volume as volume_notification from nova import objects from nova.objects import fields from nova import rpc from nova import safe_utils from nova import utils from nova.virt import driver CONF = nova.conf.CONF LOG = log.getLogger(__name__) def exception_to_dict(fault, message=None): - """Converts exceptions to a dict for use in notifications.""" + """Converts exceptions to a dict for use in notifications. + + :param fault: Exception that occurred + :param message: Optional fault message, otherwise the message is derived + from the fault itself. + :returns: dict with the following items: + + - exception: the fault itself + - message: one of (in priority order): + - the provided message to this method + - a formatted NovaException message + - the fault class name + - code: integer code for the fault (defaults to 500) + """ # TODO(johngarbutt) move to nova/exception.py to share with wrap_exception code = 500 if hasattr(fault, "kwargs"): code = fault.kwargs.get('code', 500) # get the message from the exception that was thrown # if that does not exist, use the name of the exception class itself try: if not message: message = fault.format_message() # These exception handlers are broad so we don't fail to log the fault # just because there is an unexpected error retrieving the message except Exception: - try: - message = six.text_type(fault) - except Exception: - message = None - if not message: + # In this case either we have a NovaException which failed to format + # the message or we have a non-nova exception which could contain + # sensitive details. Since we're not sure, be safe and set the message + # to the exception class name. Note that we don't guard on + # context.is_admin here because the message is always shown in the API, + # even to non-admin users (e.g. NoValidHost) but only the traceback + # details are shown to users with the admin role. Checking for admin + # context here is also not helpful because admins can perform + # operations on a tenant user's server (migrations, reboot, etc) and + # service startup and periodic tasks could take actions on a server + # and those use an admin context. message = fault.__class__.__name__ # NOTE(dripton) The message field in the database is limited to 255 chars. # MySQL silently truncates overly long messages, but PostgreSQL throws an # error if we don't truncate it. u_message = utils.safe_truncate(message, 255) fault_dict = dict(exception=fault) fault_dict["message"] = u_message fault_dict["code"] = code return fault_dict def _get_fault_details(exc_info, error_code): details = '' + # TODO(mriedem): Why do we only include the details if the code is 500? + # Though for non-nova exceptions the code will probably be 500. if exc_info and error_code == 500: - tb = exc_info[2] - if tb: - details = ''.join(traceback.format_tb(tb)) + # We get the full exception details including the value since + # the fault message may not contain that information for non-nova + # exceptions (see exception_to_dict). + details = ''.join(traceback.format_exception( + exc_info[0], exc_info[1], exc_info[2])) return six.text_type(details) def add_instance_fault_from_exc(context, instance, fault, exc_info=None, fault_message=None): """Adds the specified fault to the database.""" fault_obj = objects.InstanceFault(context=context) fault_obj.host = CONF.host fault_obj.instance_uuid = instance.uuid fault_obj.update(exception_to_dict(fault, message=fault_message)) code = fault_obj.code fault_obj.details = _get_fault_details(exc_info, code) fault_obj.create() def get_device_name_for_instance(instance, bdms, device): """Validates (or generates) a device name for instance. This method is a wrapper for get_next_device_name that gets the list of used devices and the root device from a block device mapping. :raises TooManyDiskDevices: if the maxmimum allowed devices to attach to a single instance is exceeded. """ diff --git a/nova/tests/functional/test_server_faults.py b/nova/tests/functional/test_server_faults.py new file mode 100644 index 0000000000..67853ed4af --- /dev/null +++ b/nova/tests/functional/test_server_faults.py @@ -0,0 +1,114 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock + +from nova import test +from nova.tests import fixtures as nova_fixtures +from nova.tests.functional import fixtures as func_fixtures +from nova.tests.functional import integrated_helpers +from nova.tests.unit.image import fake as fake_image +from nova.tests.unit import policy_fixture + + +class HypervisorError(Exception): + """This is just used to make sure the exception type is in the fault.""" + pass + + +class ServerFaultTestCase(test.TestCase, + integrated_helpers.InstanceHelperMixin): + """Tests for the server faults reporting from the API.""" + + def setUp(self): + super(ServerFaultTestCase, self).setUp() + # Setup the standard fixtures. + fake_image.stub_out_image_service(self) + self.addCleanup(fake_image.FakeImageService_reset) + self.useFixture(nova_fixtures.NeutronFixture(self)) + self.useFixture(func_fixtures.PlacementFixture()) + self.useFixture(policy_fixture.RealPolicyFixture()) + + # Start the compute services. + self.start_service('conductor') + self.start_service('scheduler') + self.compute = self.start_service('compute') + api_fixture = self.useFixture(nova_fixtures.OSAPIFixture( + api_version='v2.1')) + self.api = api_fixture.api + self.admin_api = api_fixture.admin_api + + def test_server_fault_non_nova_exception(self): + """Creates a server using the non-admin user, then reboots it which + will generate a non-NovaException fault and put the instance into + ERROR status. Then checks that fault details are only visible to the + admin user. + """ + # Create the server with the non-admin user. + server = self._build_minimal_create_server_request( + self.api, 'test_server_fault_non_nova_exception', + image_uuid=fake_image.get_valid_image_id(), + networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}]) + server = self.api.post_server({'server': server}) + server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + + # Stop the server before rebooting it so that after the driver.reboot + # method raises an exception, the fake driver does not report the + # instance power state as running - that will make the compute manager + # set the instance vm_state to error. + self.api.post_server_action(server['id'], {'os-stop': None}) + server = self._wait_for_state_change(self.admin_api, server, 'SHUTOFF') + + # Stub out the compute driver reboot method to raise a non-nova + # exception to simulate some error from the underlying hypervisor + # which in this case we are going to say has sensitive content. + error_msg = 'sensitive info' + with mock.patch.object( + self.compute.manager.driver, 'reboot', + side_effect=HypervisorError(error_msg)) as mock_reboot: + reboot_request = {'reboot': {'type': 'HARD'}} + self.api.post_server_action(server['id'], reboot_request) + # In this case we wait for the status to change to ERROR using + # the non-admin user so we can assert the fault details. We also + # wait for the task_state to be None since the wrap_instance_fault + # decorator runs before the reverts_task_state decorator so we will + # be sure the fault is set on the server. + server = self._wait_for_server_parameter( + self.api, server, {'status': 'ERROR', + 'OS-EXT-STS:task_state': None}) + mock_reboot.assert_called_once() + # The server fault from the non-admin user API response should not + # have details in it. + self.assertIn('fault', server) + fault = server['fault'] + self.assertNotIn('details', fault) + # And the sensitive details from the non-nova exception should not be + # in the message. + self.assertIn('message', fault) + self.assertNotIn(error_msg, fault['message']) + # The exception type class name should be in the message. + self.assertIn('HypervisorError', fault['message']) + + # Get the server fault details for the admin user. + server = self.admin_api.get_server(server['id']) + fault = server['fault'] + # The admin can see the fault details which includes the traceback. + self.assertIn('details', fault) + # The details also contain the exception message (which is not in the + # fault message). + self.assertIn(error_msg, fault['details']) + # Make sure the traceback is there by looking for part of it. + self.assertIn('in reboot_instance', fault['details']) + # The exception type class name should be in the message for the admin + # user as well since the fault handling code cannot distinguish who + # is going to see the message so it only sets class name. + self.assertIn('HypervisorError', fault['message']) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 29a80bf6d4..feb474dee4 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -4403,51 +4403,54 @@ class ComputeTestCase(BaseTestCase, message = payload['message'] self.assertNotEqual(-1, message.find("something bad happened")) def test_run_instance_error_notification_on_failure(self): # Test that error notif is sent if build fails hard instance = self._create_fake_instance_obj() def build_inst_fail(*args, **kwargs): raise test.TestingException("i'm dying") self.stub_out('nova.virt.fake.FakeDriver.spawn', build_inst_fail) self.compute.build_and_run_instance( self.context, instance, {}, {}, {}, block_device_mapping=[]) self.assertGreaterEqual(len(fake_notifier.NOTIFICATIONS), 2) msg = fake_notifier.NOTIFICATIONS[0] self.assertEqual(msg.event_type, 'compute.instance.create.start') msg = fake_notifier.NOTIFICATIONS[-1] self.assertEqual(msg.event_type, 'compute.instance.create.error') self.assertEqual('ERROR', msg.priority) payload = msg.payload message = payload['message'] - self.assertNotEqual(-1, message.find("i'm dying")) + # The fault message does not contain the exception value, only the + # class name. + self.assertEqual(-1, message.find("i'm dying")) + self.assertIn('TestingException', message) def test_terminate_usage_notification(self): # Ensure terminate_instance generates correct usage notification. old_time = datetime.datetime(2012, 4, 1) cur_time = datetime.datetime(2012, 12, 21, 12, 21) time_fixture = self.useFixture(utils_fixture.TimeFixture(old_time)) instance = self._create_fake_instance_obj() self.compute.build_and_run_instance(self.context, instance, {}, {}, {}, block_device_mapping=[]) fake_notifier.NOTIFICATIONS = [] time_fixture.advance_time_delta(cur_time - old_time) self.compute.terminate_instance(self.context, instance, []) self.assertEqual(len(fake_notifier.NOTIFICATIONS), 4) msg = fake_notifier.NOTIFICATIONS[0] self.assertEqual(msg.priority, 'INFO') self.assertEqual(msg.event_type, 'compute.instance.delete.start') msg1 = fake_notifier.NOTIFICATIONS[1] self.assertEqual(msg1.event_type, 'compute.instance.shutdown.start') msg1 = fake_notifier.NOTIFICATIONS[2] self.assertEqual(msg1.event_type, 'compute.instance.shutdown.end') msg1 = fake_notifier.NOTIFICATIONS[3] @@ -6818,91 +6821,94 @@ class ComputeTestCase(BaseTestCase, # Force the compute manager to do its periodic poll ctxt = context.get_admin_context() self.compute._sync_power_states(ctxt) instances = db.instance_get_all(self.context) LOG.info("After force-killing instances: %s", instances) self.assertEqual(len(instances), 1) self.assertIsNone(instances[0]['task_state']) def _fill_fault(self, values): extra = {x: None for x in ['created_at', 'deleted_at', 'updated_at', 'deleted']} extra['id'] = 1 extra['details'] = '' extra.update(values) return extra def test_add_instance_fault(self): instance = self._create_fake_instance_obj() exc_info = None def fake_db_fault_create(ctxt, values): self.assertIn('raise NotImplementedError', values['details']) + self.assertIn('test', values['details']) del values['details'] expected = { 'code': 500, - 'message': 'test', + 'message': 'NotImplementedError', 'instance_uuid': instance['uuid'], 'host': self.compute.host } self.assertEqual(expected, values) return self._fill_fault(expected) try: raise NotImplementedError('test') except NotImplementedError: exc_info = sys.exc_info() self.stub_out('nova.db.api.instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() compute_utils.add_instance_fault_from_exc(ctxt, instance, NotImplementedError('test'), exc_info) def test_add_instance_fault_with_remote_error(self): instance = self._create_fake_instance_obj() exc_info = None raised_exc = None def fake_db_fault_create(ctxt, values): global exc_info global raised_exc self.assertIn('raise messaging.RemoteError', values['details']) + self.assertIn('Remote error: test My Test Message\nNone.', + values['details']) del values['details'] expected = { 'code': 500, 'instance_uuid': instance['uuid'], - 'message': 'Remote error: test My Test Message\nNone.', + 'message': 'RemoteError', 'host': self.compute.host } self.assertEqual(expected, values) return self._fill_fault(expected) try: raise messaging.RemoteError('test', 'My Test Message') except messaging.RemoteError as exc: raised_exc = exc exc_info = sys.exc_info() self.stub_out('nova.db.api.instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() compute_utils.add_instance_fault_from_exc(ctxt, instance, raised_exc, exc_info) def test_add_instance_fault_user_error(self): instance = self._create_fake_instance_obj() exc_info = None def fake_db_fault_create(ctxt, values): expected = { @@ -6913,89 +6919,91 @@ class ComputeTestCase(BaseTestCase, 'host': self.compute.host } self.assertEqual(expected, values) return self._fill_fault(expected) user_exc = exception.Invalid('fake details', code=400) try: raise user_exc except exception.Invalid: exc_info = sys.exc_info() self.stub_out('nova.db.api.instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() compute_utils.add_instance_fault_from_exc(ctxt, instance, user_exc, exc_info) def test_add_instance_fault_no_exc_info(self): instance = self._create_fake_instance_obj() def fake_db_fault_create(ctxt, values): expected = { 'code': 500, - 'message': 'test', + 'message': 'NotImplementedError', 'details': '', 'instance_uuid': instance['uuid'], 'host': self.compute.host } self.assertEqual(expected, values) return self._fill_fault(expected) self.stub_out('nova.db.api.instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() compute_utils.add_instance_fault_from_exc(ctxt, instance, NotImplementedError('test')) def test_add_instance_fault_long_message(self): instance = self._create_fake_instance_obj() message = 300 * 'a' def fake_db_fault_create(ctxt, values): expected = { 'code': 500, 'message': message[:255], 'details': '', 'instance_uuid': instance['uuid'], 'host': self.compute.host } self.assertEqual(expected, values) return self._fill_fault(expected) self.stub_out('nova.db.api.instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() - compute_utils.add_instance_fault_from_exc(ctxt, - instance, - NotImplementedError(message)) + # Use a NovaException because non-nova exceptions will just have the + # class name recorded in the fault message which will not exercise our + # length trim code. + exc = exception.NovaException(message=message) + compute_utils.add_instance_fault_from_exc(ctxt, instance, exc) def test_add_instance_fault_with_message(self): instance = self._create_fake_instance_obj() exc_info = None def fake_db_fault_create(ctxt, values): self.assertIn('raise NotImplementedError', values['details']) del values['details'] expected = { 'code': 500, 'message': 'hoge', 'instance_uuid': instance['uuid'], 'host': self.compute.host } self.assertEqual(expected, values) return self._fill_fault(expected) try: raise NotImplementedError('test') except NotImplementedError: exc_info = sys.exc_info() self.stub_out('nova.db.api.instance_fault_create', fake_db_fault_create) diff --git a/releasenotes/notes/bug-1837877-cve-fault-message-exposure-5360d794f4976b7c.yaml b/releasenotes/notes/bug-1837877-cve-fault-message-exposure-5360d794f4976b7c.yaml new file mode 100644 index 0000000000..3b3115bf96 --- /dev/null +++ b/releasenotes/notes/bug-1837877-cve-fault-message-exposure-5360d794f4976b7c.yaml @@ -0,0 +1,21 @@ +--- +security: + - | + This release contains a security fix for `bug 1837877`_ where users + without the admin role can be exposed to sensitive error details in + the server resource fault ``message``. + + There is a behavior change where non-nova exceptions will only record + the exception class name in the fault ``message`` field which is exposed + to all users, regardless of the admin role. + + The fault ``details``, which are only exposed to users with the admin role, + will continue to include the traceback and also include the exception + value which for non-nova exceptions is what used to be exposed in the + fault ``message`` field. Meaning, the information that admins could see + for server faults is still available, but the exception value may be in + ``details`` rather than ``message`` now. + + TODO: add link to CVE/OSSA + + .. bug 1837877: https://bugs.launchpad.net/nova/+bug/1837877 -- 2.17.1