From 98339a68dbf2b4048eb33cc987bb248eda5ecd22 Mon Sep 17 00:00:00 2001 From: Dan Prince Date: Fri, 1 Feb 2013 17:04:27 -0500 Subject: [PATCH] Add a safe_minidom_parse_string function. Adds a new utils.safe_minidom_parse_string function and updates external API facing Nova modules to use it. This ensures we have safe defaults on our incoming API XML parsing. Internally safe_minidom_parse_string uses a ProtectedExpatParser class to disable DTDs and entities from being parsed when using minidom. Fixes LP Bug #1100282. Change-Id: Ib90d6379320ff1d007f8a661f7ddaa286ba6918e --- nova/api/openstack/common.py | 10 ++--- nova/api/openstack/compute/contrib/cells.py | 4 +- nova/api/openstack/compute/contrib/hosts.py | 4 +- .../openstack/compute/contrib/security_groups.py | 7 ++-- nova/api/openstack/compute/contrib/volumes.py | 3 +- nova/api/openstack/compute/servers.py | 5 +-- nova/api/openstack/wsgi.py | 13 ++++--- nova/tests/test_utils.py | 33 ++++++++++++++++ nova/utils.py | 44 ++++++++++++++++++++++ 9 files changed, 99 insertions(+), 24 deletions(-) diff --git a/nova/api/openstack/common.py b/nova/api/openstack/common.py index b258358..1f479b8 100644 --- a/nova/api/openstack/common.py +++ b/nova/api/openstack/common.py @@ -21,7 +21,6 @@ import re import urlparse import webob -from xml.dom import minidom from nova.api.openstack import wsgi from nova.api.openstack import xmlutil @@ -32,6 +31,7 @@ from nova import exception from nova.openstack.common import cfg from nova.openstack.common import log as logging from nova import quota +from nova import utils osapi_opts = [ cfg.IntOpt('osapi_max_limit', @@ -356,7 +356,7 @@ def raise_http_conflict_for_instance_invalid_state(exc, action): class MetadataDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -364,7 +364,7 @@ class MetadataDeserializer(wsgi.MetadataXMLDeserializer): class MetaItemDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} @@ -382,7 +382,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return metadata def _extract_metadata_container(self, datastring): - dom = minidom.parseString(datastring) + dom = utils.safe_minidom_parse_string(datastring) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -394,7 +394,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return self._extract_metadata_container(datastring) def update(self, datastring): - dom = minidom.parseString(datastring) + dom = utils.safe_minidom_parse_string(datastring) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} diff --git a/nova/api/openstack/compute/contrib/cells.py b/nova/api/openstack/compute/contrib/cells.py index 03e2e4c..29a9ffc 100644 --- a/nova/api/openstack/compute/contrib/cells.py +++ b/nova/api/openstack/compute/contrib/cells.py @@ -16,7 +16,6 @@ # under the License. """The cells extension.""" -from xml.dom import minidom from xml.parsers import expat from webob import exc @@ -32,6 +31,7 @@ from nova import exception from nova.openstack.common import cfg from nova.openstack.common import log as logging from nova.openstack.common import timeutils +from nova import utils LOG = logging.getLogger(__name__) @@ -99,7 +99,7 @@ class CellDeserializer(wsgi.XMLDeserializer): def default(self, string): """Deserialize an xml-formatted cell create request.""" try: - node = minidom.parseString(string) + node = utils.safe_minidom_parse_string(string) except expat.ExpatError: msg = _("cannot understand XML") raise exception.MalformedRequestBody(reason=msg) diff --git a/nova/api/openstack/compute/contrib/hosts.py b/nova/api/openstack/compute/contrib/hosts.py index d1b39d6..51505d9 100644 --- a/nova/api/openstack/compute/contrib/hosts.py +++ b/nova/api/openstack/compute/contrib/hosts.py @@ -16,7 +16,6 @@ """The hosts admin extension.""" import webob.exc -from xml.dom import minidom from xml.parsers import expat from nova.api.openstack import extensions @@ -25,6 +24,7 @@ from nova.api.openstack import xmlutil from nova import compute from nova import exception from nova.openstack.common import log as logging +from nova import utils LOG = logging.getLogger(__name__) authorize = extensions.extension_authorizer('compute', 'hosts') @@ -72,7 +72,7 @@ class HostShowTemplate(xmlutil.TemplateBuilder): class HostUpdateDeserializer(wsgi.XMLDeserializer): def default(self, string): try: - node = minidom.parseString(string) + node = utils.safe_minidom_parse_string(string) except expat.ExpatError: msg = _("cannot understand XML") raise exception.MalformedRequestBody(reason=msg) diff --git a/nova/api/openstack/compute/contrib/security_groups.py b/nova/api/openstack/compute/contrib/security_groups.py index a15c395..60b0247 100644 --- a/nova/api/openstack/compute/contrib/security_groups.py +++ b/nova/api/openstack/compute/contrib/security_groups.py @@ -16,8 +16,6 @@ """The security groups extension.""" -from xml.dom import minidom - import webob from webob import exc @@ -30,6 +28,7 @@ from nova.compute import api as compute_api from nova import db from nova import exception from nova.openstack.common import log as logging +from nova import utils LOG = logging.getLogger(__name__) authorize = extensions.extension_authorizer('compute', 'security_groups') @@ -108,7 +107,7 @@ class SecurityGroupXMLDeserializer(wsgi.MetadataXMLDeserializer): """ def default(self, string): """Deserialize an xml-formatted security group create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) security_group = {} sg_node = self.find_first_child_named(dom, 'security_group') @@ -129,7 +128,7 @@ class SecurityGroupRulesXMLDeserializer(wsgi.MetadataXMLDeserializer): def default(self, string): """Deserialize an xml-formatted security group create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) security_group_rule = self._extract_security_group_rule(dom) return {'body': {'security_group_rule': security_group_rule}} diff --git a/nova/api/openstack/compute/contrib/volumes.py b/nova/api/openstack/compute/contrib/volumes.py index 3fc5032..42a9f76 100644 --- a/nova/api/openstack/compute/contrib/volumes.py +++ b/nova/api/openstack/compute/contrib/volumes.py @@ -17,7 +17,6 @@ import webob from webob import exc -from xml.dom import minidom from nova.api.openstack import common from nova.api.openstack import extensions @@ -161,7 +160,7 @@ class CreateDeserializer(CommonDeserializer): def default(self, string): """Deserialize an xml-formatted volume create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) vol = self._extract_volume(dom) return {'body': {'volume': vol}} diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index 88a5200..af14e5a 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -21,7 +21,6 @@ import socket import webob from webob import exc -from xml.dom import minidom from nova.api.openstack import common from nova.api.openstack.compute import ips @@ -312,7 +311,7 @@ class ActionDeserializer(CommonDeserializer): """ def default(self, string): - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) action_node = dom.childNodes[0] action_name = action_node.tagName @@ -419,7 +418,7 @@ class CreateDeserializer(CommonDeserializer): def default(self, string): """Deserialize an xml-formatted server create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) server = self._extract_server(dom) return {'body': {'server': server}} diff --git a/nova/api/openstack/wsgi.py b/nova/api/openstack/wsgi.py index f68eff2..8c77f1c 100644 --- a/nova/api/openstack/wsgi.py +++ b/nova/api/openstack/wsgi.py @@ -27,6 +27,7 @@ import webob from nova import exception from nova.openstack.common import jsonutils from nova.openstack.common import log as logging +from nova import utils from nova import wsgi @@ -217,7 +218,7 @@ class XMLDeserializer(TextDeserializer): plurals = set(self.metadata.get('plurals', {})) try: - node = minidom.parseString(datastring).childNodes[0] + node = utils.safe_minidom_parse_string(datastring).childNodes[0] return {node.nodeName: self._from_xml_node(node, plurals)} except expat.ExpatError: msg = _("cannot understand XML") @@ -268,11 +269,11 @@ class XMLDeserializer(TextDeserializer): def extract_text(self, node): """Get the text field contained by the given node.""" - if len(node.childNodes) == 1: - child = node.childNodes[0] + ret_val = "" + for child in node.childNodes: if child.nodeType == child.TEXT_NODE: - return child.nodeValue - return "" + ret_val += child.nodeValue + return ret_val def extract_elements(self, node): """Get only Element type childs from node.""" @@ -633,7 +634,7 @@ def action_peek_json(body): def action_peek_xml(body): """Determine action to invoke.""" - dom = minidom.parseString(body) + dom = utils.safe_minidom_parse_string(body) action_node = dom.childNodes[0] return action_node.tagName diff --git a/nova/tests/test_utils.py b/nova/tests/test_utils.py index 84d56ca..8cab7df 100644 --- a/nova/tests/test_utils.py +++ b/nova/tests/test_utils.py @@ -448,6 +448,39 @@ class GenericUtilsTestCase(test.TestCase): self.assertEqual(fake_execute.uid, 2) self.assertEqual(fake_execute.uid, os.getuid()) + def test_safe_parse_xml(self): + + normal_body = (""" + + + hey + there + + """).strip() + + def killer_body(): + return ((""" + + ]> + + + %(d)s + + """) % { + 'a': 'A' * 10, + 'b': '&a;' * 10, + 'c': '&b;' * 10, + 'd': '&c;' * 9999, + }).strip() + + dom = utils.safe_minidom_parse_string(normal_body) + self.assertEqual(normal_body, str(dom.toxml())) + + self.assertRaises(ValueError, + utils.safe_minidom_parse_string, + killer_body()) + def test_xhtml_escape(self): self.assertEqual('"foo"', utils.xhtml_escape('"foo"')) self.assertEqual(''foo'', utils.xhtml_escape("'foo'")) diff --git a/nova/utils.py b/nova/utils.py index 83bf555..5c8b8cb 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -36,6 +36,10 @@ import struct import sys import tempfile import time +from xml.dom import minidom +from xml.parsers import expat +from xml import sax +from xml.sax import expatreader from xml.sax import saxutils from eventlet import event @@ -649,6 +653,46 @@ class DynamicLoopingCall(LoopingCallBase): return self.done +class ProtectedExpatParser(expatreader.ExpatParser): + """An expat parser which disables DTD's and entities by default.""" + + def __init__(self, forbid_dtd=True, forbid_entities=True, + *args, **kwargs): + # Python 2.x old style class + expatreader.ExpatParser.__init__(self, *args, **kwargs) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + + def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): + raise ValueError("Inline DTD forbidden") + + def entity_decl(self, entityName, is_parameter_entity, value, base, + systemId, publicId, notationName): + raise ValueError(" forbidden") + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + # expat 1.2 + raise ValueError(" forbidden") + + def reset(self): + expatreader.ExpatParser.reset(self) + if self.forbid_dtd: + self._parser.StartDoctypeDeclHandler = self.start_doctype_decl + if self.forbid_entities: + self._parser.EntityDeclHandler = self.entity_decl + self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + + +def safe_minidom_parse_string(xml_string): + """Parse an XML string using minidom safely. + + """ + try: + return minidom.parseString(xml_string, parser=ProtectedExpatParser()) + except sax.SAXParseException as se: + raise expat.ExpatError() + + def xhtml_escape(value): """Escapes a string so it is valid within XML or XHTML. -- 1.8.1