From 2ae74f8222058e475350458ca0c820adb910582c Mon Sep 17 00:00:00 2001 From: Dan Prince Date: Sat, 2 Feb 2013 11:34:25 -0500 Subject: [PATCH] Add a safe_minidom_parse_string function. Adds a new utils.safe_minidom_parse_string function and updates external API facing Nova modules to use it. This ensures we have safe defaults on our incoming API XML parsing. Internally safe_minidom_parse_string uses a ProtectedExpatParser class to disable DTDs and entities from being parsed when using minidom. Fixes LP Bug #1100282 for Folsom. Change-Id: I6a4051b5e66f3ce5a330b2589c42e6e9e5b9268e --- nova/api/openstack/common.py | 10 ++--- nova/api/openstack/compute/contrib/hosts.py | 4 +- .../openstack/compute/contrib/security_groups.py | 7 ++-- nova/api/openstack/compute/contrib/volumes.py | 3 +- nova/api/openstack/compute/servers.py | 5 +-- .../api/openstack/volume/contrib/volume_actions.py | 4 +- nova/api/openstack/volume/volumes.py | 3 +- nova/api/openstack/wsgi.py | 13 ++++--- nova/tests/test_utils.py | 33 ++++++++++++++++ nova/utils.py | 44 ++++++++++++++++++++++ 10 files changed, 100 insertions(+), 26 deletions(-) diff --git a/nova/api/openstack/common.py b/nova/api/openstack/common.py index ccc70cd..7fe3a50 100644 --- a/nova/api/openstack/common.py +++ b/nova/api/openstack/common.py @@ -21,7 +21,6 @@ import re import urlparse import webob -from xml.dom import minidom from nova.api.openstack import wsgi from nova.api.openstack import xmlutil @@ -32,6 +31,7 @@ from nova import exception from nova import flags from nova.openstack.common import log as logging from nova import quota +from nova import utils LOG = logging.getLogger(__name__) @@ -341,7 +341,7 @@ def raise_http_conflict_for_instance_invalid_state(exc, action): class MetadataDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -349,7 +349,7 @@ class MetadataDeserializer(wsgi.MetadataXMLDeserializer): class MetaItemDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} @@ -367,7 +367,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return metadata def _extract_metadata_container(self, datastring): - dom = minidom.parseString(datastring) + dom = utils.safe_minidom_parse_string(datastring) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -379,7 +379,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return self._extract_metadata_container(datastring) def update(self, datastring): - dom = minidom.parseString(datastring) + dom = utils.safe_minidom_parse_string(datastring) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} diff --git a/nova/api/openstack/compute/contrib/hosts.py b/nova/api/openstack/compute/contrib/hosts.py index 95a80f3..fa3c961 100644 --- a/nova/api/openstack/compute/contrib/hosts.py +++ b/nova/api/openstack/compute/contrib/hosts.py @@ -16,7 +16,6 @@ """The hosts admin extension.""" import webob.exc -from xml.dom import minidom from xml.parsers import expat from nova.api.openstack import extensions @@ -27,6 +26,7 @@ from nova import db from nova import exception from nova import flags from nova.openstack.common import log as logging +from nova import utils LOG = logging.getLogger(__name__) @@ -80,7 +80,7 @@ class HostShowTemplate(xmlutil.TemplateBuilder): class HostDeserializer(wsgi.XMLDeserializer): def default(self, string): try: - node = minidom.parseString(string) + node = utils.safe_minidom_parse_string(string) except expat.ExpatError: msg = _("cannot understand XML") raise exception.MalformedRequestBody(reason=msg) diff --git a/nova/api/openstack/compute/contrib/security_groups.py b/nova/api/openstack/compute/contrib/security_groups.py index e5b1797..4a6be1e 100644 --- a/nova/api/openstack/compute/contrib/security_groups.py +++ b/nova/api/openstack/compute/contrib/security_groups.py @@ -16,8 +16,6 @@ """The security groups extension.""" -from xml.dom import minidom - import webob from webob import exc @@ -30,6 +28,7 @@ from nova import db from nova import exception from nova import flags from nova.openstack.common import log as logging +from nova import utils LOG = logging.getLogger(__name__) @@ -110,7 +109,7 @@ class SecurityGroupXMLDeserializer(wsgi.MetadataXMLDeserializer): """ def default(self, string): """Deserialize an xml-formatted security group create request""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) security_group = {} sg_node = self.find_first_child_named(dom, 'security_group') @@ -131,7 +130,7 @@ class SecurityGroupRulesXMLDeserializer(wsgi.MetadataXMLDeserializer): def default(self, string): """Deserialize an xml-formatted security group create request""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) security_group_rule = self._extract_security_group_rule(dom) return {'body': {'security_group_rule': security_group_rule}} diff --git a/nova/api/openstack/compute/contrib/volumes.py b/nova/api/openstack/compute/contrib/volumes.py index 9940e30..9f1fa62 100644 --- a/nova/api/openstack/compute/contrib/volumes.py +++ b/nova/api/openstack/compute/contrib/volumes.py @@ -17,7 +17,6 @@ import webob from webob import exc -from xml.dom import minidom from nova.api.openstack import common from nova.api.openstack import extensions @@ -155,7 +154,7 @@ class CreateDeserializer(CommonDeserializer): def default(self, string): """Deserialize an xml-formatted volume create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) volume = self._extract_volume(dom) return {'body': {'volume': volume}} diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index 3a14b4f..a093266 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -21,7 +21,6 @@ import socket import webob from webob import exc -from xml.dom import minidom from nova.api.openstack import common from nova.api.openstack.compute import ips @@ -297,7 +296,7 @@ class ActionDeserializer(CommonDeserializer): """ def default(self, string): - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) action_node = dom.childNodes[0] action_name = action_node.tagName @@ -404,7 +403,7 @@ class CreateDeserializer(CommonDeserializer): def default(self, string): """Deserialize an xml-formatted server create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) server = self._extract_server(dom) return {'body': {'server': server}} diff --git a/nova/api/openstack/volume/contrib/volume_actions.py b/nova/api/openstack/volume/contrib/volume_actions.py index 8a453bf..7e47065 100644 --- a/nova/api/openstack/volume/contrib/volume_actions.py +++ b/nova/api/openstack/volume/contrib/volume_actions.py @@ -13,7 +13,6 @@ # under the License. import webob -from xml.dom import minidom from nova.api.openstack import extensions from nova.api.openstack import wsgi @@ -22,6 +21,7 @@ from nova import exception from nova import flags from nova.openstack.common import log as logging from nova.openstack.common.rpc import common as rpc_common +from nova import utils from nova import volume @@ -54,7 +54,7 @@ class VolumeToImageSerializer(xmlutil.TemplateBuilder): class VolumeToImageDeserializer(wsgi.XMLDeserializer): """Deserializer to handle xml-formatted requests""" def default(self, string): - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) action_node = dom.childNodes[0] action_name = action_node.tagName diff --git a/nova/api/openstack/volume/volumes.py b/nova/api/openstack/volume/volumes.py index 6cc4af8..ef33f92 100644 --- a/nova/api/openstack/volume/volumes.py +++ b/nova/api/openstack/volume/volumes.py @@ -17,7 +17,6 @@ import webob from webob import exc -from xml.dom import minidom from nova.api.openstack import common from nova.api.openstack import wsgi @@ -191,7 +190,7 @@ class CreateDeserializer(CommonDeserializer): def default(self, string): """Deserialize an xml-formatted volume create request.""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) volume = self._extract_volume(dom) return {'body': {'volume': volume}} diff --git a/nova/api/openstack/wsgi.py b/nova/api/openstack/wsgi.py index 1ff7745..6ab4f63 100644 --- a/nova/api/openstack/wsgi.py +++ b/nova/api/openstack/wsgi.py @@ -27,6 +27,7 @@ import webob from nova import exception from nova.openstack.common import jsonutils from nova.openstack.common import log as logging +from nova import utils from nova import wsgi @@ -217,7 +218,7 @@ class XMLDeserializer(TextDeserializer): plurals = set(self.metadata.get('plurals', {})) try: - node = minidom.parseString(datastring).childNodes[0] + node = utils.safe_minidom_parse_string(datastring).childNodes[0] return {node.nodeName: self._from_xml_node(node, plurals)} except expat.ExpatError: msg = _("cannot understand XML") @@ -268,11 +269,11 @@ class XMLDeserializer(TextDeserializer): def extract_text(self, node): """Get the text field contained by the given node""" - if len(node.childNodes) == 1: - child = node.childNodes[0] + ret_val = "" + for child in node.childNodes: if child.nodeType == child.TEXT_NODE: - return child.nodeValue - return "" + ret_val += child.nodeValue + return ret_val def extract_elements(self, node): """Get only Element type childs from node""" @@ -631,7 +632,7 @@ def action_peek_json(body): def action_peek_xml(body): """Determine action to invoke.""" - dom = minidom.parseString(body) + dom = utils.safe_minidom_parse_string(body) action_node = dom.childNodes[0] return action_node.tagName diff --git a/nova/tests/test_utils.py b/nova/tests/test_utils.py index adc62d8..46e4f14 100644 --- a/nova/tests/test_utils.py +++ b/nova/tests/test_utils.py @@ -457,6 +457,39 @@ class GenericUtilsTestCase(test.TestCase): result = utils.service_is_up(service) self.assertFalse(result) + def test_safe_parse_xml(self): + + normal_body = (""" + + + hey + there + + """).strip() + + def killer_body(): + return ((""" + + ]> + + + %(d)s + + """) % { + 'a': 'A' * 10, + 'b': '&a;' * 10, + 'c': '&b;' * 10, + 'd': '&c;' * 9999, + }).strip() + + dom = utils.safe_minidom_parse_string(normal_body) + self.assertEqual(normal_body, str(dom.toxml())) + + self.assertRaises(ValueError, + utils.safe_minidom_parse_string, + killer_body()) + def test_xhtml_escape(self): self.assertEqual('"foo"', utils.xhtml_escape('"foo"')) self.assertEqual(''foo'', utils.xhtml_escape("'foo'")) diff --git a/nova/utils.py b/nova/utils.py index 5722cf5..4a605cb 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -39,6 +39,10 @@ import tempfile import time import uuid import weakref +from xml.dom import minidom +from xml.parsers import expat +from xml import sax +from xml.sax import expatreader from xml.sax import saxutils from eventlet import event @@ -567,6 +571,46 @@ class LoopingCall(object): return self.done.wait() +class ProtectedExpatParser(expatreader.ExpatParser): + """An expat parser which disables DTD's and entities by default.""" + + def __init__(self, forbid_dtd=True, forbid_entities=True, + *args, **kwargs): + # Python 2.x old style class + expatreader.ExpatParser.__init__(self, *args, **kwargs) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + + def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): + raise ValueError("Inline DTD forbidden") + + def entity_decl(self, entityName, is_parameter_entity, value, base, + systemId, publicId, notationName): + raise ValueError(" forbidden") + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + # expat 1.2 + raise ValueError(" forbidden") + + def reset(self): + expatreader.ExpatParser.reset(self) + if self.forbid_dtd: + self._parser.StartDoctypeDeclHandler = self.start_doctype_decl + if self.forbid_entities: + self._parser.EntityDeclHandler = self.entity_decl + self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + + +def safe_minidom_parse_string(xml_string): + """Parse an XML string using minidom safely. + + """ + try: + return minidom.parseString(xml_string, parser=ProtectedExpatParser()) + except sax.SAXParseException as se: + raise expat.ExpatError() + + def xhtml_escape(value): """Escapes a string so it is valid within XML or XHTML. -- 1.8.1