From 53c38ca4e76b75ffc4b9a48c807298c74dab69cd Mon Sep 17 00:00:00 2001 From: Dan Prince Date: Sat, 2 Feb 2013 14:32:12 -0500 Subject: [PATCH] Add a safe_minidom_parse_string function. Adds a new utils.safe_minidom_parse_string function and updates external API facing Nova modules to use it. This ensures we have safe defaults on our incoming API XML parsing. Internally safe_minidom_parse_string uses a ProtectedExpatParser class to disable DTDs and entities from being parsed when using minidom. Fixes LP Bug #1100282 for Essex. Change-Id: I815b27ff2845293e3d6771ff8f99944ec08ccbd5 --- nova/api/openstack/common.py | 10 ++--- nova/api/openstack/compute/contrib/hosts.py | 4 +- .../openstack/compute/contrib/security_groups.py | 5 +-- nova/api/openstack/compute/servers.py | 5 +-- nova/api/openstack/wsgi.py | 12 +++--- nova/tests/test_utils.py | 35 +++++++++++++++++ nova/utils.py | 44 ++++++++++++++++++++++ 7 files changed, 96 insertions(+), 19 deletions(-) diff --git a/nova/api/openstack/common.py b/nova/api/openstack/common.py index 8e3daaa..87eccc5 100644 --- a/nova/api/openstack/common.py +++ b/nova/api/openstack/common.py @@ -21,7 +21,6 @@ import re import urlparse import webob -from xml.dom import minidom from nova.api.openstack import wsgi from nova.api.openstack import xmlutil @@ -31,6 +30,7 @@ from nova.compute import utils as compute_utils from nova import flags from nova import log as logging from nova import quota +from nova import utils LOG = logging.getLogger(__name__) @@ -338,7 +338,7 @@ def raise_http_conflict_for_instance_invalid_state(exc, action): class MetadataDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -346,7 +346,7 @@ class MetadataDeserializer(wsgi.MetadataXMLDeserializer): class MetaItemDeserializer(wsgi.MetadataXMLDeserializer): def deserialize(self, text): - dom = minidom.parseString(text) + dom = utils.safe_minidom_parse_string(text) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} @@ -364,7 +364,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return metadata def _extract_metadata_container(self, datastring): - dom = minidom.parseString(datastring) + dom = utils.safe_minidom_parse_string(datastring) metadata_node = self.find_first_child_named(dom, "metadata") metadata = self.extract_metadata(metadata_node) return {'body': {'metadata': metadata}} @@ -376,7 +376,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer): return self._extract_metadata_container(datastring) def update(self, datastring): - dom = minidom.parseString(datastring) + dom = utils.safe_minidom_parse_string(datastring) metadata_item = self.extract_metadata(dom) return {'body': {'meta': metadata_item}} diff --git a/nova/api/openstack/compute/contrib/hosts.py b/nova/api/openstack/compute/contrib/hosts.py index 202c8ca..2918733 100644 --- a/nova/api/openstack/compute/contrib/hosts.py +++ b/nova/api/openstack/compute/contrib/hosts.py @@ -16,7 +16,6 @@ """The hosts admin extension.""" import webob.exc -from xml.dom import minidom from xml.parsers import expat from nova.api.openstack import wsgi @@ -27,6 +26,7 @@ from nova import db from nova import exception from nova import flags from nova import log as logging +from nova import utils LOG = logging.getLogger(__name__) @@ -80,7 +80,7 @@ class HostShowTemplate(xmlutil.TemplateBuilder): class HostDeserializer(wsgi.XMLDeserializer): def default(self, string): try: - node = minidom.parseString(string) + node = utils.safe_minidom_parse_string(string) except expat.ExpatError: msg = _("cannot understand XML") raise exception.MalformedRequestBody(reason=msg) diff --git a/nova/api/openstack/compute/contrib/security_groups.py b/nova/api/openstack/compute/contrib/security_groups.py index a9368c5..4078c54 100644 --- a/nova/api/openstack/compute/contrib/security_groups.py +++ b/nova/api/openstack/compute/contrib/security_groups.py @@ -17,7 +17,6 @@ """The security groups extension.""" import urllib -from xml.dom import minidom from webob import exc import webob @@ -112,7 +111,7 @@ class SecurityGroupXMLDeserializer(wsgi.MetadataXMLDeserializer): """ def default(self, string): """Deserialize an xml-formatted security group create request""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) security_group = {} sg_node = self.find_first_child_named(dom, 'security_group') @@ -133,7 +132,7 @@ class SecurityGroupRulesXMLDeserializer(wsgi.MetadataXMLDeserializer): def default(self, string): """Deserialize an xml-formatted security group create request""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) security_group_rule = self._extract_security_group_rule(dom) return {'body': {'security_group_rule': security_group_rule}} diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index f23a2a3..49f3cec 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -17,7 +17,6 @@ import base64 import os import socket -from xml.dom import minidom from webob import exc import webob @@ -240,7 +239,7 @@ class ActionDeserializer(CommonDeserializer): """ def default(self, string): - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) action_node = dom.childNodes[0] action_name = action_node.tagName @@ -336,7 +335,7 @@ class CreateDeserializer(CommonDeserializer): def default(self, string): """Deserialize an xml-formatted server create request""" - dom = minidom.parseString(string) + dom = utils.safe_minidom_parse_string(string) server = self._extract_server(dom) return {'body': {'server': server}} diff --git a/nova/api/openstack/wsgi.py b/nova/api/openstack/wsgi.py index f654252..3ec9e36 100644 --- a/nova/api/openstack/wsgi.py +++ b/nova/api/openstack/wsgi.py @@ -153,7 +153,7 @@ class XMLDeserializer(TextDeserializer): plurals = set(self.metadata.get('plurals', {})) try: - node = minidom.parseString(datastring).childNodes[0] + node = utils.safe_minidom_parse_string(datastring).childNodes[0] return {node.nodeName: self._from_xml_node(node, plurals)} except expat.ExpatError: msg = _("cannot understand XML") @@ -195,11 +195,11 @@ class XMLDeserializer(TextDeserializer): def extract_text(self, node): """Get the text field contained by the given node""" - if len(node.childNodes) == 1: - child = node.childNodes[0] + ret_val = "" + for child in node.childNodes: if child.nodeType == child.TEXT_NODE: - return child.nodeValue - return "" + ret_val += child.nodeValue + return ret_val def find_attribute_or_element(self, parent, name): """Get an attribute value; fallback to an element if not found""" @@ -550,7 +550,7 @@ def action_peek_json(body): def action_peek_xml(body): """Determine action to invoke.""" - dom = minidom.parseString(body) + dom = utils.safe_minidom_parse_string(body) action_node = dom.childNodes[0] return action_node.tagName diff --git a/nova/tests/test_utils.py b/nova/tests/test_utils.py index 5babf4c..3d2bdd2 100644 --- a/nova/tests/test_utils.py +++ b/nova/tests/test_utils.py @@ -718,6 +718,41 @@ class DeprecationTest(test.TestCase): result = utils.service_is_up(service) self.assertFalse(result) + + def test_safe_parse_xml(self): + + normal_body = (""" + + + hey + there + + """).strip() + + def killer_body(): + return ((""" + + ]> + + + %(d)s + + """) % { + 'a': 'A' * 10, + 'b': '&a;' * 10, + 'c': '&b;' * 10, + 'd': '&c;' * 9999, + }).strip() + + dom = utils.safe_minidom_parse_string(normal_body) + self.assertEqual(normal_body, str(dom.toxml())) + + self.assertRaises(ValueError, + utils.safe_minidom_parse_string, + killer_body()) + + def test_xhtml_escape(self): self.assertEqual('"foo"', utils.xhtml_escape('"foo"')) self.assertEqual(''foo'', utils.xhtml_escape("'foo'")) diff --git a/nova/utils.py b/nova/utils.py index 6535b06..306740f 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -42,6 +42,10 @@ import time import types import uuid import warnings +from xml.dom import minidom +from xml.parsers import expat +from xml import sax +from xml.sax import expatreader from xml.sax import saxutils from eventlet import corolocal @@ -723,6 +727,46 @@ class LoopingCall(object): return self.done.wait() +class ProtectedExpatParser(expatreader.ExpatParser): + """An expat parser which disables DTD's and entities by default.""" + + def __init__(self, forbid_dtd=True, forbid_entities=True, + *args, **kwargs): + # Python 2.x old style class + expatreader.ExpatParser.__init__(self, *args, **kwargs) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + + def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): + raise ValueError("Inline DTD forbidden") + + def entity_decl(self, entityName, is_parameter_entity, value, base, + systemId, publicId, notationName): + raise ValueError(" forbidden") + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + # expat 1.2 + raise ValueError(" forbidden") + + def reset(self): + expatreader.ExpatParser.reset(self) + if self.forbid_dtd: + self._parser.StartDoctypeDeclHandler = self.start_doctype_decl + if self.forbid_entities: + self._parser.EntityDeclHandler = self.entity_decl + self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + + +def safe_minidom_parse_string(xml_string): + """Parse an XML string using minidom safely. + + """ + try: + return minidom.parseString(xml_string, parser=ProtectedExpatParser()) + except sax.SAXParseException as se: + raise expat.ExpatError() + + def xhtml_escape(value): """Escapes a string so it is valid within XML or XHTML. -- 1.8.1