# Patch summary (free text ahead of the first "Index:" header):
#
# * pagetemplatefile.py: PageTemplateFile._prepare_html no longer runs
#   meta_pattern over the whole template text.  It feeds the text to
#   FindMetaContentTypeHTMLParser, which inspects <meta> tags whose
#   http-equiv attribute is "content-type" and applies meta_pattern only to
#   the value of the content attribute.  On the first match the parser stores
#   (content_type, encoding) and raises FoundMetaContentTypeTag to stop
#   parsing early; _prepare_html catches it and reads the values through
#   get_params().
# * If no such tag is found, or HTMLParseError is raised, the fallbacks
#   (None, DEFAULT_ENCODING) are used.
# * The removed code also deleted the matched tag from the text with
#   meta_pattern.sub(); the new code returns the decoded text unchanged, which
#   is why the expected strings in tests/test_ptfile.py gain an extra literal.
# * The tag handler is handle_starttag, not handle_startendtag: HTMLParser
#   calls handle_startendtag only for XHTML-style empty tags (<meta ... />),
#   and its default implementation forwards those to handle_starttag, so
#   overriding handle_starttag covers both <meta ...> and <meta ... />.
#
# NOTE(review): line breaks in this copy of the patch appear to have been
# collapsed, and presumably some markup inside string literals was lost too --
# verify against the original attachment before applying.
Index: src/zope/pagetemplate/pagetemplatefile.py =================================================================== --- src/zope/pagetemplate/pagetemplatefile.py (revision 124430) +++ src/zope/pagetemplate/pagetemplatefile.py (working copy) @@ -23,19 +23,49 @@ import re import logging +from HTMLParser import HTMLParser, HTMLParseError + from zope.pagetemplate.pagetemplate import PageTemplate DEFAULT_ENCODING = "utf-8" -meta_pattern = re.compile( - r'\s*\s*', +meta_pattern = re.compile(r'\s*["\']?([^;]+);\s*charset=([^"\']+)', re.IGNORECASE) + def package_home(gdict): filename = gdict["__file__"] return os.path.dirname(filename) + +class FoundMetaContentTypeTag(Exception): + def __init__(self, value): + self.parameter = value + def __str__(self): + return repr(self.parameter) + + +class FindMetaContentTypeHTMLParser(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.content_type = None + self.encoding = DEFAULT_ENCODING + + def handle_starttag(self, tag, attrs): + if tag == "meta": + http_equiv = [a[1] for a in attrs if a[0] == "http-equiv"] + if http_equiv and http_equiv[0].lower() == "content-type": + content = [a[1] for a in attrs if a[0] == "content"] + if content: + match = meta_pattern.search(content[0]) + if match is not None: + self.content_type, self.encoding = match.groups() + raise FoundMetaContentTypeTag("Content Type Meta tag found") + + def get_params(self): + return self.content_type, self.encoding + + +class PageTemplateFile(PageTemplate): "Zope wrapper for filesystem Page Template using TAL, TALES, and METAL" @@ -57,16 +87,16 @@ return path def _prepare_html(self, text): - match = meta_pattern.search(text) - if match is not None: - type_, encoding = match.groups() - # TODO: Shouldn't / stripping - # be in PageTemplate.__call__()? 
- text = meta_pattern.sub("", text) - else: - type_ = None - encoding = DEFAULT_ENCODING - return unicode(text, encoding), type_ + parser = FindMetaContentTypeHTMLParser() + content_type = None + encoding = DEFAULT_ENCODING + try: + parser.feed(text) + except FoundMetaContentTypeTag: + content_type, encoding = parser.get_params() + except HTMLParseError: + pass + return unicode(text, encoding), content_type def _read_file(self): __traceback_info__ = self.filename Index: src/zope/pagetemplate/tests/test_ptfile.py =================================================================== --- src/zope/pagetemplate/tests/test_ptfile.py (revision 124430) +++ src/zope/pagetemplate/tests/test_ptfile.py (working copy) @@ -161,7 +161,9 @@ self.failUnlessEqual(rendered.strip(), u"" u"\u0422\u0435\u0441\u0442" - u"") + u'' + u"") def test_xhtml(self): pt = self.get_pt( @@ -176,7 +178,9 @@ self.failUnlessEqual(rendered.strip(), u"" u"\u0422\u0435\u0441\u0442" - u"") + u'' + u"")