=== modified file 'loggerhead/controllers/annotate_ui.py' --- loggerhead/controllers/annotate_ui.py 2009-10-17 06:35:33 +0000 +++ loggerhead/controllers/annotate_ui.py 2011-03-24 01:24:53 +0000 @@ -17,7 +17,6 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -import cgi import os import time @@ -71,7 +70,7 @@ hl_lines = highlight(file_name, file_text, encoding) hl_lines.extend([u''] * (len(file_lines) - len(hl_lines))) else: - hl_lines = map(cgi.escape, file_lines) + hl_lines = map(util.html_escape, file_lines) change_cache = {} === modified file 'loggerhead/templatefunctions.py' --- loggerhead/templatefunctions.py 2009-10-17 08:47:38 +0000 +++ loggerhead/templatefunctions.py 2011-03-24 01:24:53 +0000 @@ -14,8 +14,8 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -import cgi import os +import urllib import pkg_resources @@ -23,6 +23,7 @@ import loggerhead from loggerhead.zptsupport import zpt +from loggerhead.util import html_format templatefunctions = {} @@ -49,16 +50,21 @@ if style == 'fragment': def file_link(filename): if currently_showing and filename == currently_showing: - return '%s' % ( - cgi.escape(filename), cgi.escape(filename)) + return html_format( + '%s', + urllib.quote(filename.encode('utf-8')), filename) else: return revision_link( - url, entry.revno, filename, '#' + filename) + url, entry.revno, filename, + '#' + urllib.quote(filename.encode('utf-8'))) else: def file_link(filename): - return '%s' % ( - url(['/revision', entry.revno]), '#' + filename, cgi.escape(filename), - cgi.escape(entry.revno), cgi.escape(filename)) + return html_format( + '' + '%s', + url(['/revision', entry.revno]), + '#' + urllib.quote(filename.encode('utf-8')), + filename, entry.revno, filename) return _pt('revisionfilechanges').expand( entry=entry, file_changes=file_changes, file_link=file_link, **templatefunctions) @@ -122,14 +128,16 @@ @templatefunc def annotate_link(url, revno, path): - return '%s' % ( - 
url(['/annotate', revno, path]), cgi.escape(path), cgi.escape(path)) + return html_format( + '%s', + url(['/annotate', revno, path]), path, path) + @templatefunc def revision_link(url, revno, path, frag=''): - return '%s' % ( - url(['/revision', revno, path]), frag, cgi.escape(path), - cgi.escape(revno), cgi.escape(path)) + return html_format( + '%s', + url(['/revision', revno, path]), frag, path, revno, path) @templatefunc === modified file 'loggerhead/tests/__init__.py' --- loggerhead/tests/__init__.py 2010-05-10 19:36:37 +0000 +++ loggerhead/tests/__init__.py 2011-03-24 01:24:53 +0000 @@ -22,5 +22,6 @@ 'test_corners', 'test_simple', 'test_templating', + 'test_util', ]])) return standard_tests === modified file 'loggerhead/tests/test_simple.py' --- loggerhead/tests/test_simple.py 2009-06-08 23:02:49 +0000 +++ loggerhead/tests/test_simple.py 2011-03-24 01:24:53 +0000 @@ -59,9 +59,11 @@ self.filecontents = ('some\nmultiline\ndata\n' 'with&")) + + def test_html_format(self): + self.assertEqual( + '<baz>&', + html_format( + '%s', "baz\"'", "&")) === modified file 'loggerhead/util.py' --- loggerhead/util.py 2010-04-24 12:40:17 +0000 +++ loggerhead/util.py 2011-03-24 01:24:53 +0000 @@ -20,7 +20,6 @@ # import base64 -import cgi import datetime import logging import re @@ -214,16 +213,47 @@ # only do this if unicode turns out to be a problem #_BADCHARS_RE = re.compile(ur'[\u007f-\uffff]') +# Can't be a dict; & needs to be done first. +html_entity_subs = [ + ("&", "&amp;"), + ('"', "&quot;"), + ("'", "&#39;"), # &apos; is defined in XML, but not HTML. + (">", "&gt;"), + ("<", "&lt;"), + ] + + +def html_escape(s): + """Transform dangerous (X)HTML characters into entities. + + Like cgi.escape, except also escaping " and '. This makes it safe to use + in both attribute and element content. 
+ + If you want to safely fill a format string with escaped values, use + html_format instead + """ + for char, repl in html_entity_subs: + s = s.replace(char, repl) + return s + + +def html_format(template, *args): + """Safely format an HTML template string, escaping the arguments. + + The template string must not be user-controlled; it will not be escaped. + """ + return template % tuple(html_escape(arg) for arg in args) + + # FIXME: get rid of this method; use fixed_width() and avoid XML(). - def html_clean(s): """ clean up a string for html display. expand any tabs, encode any html entities, and replace spaces with '&nbsp;'. this is primarily for use in displaying monospace text. """ - s = cgi.escape(s.expandtabs()) + s = html_escape(s.expandtabs()) s = s.replace(' ', '&nbsp;') return s @@ -269,7 +299,7 @@ except UnicodeDecodeError: s = s.decode('iso-8859-15') - s = cgi.escape(s).expandtabs().replace(' ', NONBREAKING_SPACE) + s = html_escape(s).expandtabs().replace(' ', NONBREAKING_SPACE) return HSC.clean(s).replace('\n', '<br/>')