55,59c55,83
< def unescape(s):
<   p = htmllib.HTMLParser(None)
<   p.save_bgn()
<   p.feed(s)
<   return p.save_end()
---
> 
> import re, htmlentitydefs
> 
> ##
> # Replaces HTML or XML character references and named entities in a text string with the characters they denote.
> #
> # @param text The HTML (or XML) source text.
> # @return The plain text, converted to a Unicode string if necessary.
> 
> def unescape(text):
>   """Replace character references and named entities in text with their characters."""
>   def fixup(m): # re.sub callback: decode one matched "&...;" token or keep it
>     text = m.group(0)
>     if text[:2] == "&#":
>       # numeric character reference; HTML accepts both "&#x" and "&#X" for hex
>       try:
>         if text[:3].lower() == "&#x":
>           return unichr(int(text[3:-1], 16))
>         return unichr(int(text[2:-1]))
>       except (ValueError, OverflowError):
>         pass # not a number, or code point out of range: keep the token
>     else:
>       # named entity
>       try:
>         return unichr(htmlentitydefs.name2codepoint[text[1:-1]])
>       except KeyError:
>         pass # unknown entity name: keep the token
>     return text # leave as is
>   return re.sub(r"&#?\w+;", fixup, text)