Index: Mailman/Gui/Privacy.py =================================================================== RCS file: /cvsroot/mailman/mailman/Mailman/Gui/Privacy.py,v retrieving revision 2.15.2.2 diff -u -p -r2.15.2.2 Privacy.py --- Mailman/Gui/Privacy.py 1 Dec 2003 01:34:55 -0000 2.15.2.2 +++ Mailman/Gui/Privacy.py 26 Oct 2004 04:11:01 -0000 @@ -30,6 +30,11 @@ except NameError: True = 1 False = 0 +# Compatibility for Python < 2.3 +try: + from unicodedata import normalize as unicode_normalize +except ImportError: + unicode_normalize = lambda dummy, str: str class Privacy(GUIBase): @@ -467,8 +472,11 @@ class Privacy(GUIBase): continue # Make sure the pattern was a legal regular expression try: - re.compile(pattern) - except (re.error, TypeError): + upattern = unicode(pattern, + Utils.GetCharSet(mlist.preferred_language)) + upattern = unicode_normalize('NFKC', upattern) + re.compile(upattern) + except (re.error, TypeError, UnicodeError): safepattern = Utils.websafe(pattern) doc.addError(_("""The header filter rule pattern '%(safepattern)s' is not a legal regular expression. This Index: Mailman/Handlers/SpamDetect.py =================================================================== RCS file: /cvsroot/mailman/mailman/Mailman/Handlers/SpamDetect.py,v retrieving revision 2.3.2.1 diff -u -p -r2.3.2.1 SpamDetect.py --- Mailman/Handlers/SpamDetect.py 1 Dec 2003 01:49:54 -0000 2.3.2.1 +++ Mailman/Handlers/SpamDetect.py 26 Oct 2004 04:11:01 -0000 @@ -40,6 +40,17 @@ except NameError: True = 1 False = 0 +# Compatibility for Python < 2.3 +try: + from unicodedata import normalize as unicode_normalize +except ImportError: + unicode_normalize = lambda dummy, str: str + +from types import UnicodeType +from email.Errors import HeaderParseError +from email.Header import Header, make_header, decode_header +from Mailman.Utils import GetCharSet + # First, play footsie with _ so that the following are marked as translated, # but aren't actually translated until we need the text later on. def _(s): @@ -104,16 +115,47 @@ def process(mlist, msg, msgdata): # we've detected spam, so throw the message away raise SpamDetected # Now do header_filter_rules - g = HeaderGenerator(StringIO()) - g.flatten(msg) - headers = g.header_text() + #XXX g = HeaderGenerator(StringIO()) + #XXX g.flatten(msg) + #XXX headers = g.header_text() + lcset = GetCharSet(mlist.preferred_language) + hl = [] + for h, v in msg.items(): + if not v: + hl.append(u'%s:' % h) + continue + if isinstance(v, Header): + v = v.__unicode__() + else: + # If parsing header failed, assume list charset. + try: + v = make_header(decode_header(v)).__unicode__() + except (HeaderParseError, ValueError, LookupError, UnicodeError): + v = unicode(v, lcset, 'replace') + # Normalize unicode value so that 'Compatibility Characters' will + # match. + header = u'%s: %s' % (h, unicode_normalize('NFKC', v.lower())) + hl.append(header) + headers = u'\n'.join(hl) for patterns, action, empty in mlist.header_filter_rules: if action == mm_cfg.DEFER: continue for pattern in patterns.splitlines(): if pattern.startswith('#'): continue - if re.search(pattern, headers, re.IGNORECASE): + # Ignore empty line. + if not pattern.strip(): + continue + # If pattern is not unicode, assume list charset. + if not isinstance(pattern, UnicodeType): + pattern = unicode(pattern, lcset, 'replace') + # Normalize unicode pattern. + pattern = unicode_normalize('NFKC', pattern) + try: + mo = re.search(pattern, headers, re.IGNORECASE|re.UNICODE) + except (re.error, TypeError): + continue + if mo: if action == mm_cfg.DISCARD: raise Errors.DiscardMessage if action == mm_cfg.REJECT: