Subject-prefix "sequence version" patch for Mailman's CookHeaders.py.

What the hunks below change:
- The prefix_subject() call is wrapped in try/except so that a UnicodeError
  or ValueError skips prefixing instead of propagating.
- A subject_prefix that is empty after strip() returns early.
- The subject is flattened with ToDigest.oneline(), using the charset of the
  first RFC 2047 encoded-word found in it (default 'us-ascii').
- A '%d'-style format in subject_prefix is filled in with mlist.post_id.  Any
  copy of the prefix already in the subject is removed, and the first run of
  repeated "Re:" is collapsed into a single "Re: ".
- Opening and closing brackets in the prefix are backslash-escaped before the
  prefix is used as a regular expression, so a prefix such as "(list %d)"
  does not produce an unbalanced-parenthesis regex error.

--- CookHeaders.py.orig	Tue Apr  1 06:49:42 2003
+++ CookHeaders.py	Wed Apr 30 13:33:02 2003
@@ -15,6 +15,7 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
 """Cook a message's Subject header.
+    (sequence version)
 """
 
 from __future__ import nested_scopes
@@ -29,6 +30,7 @@
 from Mailman import Utils
 from Mailman.i18n import _
 from Mailman.Logging.Syslog import syslog
+from Mailman.Handlers.ToDigest import oneline
 
 CONTINUATION = ',\n\t'
 COMMASPACE = ', '
@@ -72,7 +74,12 @@
     # VirginRunner sets _fasttrack for internally crafted messages.
     fasttrack = msgdata.get('_fasttrack')
     if not msgdata.get('isdigest') and not fasttrack:
-        prefix_subject(mlist, msg, msgdata)
+        try:
+            prefix_subject(mlist, msg, msgdata)
+        except (UnicodeError, ValueError):
+            # TK: Sometimes subject header is not MIME encoded for 8bit
+            # simply abort prefixing.
+            pass
     # Mark message so we know we've been here, but leave any existing
     # X-BeenThere's intact.
     msg['X-BeenThere'] = mlist.GetListEmail()
@@ -218,7 +225,9 @@
     # Add the subject prefix unless the message is a digest or is being fast
     # tracked (e.g. internally crafted, delivered to a single user such as the
     # list admin).
-    prefix = mlist.subject_prefix
+    prefix = mlist.subject_prefix.strip()
+    if not prefix:
+        return
     subject = msg.get('subject', '')
     # Try to figure out what the continuation_ws is for the header
     if isinstance(subject, Header):
@@ -229,35 +238,68 @@
         if len(lines) > 1 and lines[1] and lines[1][0] in ' \t':
             ws = lines[1][0]
     msgdata['origsubj'] = subject
-    if not subject:
-        subject = _('(no subject)')
     # The header may be multilingual; decode it from base64/quopri and search
     # each chunk for the prefix.  BAW: Note that if the prefix contains spaces
     # and each word of the prefix is encoded in a different chunk in the
     # header, we won't find it.  I think in practice that's unlikely though.
-    headerbits = decode_header(subject)
-    if prefix and subject:
-        pattern = re.escape(prefix.strip())
-        for decodedsubj, charset in headerbits:
-            if re.search(pattern, decodedsubj, re.IGNORECASE):
-                # The subject's already got the prefix, so don't change it
-                return
-    del msg['subject']
+    # TK: We assume the subject is mono-lingual in sequence version
+    # and search first non-None charset
+    cmatch = re.search(r'=\?([^\?]+)\?[bq]\?[^\?]+\?=', subject, re.I)
+    if cmatch:
+        cset = cmatch.group(1).lower()
+    else:
+        cset = 'us-ascii'
+    subject = oneline(subject, cset)
+    # Note: searching prefix in subject is REMOVED. (seq version)
+    # If the subject_prefix contains '%d', it is replaced with the
+    # mailing list sequential number. Also, if the prefix is enclosed in
+    # [], (), or {}, the prefix in a reply's subject is handled as well.
+    # sequential number format allows '%05d' like pattern.
+    p = re.compile('%\d*d')
+    if p.search(prefix,1):
+        # prefix has a number, so we should search prefix w/number
+        # in subject.
+        prefix_pattern = p.sub(r'\s*\d+\s*', prefix)
+    else:
+        prefix_pattern = prefix
+    prefix_pattern = re.sub('([\[\]\(\)\{\}])', '\\\\\g<1>', prefix_pattern)
+    subject = re.sub(prefix_pattern, '', subject)
+    subject = re.compile('(RE:\s*)+', re.I).sub('Re: ', subject, 1)
+    # At this point, subject may become null if someone posts mail with
+    # subject: [subject prefix]
+    if subject.strip() == '':
+        subject = _('(no subject)')
+        cset = Utils.GetCharSet(mlist.preferred_language)
+    # and substitute %d in prefix with post_id
+    try:
+        prefix = prefix % mlist.post_id
+    except TypeError:
+        pass
+    # If charset is 'us-ascii', try to concatenate as string because there
+    # is some weirdness in Header module (TK)
+    if cset == 'us-ascii':
+        try:
+            h = prefix + ' ' + subject
+            if _isunicode(h):
+                h = h.encode('us-ascii')
+            else:
+                h = unicode(h, 'us-ascii').encode('us-ascii')
+            del msg['subject']
+            msg['Subject'] = h
+            return
+        except UnicodeError:
+            pass
     # Get the header as a Header instance, with proper unicode conversion
     h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
-    for s, c in headerbits:
-        # Once again, convert the string to unicode.
-        if c is None:
-            c = Charset('iso-8859-1')
-        if not isinstance(c, Charset):
-            c = Charset(c)
-        if not _isunicode(s):
-            codec = c.input_codec or 'ascii'
-            try:
-                s = unicode(s, codec, 'replace')
-            except LookupError:
-                # Unknown codec, is this default reasonable?
-                s = unicode(s, Utils.GetCharSet(mlist.preferred_language),
-                            'replace')
-        h.append(s, c)
+    # in seq version, subject header is already concatenated
+    if not _isunicode(subject):
+        try:
+            subject = unicode(subject, cset, 'replace')
+        except (LookupError, TypeError):
+            # unknown codec
+            cset = Utils.GetCharSet(mlist.preferred_language)
+            subject = unicode(subject, cset, 'replace')
+    subject = subject.encode(cset,'replace')
+    h.append(subject, cset)
+    del msg['subject']
     msg['Subject'] = h