Index: 2_0_3.1/Mailman/Handlers/HandlerAPI.py --- 2_0_3.1/Mailman/Handlers/HandlerAPI.py Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/i/37_HandlerAPI 1.1 664) +++ 0.4(w)/Mailman/Handlers/HandlerAPI.py Wed, 04 Apr 2001 07:55:43 -0700 dairiki (mailman/i/37_HandlerAPI 1.2 664) @@ -56,6 +56,7 @@ # this pipeline of handler modules. LIST_PIPELINE = ['SpamDetect', 'Approve', + 'PlainText', 'Replybot', 'Hold', 'Cleanse', Index: 2_0_3.1/Mailman/Version.py --- 2_0_3.1/Mailman/Version.py Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/j/27_Version.py 1.1 664) +++ 0.4(w)/Mailman/Version.py Wed, 04 Apr 2001 08:38:30 -0700 dairiki (mailman/j/27_Version.py 1.3 664) @@ -36,7 +36,7 @@ (REL_LEVEL << 4) | (REL_SERIAL << 0)) # config.db schema version number -DATA_FILE_VERSION = 21 +DATA_FILE_VERSION = (21, 'plaintext_patch') # qfile/*.db schema version number QFILE_SCHEMA_VERSION = 2 Index: 2_0_3.1/Mailman/MailList.py --- 2_0_3.1/Mailman/MailList.py Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/j/34_MailList.p 1.1 664) +++ 0.4(w)/Mailman/MailList.py Tue, 27 Mar 2001 19:16:52 -0800 dairiki (mailman/j/34_MailList.p 1.2 664) @@ -291,6 +291,7 @@ self.dont_respond_to_post_requests = 0 self.advertised = mm_cfg.DEFAULT_LIST_ADVERTISED self.max_num_recipients = mm_cfg.DEFAULT_MAX_NUM_RECIPIENTS + self.force_plain_text = mm_cfg.DEFAULT_FORCE_PLAIN_TEXT self.max_message_size = mm_cfg.DEFAULT_MAX_MESSAGE_SIZE self.web_page_url = mm_cfg.DEFAULT_URL self.owner = [admin] @@ -541,6 +542,24 @@ " limits except routine list moderation and spam" " filters, for which notices are not sent. This" " option overrides ever sending the notice."), + + ('force_plain_text', mm_cfg.Radio, ('No', 'Yes'), 0, + 'Convert all mail to the list into plain text format?', + + "If you enable this option, all posts to the list will be" + " converted to plain text:" + "" + "

" + "Note: Currently, there are bugs in the" + " handling of non-ASCII HTML entities when the message is" + " in a charset other than iso-8859-1."), ('max_message_size', mm_cfg.Number, 7, 0, 'Maximum length in Kb of a message body. Use 0 for no limit.'), Index: 2_0_3.1/Mailman/Defaults.py.in --- 2_0_3.1/Mailman/Defaults.py.in Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/j/44_Defaults.p 1.1 664) +++ 0.4(w)/Mailman/Defaults.py.in Tue, 27 Mar 2001 18:40:54 -0800 dairiki (mailman/j/44_Defaults.p 1.2 664) @@ -253,6 +253,8 @@ # allowed? DEFAULT_LIST_ADVERTISED = 1 DEFAULT_MAX_NUM_RECIPIENTS = 10 +# Should mail posted to the list be coerced into plain text? +DEFAULT_FORCE_PLAIN_TEXT = 0 DEFAULT_MAX_MESSAGE_SIZE = 40 # KB # These format strings will be expanded w.r.t. the dictionary for the Index: 2_0_3.1/Mailman/richtext.py --- 2_0_3.1/Mailman/richtext.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki () +++ 0.4(w)/Mailman/richtext.py Wed, 04 Apr 2001 07:46:51 -0700 dairiki (mailman/k/3_richtext.p 1.1 664) @@ -0,0 +1,408 @@ +# Copyright (C) 20001 by Geoffrey T. Dairiki +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +"""Classes for parsing text/richtext and text/enriched content. + +This module contains two public classes, RichtextParser and EnrichedTextParser, +which can be used for parsing text. 
The API of these classes follows that +of HTMLParser. + +SEE ALSO + + HTMLParser + +""" + +import string +import re +from formatter import AS_IS + +class _ParserBase: + """Basic parser functionality.""" + + #This regexp is suitable for richtext + CMD = re.compile('(?P.*?)(?P)', re.I | re.S) + MAX_CMD_LEN = 43 + + def __init__(self): + self.reset() + + def reset(self): + self.buf = '' + self.stack = [] + + def close(self): + self.__parse('flush') + + def feed(self, str): + self.buf = self.buf + str + self.__parse() + + def __parse(self, flush = 0): + buf, i = self.buf, 0 + while i < len(buf): + m = self.CMD.match(buf, i) + if not m: + break + i = m.end() + if m.group('text'): + self.handle_text(m.group('text')) + self.handle_command(m.group('command')) + + if flush: + keep = 0 + else: + keep = self.MAX_CMD_LEN - 1 + end = len(buf) - keep + if i < end: + self.handle_text(buf[i:end]) + i = end + self.buf = buf[i:] + + def handle_command(self, command): + command = string.lower(command) + if command[:2] == ' horizontal rule + self.formatter.add_hor_rule() + self.end('np') # no end tag allowed + + def start_paragraph(self): + self.formatter.end_paragraph(1) + def end_paragraph(self): + self.formatter.end_paragraph(1) + + def start_signature(self): + self.formatter.add_hor_rule() + self.start_italic() + def end_signature(self): + self.formatter.end_paragraph(1) + self.end_italic() + + # Margin control + def start_indent(self): + self.formatter.push_margin('indent') + def end_indent(self): + self.formatter.pop_margin() + def start_indentright(self): + self.formatter.push_margin('indentright') + def end_indentright(self): + self.formatter.pop_margin() + + +class EnrichedTextParser(_ParserCommon): + """A parser for text/enriched. + + This class parses textual data in the text/enriched format defined + in RFC 1896. + + The API is the same as that of HTMLParser. Output is through a + formatter object. + + SEE ALSO + + HTMLParser. 
+ + BUGS + + The following commands from RFC 1896 are ignored by this parser: + + FontFamily, Color, Lang + + Paragraph breaking/new-line handling is not quite right. + + """ + CMD = re.compile('(?P.*?)(?P|<<)', re.I | re.S) + MAX_CMD_LEN = 63 + + def __init__(self, formatter): + _ParserCommon.__init__(self, formatter) + self.nofill = 0 + self.deferred = None + self.param = None + self.pistack = [] + + def handle_command(self, command): + if command == '<<': + self.handle_text('<') + elif self.param is not None: + if string.lower(command) == '': + self.__start_deferred() + else: + self.param = self.param + command + else: + _ParserCommon.handle_command(self, command) + + def __start_deferred(self): + if self.deferred: + _ParserCommon.start(self, self.deferred) + self.deferred = None + self.param = None + + def start(self, command): + if command == 'param': + self.param = '' + else: + self.__start_deferred() + self.deferred = command + + def end(self, command): + self.__start_deferred() + _ParserCommon.end(self, command) + + _re_NLS = re.compile('(.*?)(\n+)') + + def handle_text(self, text): + if self.param is not None: + self.param = self.param + text + return + + self.__start_deferred() + + text = string.replace(text, '\r', '') + if not self.nofill: + def replace_nls(m): + match = m.group() + if len(match) > 1: + return match[1:] + else: + return ' ' + text = re.sub('[\n]+', replace_nls, text) + + i = 0 + while i < len(text): + m = self._re_NLS.match(text, i) + if not m: + self.formatter.add_flowing_data(text[i:]) + break + i = m.end() + if m.group(1): + self.formatter.add_flowing_data(m.group(1)) + self.formatter.add_literal_data(m.group(2)) + + def start_nofill(self): + self.nofill = 1 + def end_nofill(self): + self.nofill = 'nofill' in self.stack + + def start_paraindent(self): + self.formatter.end_paragraph(0) + param = string.strip(string.lower(self.param)) + if param == 'left': + self.formatter.push_margin('indent') + 
self.pistack.append(self.formatter.pop_margin) + elif param == 'right': + self.formatter.push_margin('rightindent') + self.pistack.append(self.formatter.pop_margin) + elif param == 'in': + self.formatter.push_style('indent') + self.pistack.append(self.formatter.pop_style) + elif param == 'out': + self.formatter.push_style('hangingindent') + self.pistack.append(self.formatter.pop_style) + else: + self.pistack.append(None) + def end_paraindent(self): + endfunc = self.pistack.pop() + if endfunc: + endfunc() + self.formatter.end_paragraph(0) + +# Some tests: +if __name__ == '__main__': + import formatter + import sys + + writer = formatter.DumbWriter(sys.stdout) + formatter = formatter.AbstractFormatter(writer) + parser = EnrichedTextParser(formatter) + + while 1: + line = sys.stdin.readline() + if not line: break + parser.feed(line) + parser.close() + Index: 2_0_3.1/Mailman/FilteringMimeWriter.py --- 2_0_3.1/Mailman/FilteringMimeWriter.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki () +++ 0.4(w)/Mailman/FilteringMimeWriter.py Wed, 04 Apr 2001 09:31:11 -0700 dairiki (mailman/k/4_FilteringM 1.2 664) @@ -0,0 +1,671 @@ +# Copyright (C) 20001 by Geoffrey T. Dairiki +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#FIXME: fix comments +#FIXME: test with no space after colon. 
+ +"""Tools for filtering MIME messages.""" + +import string +import re +import mimetools +import rfc822 +from Mailman.pythonlib import multifile +import StringIO +import cStringIO +import MimeWriter + +def is_raw_encoding(encoding): + """Check whether any transformation is required for encoding. + + Returns true iff no transformation is required for the + MIME Transfer-Encoding given by the argument. + + """ + return not encoding or string.lower(encoding) in ('7bit', '8bit', 'binary') + +def decode(infp, outfp, encoding): + """Wrapper for mimetools.decode(). + + Mimetools.decode() fails on the raw encodings (the ones which are + not really encoded. This will work on any recognized encoding. + + """ + if is_raw_encoding(encoding): + mimetools.copybinary(infp, outfp) + else: + mimetools.decode(infp, outfp, encoding) + +def encode(infp, outfp, encoding): + """Wrapper for mimetools.encode(). + + Mimetools.encode() fails on the raw encodings (the ones which are + not really encoded. This will work on any recognized encoding""" + + if is_raw_encoding(encoding): + mimetools.copybinary(infp, outfp) + else: + mimetools.encode(infp, outfp, encoding) + +_re_NONBINARY = re.compile(r'[\x80-\xff]') + +def is_ascii(str): + """Does string contains only ASCII characters.""" + return not _re_NONBINARY.search(str) + +_re_7BITBODY = re.compile(r'(?:[^\x00\r\n\x80-\xff]{0,200}\r?\n)*' + r'[^\x00\r\n\x80-\xff]{0,200}' + r'$') + +def is_valid_7bit_data(body): + """Check whether BODY contains data suitable for '7bit' MIME encoding. + + The body of MIME messages using the '7bit' transfer-encoding is subject + to the following limitations: + o Only ASCII characters are allowed. + o No NUL ('\x00') characters are allowed. + o CR and LF can only occur as part of CR-LF pairs. (We + extend this to allow bare LF's to separate lines.) + o Lines can be a maximum of 998 characters long (not counting + terminating CR-LF). We further restrict lines to 200 + characters for the hell of it. 
+ + """ + return _re_7BITBODY.match(body) + +_re_TEXTCHARS = re.compile(r'[\r\n\t\x20-\x7e]+') + +def looks_binary(body): + """Check whether BODY looks like binary data. + + If BODY is composed predominantly of ASCII characters, we assume + that it is textual rather than binary data. + + """ + nbin = len(_re_TEXTCHARS.sub('', body)) + return nbin > len(body) / 4 + +class Plist: + def __init__(self, plist = []): + self.plist = plist + + def __str__(self): + params = [] + for key, val in self.plist: + if re.search(r'[][()<>@,;:\\"/?=\r\n]', val): + val = '"%s"' % re.sub(r'(["\n\r])', r'\\\1', val) + params.append(';\n\t%s=%s' % (key, val)) + return string.join(params, '') + + def get(self, key, default = None): + try: + return self[key] + except IndexError: + return default + + def __len__(self): + return len(self.plist) + + def __getitem__(self, key): + key = string.lower(key) + for param, val in self.plist: + if string.lower(param) == key: + return val + raise IndexError + + def __setitem__(self, key, val): + del self[key] + self.plist.insert(0, (key, val)) + + def __delitem__(self, key): + key = string.lower(key) + def out_key (pair, key = string.lower(key)): + param, val = pair + return string.lower(param) != key + self.plist = filter(out_key, self.plist) + +def message_headers(message): + """Get a list of all headers of an rfc822.Message object. + + Returns a list of pairs, (header, value), where header is the + name of the header (case preserved, not lower-cased), and + value is the value of the header (all continuation lines + joined, leading space stripped). + + """ + rheaders, list, head = message.headers, [], '' + rheaders.reverse() + for line in rheaders: + head = line + head + if head[0] in ' \t': + continue + key, val = string.split(head, ':', 1) + list.insert(0, (key, string.lstrip(val))) + head = '' + return list + + +class DigestMessage(mimetools.Message): + """A Message with a default content-type of 'message/rfc822'. 
+ + Subparts of 'multipart/digest's are messages of this type. + + """ + def __init__(self, fp, seekable = 1): + mimetools.Message.__init__(self, fp, seekable) + # A bit of a hack. + if self.getheader('content-type') is None: + self.typeheader = 'message/rfc822' + self.parsetype() + +def write_message(message, mimewriter): + """Write MESSAGE using MIMEWRITER. + + The MESSAGE (which should be a mimetools.Message) is written to + the MIMEWRITER (which should be a MimeWriter). + + """ + + def plist(message): + """Get MimeWriter style plist from mimetools.Message.""" + def split_param(param): + pname, pval = string.split(param, '=', 1) + return (string.lower(pname), rfc822.unquote(pval)) + + return map(split_param, message.getplist()) + + + def discards_data(fp): + """Determine whether file object ignores data written to it. + + The startbody and startmultipartbody methods of + PlaintextMimeWriter can return dummy file-like objects, which, + in reality just discard any data written to them. + + The function detects those dummy file objects, so that + optimizations can be carried out. 
+ + """ + try: + return fp.discards_data + except AttributeError: + return 0 + + # Write the headers + for head, val in message_headers(message): + if string.lower(head) not in ('mime-version', 'content-type'): + mimewriter.addheader(head, val) + mimewriter.addheader('Mime-Version', '1.0 (Plaintext.py)') + + if message.seekable: + message.rewindbody() + + if message.getmaintype() == 'multipart': + # Multipart message + boundary = message.getparam('boundary') + if not boundary: + raise RuntimeError, 'No boundary for multipart data' + + # Coerce input file to MultiFile + if isinstance(message.fp, multifile.MultiFile): + infp = message.fp + else: + infp = multifile.MultiFile(message.fp) + infp.push(boundary) + + params = filter(lambda p: p[0] != 'boundary', plist(message)) + outfp = mimewriter.startmultipartbody(message.getsubtype(), boundary, params) + + if discards_data(outfp): + infp.pop() + else: + if message.getsubtype() == 'digest': + message_type = DigestMessage + else: + message_type = mimetools.Message + + mimetools.copybinary(infp, outfp) # copy preamble + while infp.next(): + subwriter = mimewriter.nextpart() + subpart = message_type(infp, 0) + write_message(subpart, subwriter) + mimewriter.lastpart() + infp.pop() + mimetools.copybinary(infp, outfp) # Copy trailer. + else: + # Single part message + outfp = mimewriter.startbody(message.gettype(), plist(message)) + if not discards_data(outfp): + mimetools.copybinary(message.fp, outfp) + + + mimewriter.flushheaders() + +class FilteringMimeWriter(MimeWriter.MimeWriter): + #FIXME: fix comments + """A class for filter MIME output. + + DESCRIPTION + + This is a sub-class of MimeWriter, and shares the same basic + interface. + + It is designed to be easily sub-classed to perform useful filtering + of MIME data. + + + API DIFFERENCES FROM MimeWriter + + Always call flushheaders() + + You must always call the flushheaders() method. 
You should + call it once, only after you've finished all output (body as + well as headers) to the PlaintextMimeWriter. + + WARNINGS + + For the output to be valid MIME you must add a Mime-Type: + header to the outer entity (this is true for the plain MimeWriter + as well.) A value of '1.0' will work fine. + + + """ + def __init__(self, fp): + MimeWriter.MimeWriter.__init__(self, fp) + self._reset() + + def _reset(self): + self.input_encoding = None + self.body = None + self.subpart = None + + def addheader(self, header, value): + lchead = string.lower(header) + + assert not self.body, 'addheader() after startbody()' + + # One shouldn't add a content-type to a regular MimeWriter, + # since the MimeWriter generates one itself. + assert lchead != 'content-type', "Attempt to add Content-Type: header to MimeWriter" + + if lchead == 'content-transfer-encoding': + self.input_encoding = string.strip(value) + elif self._keepheader(header): + MimeWriter.MimeWriter.addheader(self, header, value) + + # A list of headers to always accept. + # This list can also include header prefixes (with trailing '*') + # which will match any header beginning with that prefix. + INCLUDE_HEADERS = ('content-description',) + # A list of headers to ignore. (Same format as above.) 
+ EXCLUDE_HEADERS = () #('content-*',) + + def _keepheader(self, header): + def _match(header, pattern_list): + for pat in pattern_list: + if pat[-1] == '*': + if header[:len(pat)-1] == pat[:-1]: + return pat + elif header == pat: + return pat + return None + + header = string.lower(header) + if _match(header, self.INCLUDE_HEADERS): + return 1 + return not _match(header, self.EXCLUDE_HEADERS) + + def flushheaders(self): + if self.is_multipart(): + if self.subpart: + self.lastpart() + else: + assert not self.subpart + if self.body: + self.body.flush() + + self._flushheaders() + + def _flushheaders(self): + body = self.body + if body: + encoding = self._get_encoding_for_body() + self._add_mime_headers(encoding = encoding) + + have_headers = self._headers + MimeWriter.MimeWriter.flushheaders(self) + + if body: + if have_headers: + self._fp.write("\n") + body.seek(0) + encode(body, self._fp, encoding) + body.close() + + self._reset() + + def _get_encoding_for_body(self): + """Get MIME transfer-encoding suitable for encoding BODY. + + The encodings returned are only suitable for textual data (since + 'binary' or 'base64' should be used for non-textual data.) + + FIXME: should probably use some heuristic to identify binary + files and use base64 on them. 
+ """ + if self.body.is_valid_7bit_data(): + return '7bit' + if self.body.looks_binary(): + return 'base64' + return 'quoted-printable' + + def _add_mime_headers(self, encoding = 'binary'): + # Add mime headers for body + body = self.body + if body: + MimeWriter.MimeWriter.addheader(self, "Content-Type", + body.content_type + str(body.plist)) + if encoding: + MimeWriter.MimeWriter.addheader(self, "Content-Transfer-Encoding", + encoding) + + def startbody(self, ctype, plist=[], prefix=1): + assert ctype + assert not self.body, 'can only startbody once' + + plist = Plist(plist) + self.body = self._get_body(ctype, plist) + + decoder = self._get_decoder(self.input_encoding) + if decoder and not self.body.discards_data: + self.body = decoder(self.body, self.input_encoding) + + return self.body + + def _get_body(self, ctype, plist): + return MimeBody(ctype, plist, annotate = self._annotate) + + def _get_decoder(self, encoding): + if is_raw_encoding(encoding): + return None + elif string.lower(encoding) in ('uuencode', 'x-uuencode', 'uue', 'x-uue'): + return _UUDecoder + return _DefaultDecoder + + def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1): + assert subtype + assert not self.body, 'can only startmultipartbody once' + plist = Plist(plist) + plist['boundary'] = boundary + self.body = self._get_multipart_body(subtype, plist) + return self.body + + def _get_multipart_body(self, subtype, plist): + return MultipartBody(subtype, plist = plist, annotate = self._annotate) + + def nextpart(self): + assert self.is_multipart() + + if self.subpart: + self.subpart.flushheaders() # finish previous part + + self.body.add_separator() + + self.subpart = self.body.SubpartWriter(self.body) + return self.subpart + + def lastpart(self): + assert self.is_multipart() + + if self.subpart: + self.subpart.flushheaders() # finish previous part + self.subpart = None + + self.body.add_separator(final = 1) + + def is_multipart(self): + body = self.body + if not body: + 
return 0 + try: + body.SubpartWriter + except AttributeError: + return 0 + return 1 + + + def _annotate(self, str): + """Add a comment to the header of the message. + + A header with name 'X-Plaintext' and value STR is added to + the output message. + + This method can be called even after output to the body + has started. + + """ + from mimify import mime_encode_header + # Use mime_encode_header to allow non-ascii + # (FIXME: assumes iso-8859-1) + MimeWriter.MimeWriter.addheader(self, 'X-Plaintext', + mime_encode_header(str)) + + +class MimeBody(StringIO.StringIO): + + discards_data = 0 + + def __init__(self, ctype, plist = Plist(), annotate = None): + StringIO.StringIO.__init__(self) + self.content_type = string.lower(ctype) + self.plist = plist + if annotate: + self.annotate = annotate + + def annotate(str): + pass + + def writelines(self, list): + self.write(string.join(list, '')) + + def getvalue(self): + self.flush() + return StringIO.StringIO.getvalue(self) + + def is_valid_7bit_data(self): + return is_valid_7bit_data(self.getvalue()) + + def looks_binary(self): + return looks_binary(self.getvalue()) + + def is_ascii(self): + return is_ascii(self.getvalue()) + + def add_linebreak(self): + body = self.getvalue() + if body and body[-1] != '\n': + self.write('\n') + + def add_comment(self, str): + self.annotate(str) + self.add_linebreak() + self.write('[ %s ]\n' % str) + + +class MultipartBody(MimeBody): + def __init__(self, subtype, plist=Plist(), annotate = None): + MimeBody.__init__(self, 'multipart/' + subtype, plist = plist, annotate = annotate) + self.boundary = plist['boundary'] + + def add_separator(self, final = 0): + if final: + self.write('\n--%s--\n' % self.boundary) + else: + self.write('\n--%s\n' % self.boundary) + + class SubpartWriter(FilteringMimeWriter): + EXCLUDE_HEADERS = ('mime-version',) + FilteringMimeWriter.EXCLUDE_HEADERS + + +################################################################ +class BodyFilter: + """An file-like object 
which writes to the body of a FilteringMimeWriter. + + This object implements output file semantics. It appends all data + written to it to the body of the underlying FilteringMimeWriter object. + + """ + + def __init__(self, body): + self.body = body + + def __getattr__(self, attr): + # "Inherit" methods from underlying body. + self.__dict__[attr] = getattr(self.body, attr) + return self.__dict__[attr] + + def output(self, str): + self.body.write(str) + + # The following methods should be overloaded + def write(self, str): + self.output(str) + + def writelines(self, lines): + self.write(string.join(lines, '')) + + def flush(self): + self.flushoutput() + + def flushoutput(self): + self.body.flush() + + +class LineFilter(BodyFilter): + def __init__(self, body): + BodyFilter.__init__(self, body) + self.buf = '' + + def write(self, str): + j = string.rfind(str, '\n') + 1 + if not j: + self.buf = self.buf + str + return + self.handle_chunk(self.buf + str[:j]) + self.buf = str[j:] + + def flushinput(self): + if self.buf: + self.handle_chunk(self.buf + '\n') + self.buf = '' + + def flush(self): + self.flushinput() + self.flushoutput() + +class UUFilter(LineFilter): + def __init__(self, body): + LineFilter.__init__(self, body) + self.handle_data = self.handle_text + + SPLIT = re.compile('^(begin[ \t]+[0-7]{3,5}.*?|end)[ \t]*[\n\z]', re.M) + + def handle_chunk(self, chunk): + hunks = self.SPLIT.split(chunk) + self.handle_data(hunks.pop(0)) + assert len(hunks) % 2 == 0 + while hunks: + begin = re.match('begin\s+[0-7]{3,5}\s*(?P.*)', hunks.pop(0)) + if begin: + self.handle_begin(begin.group('name')) + self.handle_data = self.handle_uudata + else: + self.handle_end() + self.handle_data = self.handle_text + self.handle_data(hunks.pop(0)) + + def flush(self): + self.flushinput() + if self.handle_data == self.handle_uudata: + self.handle_end() + self.handle_data = self.handle_text + self.flushoutput() + + def handle_begin(self, filename): pass + def handle_end(self): pass + def 
handle_uudata(self, line): pass + def handle_text(self, line): pass + + + +################################################################ +class _DefaultDecoder(LineFilter): + def __init__(self, body, encoding): + LineFilter.__init__(self, body) + self.encoding = encoding + self.done = 0 + + def handle_chunk(self, chunk): + if self.done: + return + + class OutputThrough: + def __init__(self, decoder): + self.write = decoder.output + + infp = cStringIO.StringIO(chunk) + outfp = OutputThrough(self) + try: + mimetools.decode(infp, outfp, self.encoding) + except ValueError, detail: + self.add_comment("%s, discarding content" % detail) + self.done = 1 + infp.close() + +class _UUDecoder(UUFilter): + def __init__(self, body, encoding): + UUFilter.__init__(self, body) + self.done = 0 + + def handle_end(self): + self.done = 1 + + def handle_uudata(self, chunk): + import binascii + if not self.done: + for line in string.split(chunk, '\n'): + if line: + self.output(binascii.a2b_uu(line)) + + + +################################################################ + +# Some tests: +if __name__ == '__main__': + import sys + + assert is_valid_7bit_data('abd') + assert is_valid_7bit_data('abd\r\n') + assert is_valid_7bit_data('abd\r\ndef') + assert not is_valid_7bit_data('ab\x00') + assert not is_valid_7bit_data('abd\rdef') + + pname = sys.argv.pop(0) + for file in sys.argv: + msg = mimetools.Message(open(file, 'r')) + writer = FilteringMimeWriter(sys.stdout) + write_message(msg, writer) Index: 2_0_3.1/Mailman/PlaintextMimeWriter.py --- 2_0_3.1/Mailman/PlaintextMimeWriter.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki () +++ 0.4(w)/Mailman/PlaintextMimeWriter.py Wed, 04 Apr 2001 07:45:23 -0700 dairiki (mailman/k/2_Plaintext. 1.3 664) @@ -0,0 +1,440 @@ +# Copyright (C) 20001 by Geoffrey T. 
Dairiki +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#FIXME: fix comments +#FIXME: test with no space after colon. + +"""Tools for converting MIME messages into plain text.""" + +import string +import re +import mimetools +import cStringIO +import FilteringMimeWriter + +################################################################ +class PlaintextMimeWriter(FilteringMimeWriter.FilteringMimeWriter): + """A class for converting MIME output to plain text. + + DESCRIPTION + + This is a sub-class of MimeWriter, and shares the same basic + interface. + + The output is written as a (single-part) MIME message of type + 'text/plain'. The following transformations are made: + + Any uuencoded files are removed from plain text content. + + HTML, richtext, and enriched text content is converted to + plain text. + + Multipart content is flattened. (All but (at most) one + subpart is discarded from 'multipart/alternative' + entities.) + + All other content is deleted (and replaced by a diagnostic + message.) + + In addition, the transfer-encoding's of the various subparts + are handled correctly. All parts are converted to a uniform + encoding --- either '7bit', if possible, or + 'quoted-printable'. + + If different parts use conflicting character sets, a warning + is generated. 
Currently no attempt is made to recode the + data. + + API DIFFERENCES FROM MimeWriter + + Always call flushheaders() + + You must always call the flushheaders() method. You should + call it once, only after you've finished all output (body as + well as headers) to the PlaintextMimeWriter. + + Optimization of Discarded Entity Bodies + + Startbody(), and startmultipartbody() return file-like objects + which have an extra public attribute: discards_data. If this + attribute has a true value, then you may safely omit writing + the body content (as anything you write will be ignored anyway.) + + NEW METHODS + + annotate(cmnt) + + Add a comment to the headers of the message. + + comment(cmnt) + + Add a comment to both the body and headers of the message. + + WARNINGS + + For the output to be valid MIME you must add a Mime-Type: + header to the outer entity (this is true for the plain MimeWriter + as well.) A value of '1.0' will work fine. + + BUGS + + There are some problems with the handling of character sets, + particularly those other than ISO-8859-1 (or ASCII). + Currently: + + The HTML to text converter converts entities to their + ISO-8859-1, without regard for the declared charset of the + document. + + The HTML to text converter doesn't check META tags for + charset declarations. + + PlaintextMimeWriter.annotate(str) encodes headers assuming + ISO-8859-1 data. + + I'm sure the are other problems as well. 
+
+    """
+
+    EXCLUDE_HEADERS = ('content-*',)
+
+    def _get_filter(self, ctype):
+        ctype = string.lower(ctype)
+        if ctype == 'text/plain':
+            return _PlainTextFilter
+        if ctype == 'text/enriched':
+            return _EnrichedTextFilter
+        if ctype == 'text/richtext':
+            return _RichtextFilter
+        if ctype == 'text/html':
+            return _HTMLFilter
+        if ctype == 'message/rfc822':
+            return _RFC822Filter
+        return _BinaryFilter  # any other type: the content is discarded
+
+    def _get_body(self, ctype, plist):
+        body = FilteringMimeWriter.MimeBody(ctype, plist, annotate = self._annotate)
+        body = _TrailingSpaceStripper(body)
+        filter = self._get_filter(ctype)
+        if filter:
+            body = filter(body)
+        return body
+
+    def _get_multipart_body(self, subtype, plist):
+        if subtype == 'alternative':
+            return PlaintextAlternativeBody(subtype, plist, annotate = self._annotate)
+        return PlaintextMultipartBody(subtype, plist, annotate = self._annotate)
+
+
+class PlaintextMultipartBody(FilteringMimeWriter.MultipartBody):
+    def __init__(self, subtype, plist, annotate):
+        FilteringMimeWriter.MultipartBody.__init__(self, subtype, plist, annotate)
+        del self.plist['boundary']
+        self.content_type = 'text/plain'
+        self.sep = None  # text to prepend to the next write()
+
+    def write(self, str):
+        if self.sep:
+            str = self.sep + str
+            self.sep = None
+        FilteringMimeWriter.MultipartBody.write(self, str)
+
+    def add_separator(self, final = 0):
+        if final:
+            self._lastpart()
+        self.add_linebreak()
+        self.sep = '--\n'
+
+    def _lastpart(self):
+        pass
+
+    class SubpartWriter(PlaintextMimeWriter):
+        EXCLUDE_HEADERS = ('mime-version',) + PlaintextMimeWriter.EXCLUDE_HEADERS
+
+        def _get_encoding_for_body(self):
+            return 'binary'
+        def _add_mime_headers(self, **params):
+            pass
+
+        def _extract_annotations(self):
+            """Remove any headers generated by self.annotate().
+
+            Returns a list of the annotations.
+
+            """
+            annotations = []
+            headers = []
+            for line in self._headers:
+                head, val = string.split(line, ':', 1)
+                if head == 'X-Plaintext':
+                    annotations.append(val)
+                else:
+                    headers.append(line)
+            self._headers = headers
+            return annotations
+
+        def _flushheaders(self):
+
+            body = self.body
+            parent_body = self._fp
+
+            # Move annotations from subpart to parent part
+            for note in self._extract_annotations():
+                parent_body.annotate(note)
+
+            if body:
+                charset = body.plist.get('charset')
+                parent_charset = parent_body.plist.get('charset')
+
+                if charset and not body.is_ascii():
+                    if not parent_charset:
+                        parent_body.plist['charset'] = charset
+                    elif parent_charset != charset:
+                        # FIXME: recode content?
+                        parent_body.add_comment("Warning: charset mismatch '%s' != '%s'" %
+                                                (charset, parent_charset))
+
+            PlaintextMimeWriter._flushheaders(self)
+
+class PlaintextAlternativeBody(PlaintextMultipartBody):
+    def __init__(self, subtype, plist, annotate):
+        PlaintextMultipartBody.__init__(self, subtype, plist, annotate)
+        self.kept_part = None  # SubpartWriter of the best alternative so far
+
+    def _lastpart(self):
+        if self.kept_part:
+            self.add_comment('Picked %s from multipart/alternative'
+                             % self.kept_part.body.content_type)
+            self.kept_part.real_flushheaders()
+            self.kept_part = None
+        else:
+            self.add_comment('Deleted binary multipart/alternative')
+
+    def wants(self, subbody):
+        try:
+            plaintextness = subbody.plaintextness
+        except AttributeError:
+            return 0
+        if not self.kept_part:
+            return 1
+        return plaintextness > self.kept_part.body.plaintextness
+
+    class SubpartWriter(PlaintextMultipartBody.SubpartWriter):
+        def real_flushheaders(self):
+            PlaintextMultipartBody.SubpartWriter.flushheaders(self)
+
+        def flushheaders(self):
+            body = self.body
+            parent_body = self._fp
+            kept_part = parent_body.kept_part
+            if kept_part and kept_part is self:
+                return
+
+            if body and parent_body.wants(self.body):
+                parent_body.kept_part = self
+            elif body:
+                body.close()
+
+        def startbody(self, ctype, plist=[], prefix=1):
+            #FIXME:
+            parent_body = self._fp
+            self.body = PlaintextMultipartBody.SubpartWriter.startbody(self, ctype,
+                                                                       plist, prefix)
+            if not parent_body.wants(self.body):
+                self.body.close()
+                self.body = NullBody(ctype, plist)
+            return self.body
+
+        def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
+            return NullMultipartBody(subtype, plist)
+
+class NullBody(FilteringMimeWriter.MimeBody):
+    discards_data = 1
+    def write(self, str):
+        pass
+
+#FIXME: needs testing. and cleanup
+class NullMultipartBody(NullBody):
+    def __init__(self, subtype, plist=FilteringMimeWriter.Plist(), annotate = None):
+        NullBody.__init__(self, 'multipart/' + subtype, plist = plist, annotate = annotate)
+    def add_separator(self, final = 0):
+        pass
+    class SubpartWriter(PlaintextMimeWriter):
+        EXCLUDE_HEADERS = ('*',)
+        def flushheaders(self):
+            self.body.close()
+            self._reset()
+        def startbody(self, ctype, plist=[], prefix=1):
+            return NullBody(ctype, plist)
+        def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
+            return NullMultipartBody(subtype, plist)
+
+################################################################
+class ParserFilter(FilteringMimeWriter.BodyFilter):
+    """A base class for filters based on parsers like HTMLParser."""
+    def __init__(self, body, parser_class):
+        from formatter import DumbWriter, AbstractFormatter
+
+        class OutputThrough:
+            def __init__(self, filter):
+                self.write = filter.output
+        FilteringMimeWriter.BodyFilter.__init__(self, body)
+        outfp = OutputThrough(self)
+        writer = DumbWriter(outfp, maxcol = 78)
+        self.parser = parser_class(AbstractFormatter(writer))
+
+    def write(self, str):
+        self.parser.feed(str)
+
+    def flush(self):
+        self.parser.close()
+        self.output("\n")
+        self.flushoutput()
+
+
+
+class _TrailingSpaceStripper(FilteringMimeWriter.LineFilter):
+    def handle_chunk(self, chunk):
+        self.output(re.sub(r'[ \t]+(\r?\n)', r'\1', chunk))
+
+class _PlainTextFilter(FilteringMimeWriter.UUFilter):
+    """Delete uuencoded files from body."""
+    plaintextness = 10  # rank among multipart/alternative parts; highest wins
+    def __init__(self, body):
+        FilteringMimeWriter.UUFilter.__init__(self, body)
+        self.content_type = 'text/plain'
+    def handle_begin(self, filename):
+        self.add_comment("Deleted uuencoded file '%s'" % filename)
+    def handle_text(self, chunk):
+        self.output(chunk)
+
+class _HTMLFilter(ParserFilter):
+    """Convert HTML to plain text.
+
+    This filter expects to have HTML written to it.  It writes plain
+    text to the underlying message body.
+
+    BUGS (FIXME:)
+
+      Currently, all entities in the HTML are converted to ISO-8859-1 characters.
+
+    """
+    plaintextness = 0
+    def __init__(self, body):
+        import htmllib
+
+        class HTMLParser(htmllib.HTMLParser):
+            def start_div(self, attrs):
+                """<div> forces line break."""
+                self.formatter.end_paragraph(0)
+
+        ParserFilter.__init__(self, body, HTMLParser)
+        self.add_comment("Converted text/html to text/plain")
+        self.content_type = 'text/plain'
+
+    def output(self, str):
+        # Convert &nbsp;s to plain spaces
+        ParserFilter.output(self, string.replace(str, '\xa0', ' '))
+
+class _EnrichedTextFilter(ParserFilter):
+    """Convert text/enriched to plain text."""
+    plaintextness = 3
+    def __init__(self, body):
+        from richtext import EnrichedTextParser
+        ParserFilter.__init__(self, body, EnrichedTextParser)
+        self.add_comment("Converted text/enriched to text/plain")
+        self.content_type = 'text/plain'
+
+class _RichtextFilter(ParserFilter):
+    """Convert text/richtext to plain text."""
+    plaintextness = 1
+    def __init__(self, body):
+        from richtext import RichtextParser
+        ParserFilter.__init__(self, body, RichtextParser)
+        self.add_comment("Converted text/richtext to text/plain")
+        self.content_type = 'text/plain'
+
+class _RFC822Filter(FilteringMimeWriter.BodyFilter):
+    """Filter RFC 822 message.
+
+    FIXME: This is a bit of a hack.  It would be much more efficient to
+    filter the headers as they're written, feed them to our mimewriter,
+    and pass the body straight through to the mimewriter.
+
+    """
+    plaintextness = -1
+    def __init__(self, body):
+        FilteringMimeWriter.BodyFilter.__init__(self, body)
+        self.buf = cStringIO.StringIO()
+        self.content_type = 'text/plain'
+
+    def write(self, str):
+        if self.buf:
+            self.buf.write(str)
+
+    def flush(self):
+        class MyMimeWriter(PlaintextMimeWriter):
+            EXCLUDE_HEADERS = ('return-path', 'received', 'errors-to',
+                               'envelope-to', 'delivery-date', 'message-id',
+                               'x-*', 'mime-version', 'list-*'
+                               ) + PlaintextMimeWriter.EXCLUDE_HEADERS
+            def _get_encoding_for_body(self):
+                return 'binary'
+            def _add_mime_headers(self, **params):
+                pass
+
+        if self.buf:
+            self.buf.seek(0)
+            msg = mimetools.Message(self.buf)
+            mimewriter = MyMimeWriter(self.body)
+            FilteringMimeWriter.write_message(msg, mimewriter)
+            self.buf.close()
+            self.buf = None
+        self.flushoutput()
+
+
+class _BinaryFilter(FilteringMimeWriter.BodyFilter):
+    """Filter binary files.
+
+    All content is deleted.
+
+    """
+    discards_data = 1
+
+    def __init__(self, body):
+        FilteringMimeWriter.BodyFilter.__init__(self, body)
+        filename = body.plist.get('name', default = 'Content')
+        ctype = body.content_type
+        self.add_comment("%s of type %s deleted" % (filename, ctype))
+        self.content_type = 'text/plain'
+
+    def write(self, str):
+        pass
+    def writelines(self, lines):
+        pass
+
+
+
+# Some tests:
+if __name__ == '__main__':
+    import sys
+
+    pname = sys.argv.pop(0)
+    for file in sys.argv:
+        msg = mimetools.Message(open(file, 'r'))
+        writer = PlaintextMimeWriter(sys.stdout)
+        FilteringMimeWriter.write_message(msg, writer)
+
Index: 2_0_3.1/Mailman/pythonlib/multifile.py
--- 2_0_3.1/Mailman/pythonlib/multifile.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki ()
+++ 0.4(w)/Mailman/pythonlib/multifile.py Wed, 04 Apr 2001 07:44:58 -0700 dairiki (mailman/k/5_multifile. 1.1 664)
@@ -0,0 +1,392 @@
+"""A readline()-style interface to the parts of a multipart message.
+
+The MultiFile class makes each part of a multipart message "feel" like
+an ordinary file, as long as you use fp.readline().  Allows recursive
+use, for nested multipart messages.  Probably best used together
+with module mimetools.
+
+Suggested use:
+
+real_fp = open(...)
+fp = MultiFile(real_fp)
+
+'read some lines from fp'
+fp.push(separator)
+while 1:
+    'read lines from fp until it returns an empty string' (A)
+    if not fp.next(): break
+fp.pop()
+'read remaining lines from fp until it returns an empty string'
+
+The latter sequence may be used recursively at (A).
+It is also allowed to use multiple push()...pop() sequences.
+
+If seekable is given as 0, the class code will not do the bookkeeping
+it normally attempts in order to make seeks relative to the beginning of the
+current file part.  This may be useful when using MultiFile with a non-
+seekable stream object.
+"""
+
+# 2001-04-03: Geoffrey T. Dairiki
+#
+# This is a re-implementation of the stock python multifile.py
+#
+# The main changes:
+#
+# 1. Efficiency:
+#
+#    This version supports calling the read() method with an argument.
+#    (In many cases, I've found that reading a MultiFile line by line
+#    is just too slow --- remember multipart messages often contain
+#    large binary attachments.)
+#
+#    This version performs reads on the underlying input stream in
+#    larger chunks as well, and uses a regular expression search to
+#    search for separator lines.
+#
+# 2. Buglets fixed
+#
+#    The original version has a buglet regarding its handling of the
+#    newline which precedes a separator line.  According to RFC 2046,
+#    section 5.1.1 the newline preceding a separator is part of the
+#    separator, not part of the preceding content.  The old version
+#    of multifile.py treats the newline as part of the content.  Thus,
+#    it introduces a spurious empty line at the end of each content.
+#
+#    Matching of the separators: RFC 2046, section 5.1.1 also states,
+#    that if the beginning of a line matches the separator, it is a
+#    separator.  The old code ignores only trailing white space when
+#    looking for a separator line.  This code ignores trailing anything,
+#    on the separator line.
+
+
+import string
+import re
+
+__all__ = ["MultiFile", "Error", "IllegalSeek", "UnexpectedEOF", "EndmarkMissing"]
+
+class Error(RuntimeError):
+    pass
+
+class IllegalSeek(Error):
+    def __init__(self, detail = "Illegal seek on multifile"):
+        Error.__init__(self, detail)
+
+class UnexpectedEOF(Error):
+    def __init__(self, detail = "Sudden EOF on multifile"):
+        Error.__init__(self, detail)
+
+class EndmarkMissing(Error):
+    pass
+
+_END_OF_STRING = re.compile(r'\Z')
+
+class MultiFile:
+    """File-like reader for the parts of a multipart message."""
+
+    def __init__(self, fp, seekable=1):
+        self.fp = fp
+        self.readahead = ''
+        self.pos = 0                    # Position in underlying file
+
+        self.seekable = seekable
+        if seekable:
+            try:
+                self.pos = fp.tell()
+            except:  # fp cannot report a position: fall back to non-seekable
+                self.seekable = 0
+
+        self.startpos = self.pos        # Start of current 'file'
+        self.endpos = None              # End of current 'file', if known
+        self.limit = None               # min(enclosing endpos's)
+        self.sep = None                 # current boundary string
+        # Matches current separator, current end marker,
+        # or any enclosing separators or end markers:
+        self.mark_re = re.compile('(?=a)b') # Never matches
+        # Same as above, but includes trailing cruft through newline.
+        self.markline_re = self.mark_re
+        # Maximum length of a match for self.mark_re.
+        self.max_mark_len = 0
+        self.done = 0                   # End marker passed?
+
+        self.stack = []
+
+    def tell(self):
+        """Return the read position relative to the start of the current part."""
+        if not self.seekable:
+            raise IllegalSeek
+        return self.pos - self.startpos
+
+    def seek(self, pos, whence=0):
+        """Move the read position; offsets are relative to the current part."""
+        if not self.seekable:
+            raise IllegalSeek
+
+        # Figure out where end of current file is.
+        if self.endpos is None:
+            curpos = self.pos
+            try:
+                while self.read(4096): pass
+            finally:
+                self.pos = curpos
+                self.readahead = ''
+
+        if whence == 1:
+            newpos = self.pos + pos
+        elif whence == 2:
+            newpos = self.endpos + pos
+        else:
+            newpos = self.startpos + pos
+
+        if newpos < self.startpos:
+            raise ValueError("Seek past beginning of file")
+
+        self.pos = newpos
+        self.fp.seek(self.pos)
+        self.readahead = ''
+
+    def __read_more(self, size = 8192):
+        """Append up to 'size' chars from fp to readahead; UnexpectedEOF at EOF."""
+        hunk = self.fp.read(size)
+        if not hunk:
+            raise UnexpectedEOF
+        self.readahead = self.readahead + hunk
+
+    def __do_read(self, size):
+        """Fill readahead to at least 'size' chars, or raise UnexpectedEOF."""
+        assert size >= 0
+        while 1:
+            need = size - len(self.readahead)
+            if need <= 0:
+                break
+            self.__read_more(need)
+
+    def __try_to_read(self, size):
+        """Like __do_read(), but at EOF return a match at end of readahead."""
+        try:
+            self.__do_read(size)
+        except UnexpectedEOF:
+            return _END_OF_STRING.search(self.readahead)
+        else:
+            return None
+
+    def __fillbuf(self, size):
+        """Buffer data for read(size); return how many chars may be consumed."""
+        if self.endpos is not None:
+            # If we know where the end is, no need to search for marks.
+            # Just read in the data.
+            nleft = self.endpos - self.pos
+            if size >= 0:
+                nleft = min(size, nleft)
+            if nleft <= 0:
+                return 0
+            self.__do_read(nleft)
+            return nleft
+
+        if size < 0:
+            # Read until mark found
+            if self.sep is None:
+                self.readahead = self.readahead + self.fp.read()
+                eof = _END_OF_STRING.search(self.readahead)
+            else:
+                eof = self.mark_re.search(self.readahead)
+                while not eof:
+                    self.__read_more()
+                    eof = self.mark_re.search(self.readahead)
+
+        else:
+            if self.limit is not None:
+                size = min(size, self.limit - self.pos)
+            # Read enough to include mark if there is one
+            eof = self.__try_to_read(size + self.max_mark_len)
+            # NOTE(review): the search below stops at 'size', so a mark that
+            # starts within the first 'size' chars but ends beyond them is
+            # missed and its head is handed out as data.  Searching up to
+            # size + self.max_mark_len and accepting only matches that start
+            # before 'size' would close the gap, but would also make
+            # readline() drop the newline preceding a mark -- confirm intent.
+            if self.sep is not None:
+                eof = self.mark_re.search(self.readahead, 0, size)
+
+        if eof:
+            if (self.sep is not None) and (eof.group('badmark') is not None):
+                raise EndmarkMissing("Missing endmarker (sep = '%s')" % self.sep)
+            size = eof.start()
+            self.endpos = self.pos + size
+        else:
+            size = min(size, len(self.readahead))
+        return size
+
+    def read(self, size = -1):
+        """Read up to 'size' chars of the current part (all of it if size < 0)."""
+        size = self.__fillbuf(size)
+        hunk = self.readahead[:size]
+        self.readahead = self.readahead[size:]
+        self.pos = self.pos + size
+        return hunk
+
+    def readline(self):
+        """Return the next line of the current part, or '' at its end."""
+        if self.endpos is not None and self.pos >= self.endpos:
+            return ''
+        try:
+            while 1:
+                line_len = string.find(self.readahead, '\n') + 1
+                if line_len:
+                    return self.read(line_len)
+                self.__read_more()
+        except UnexpectedEOF:
+            if self.sep is None:
+                return self.read()
+            raise
+
+    def readlines(self):
+        """Return the remaining lines of the current part as a list."""
+        lines = string.split(self.read(), '\n')
+        last = lines.pop()
+        lines = map(lambda x: x+'\n', lines)
+        if last:
+            lines.append(last)
+        return lines
+
+    def __at_mark(self):
+        """Return a match for the marker line at the start of readahead, or None."""
+        self.__try_to_read(self.max_mark_len)
+        mark = self.mark_re.match(self.readahead)
+        if mark:
+            # Find end of marker line
+            try:
+                while 1:
+                    mark = self.markline_re.match(self.readahead)
+                    if mark:
+                        break
+                    self.__read_more(256)
+            except UnexpectedEOF:
+                # Last line has no newline.  Use re.M as in mark_re, so that
+                # '^' still matches after the newline that may lead the mark.
+                mark = re.match(self.mark_re.pattern + r".*\Z",
+                                self.readahead, re.M)
+                assert mark
+        return mark
+
+
+    def next(self):
+        """Skip to the start of the next part; return 1 if there is one, else 0."""
+        if self.done:
+            return 0
+
+        while self.read(4096):
+            pass
+
+        if self.sep is None:
+            # no marks on stack
+            return 0
+
+        if self.pos > self.endpos:
+            assert self.seekable
+            self.pos = self.endpos
+            self.fp.seek(self.pos)
+            self.readahead = ''
+        mark = self.__at_mark()
+        assert mark
+
+        if mark.group('badmark') is not None:
+            return 0
+
+        mark_len = mark.end()
+        self.readahead = self.readahead[mark_len:]
+        self.pos = self.pos + mark_len
+        self.startpos = self.pos
+        if mark.group('endmark') is not None:
+            self.endpos = self.pos
+            self.done = 1
+            return 0
+        else:
+            self.endpos = None
+            return 1
+
+    def push(self, sep):
+        """Start reading parts delimited by the boundary string 'sep'."""
+        if self.endpos is not None and self.pos > self.endpos:
+            raise Error('bad MultiFile.push() call')
+        if self.done or self.__at_mark():
+            raise Error('bad MultiFile.push() call')
+        if sep is None:
+            raise ValueError('bad separator')
+
+        self.stack.append( (self.sep, self.startpos, self.endpos, self.limit,
+                            self.mark_re, self.markline_re, self.max_mark_len) )
+
+        if self.endpos is not None:
+            def not_None(x):
+                return x is not None
+            self.limit = min(filter(not_None, [self.endpos, self.limit]))
+
+        self.sep = sep
+        self.startpos, self.endpos = self.pos, None
+
+        self.__compute_regexps()
+
+    def __compute_regexps(self):
+        """Rebuild mark_re, markline_re and max_mark_len for the pushed separators."""
+        def common_prefix(list):
+            prefix = list[0]
+            for item in list[1:]:
+                while prefix != item[:len(prefix)]:
+                    prefix = prefix[:-1]
+            return prefix
+
+        assert self.sep is not None
+
+        mark = self.section_divider(self.sep)
+        endmark = self.end_marker(self.sep)
+
+        badmarks = []
+        for sep in map(lambda x:x[0], self.stack[1:]):
+            badmarks.append(self.end_marker(sep))
+            badmarks.append(self.section_divider(sep))
+
+        marks = [mark, endmark] + badmarks
+        prefix = common_prefix(marks)
+
+        def remove_prefix(x, p=prefix):
+            return x[len(p):]
+
+        mark = re.escape(remove_prefix(mark))
+        endmark = re.escape(remove_prefix(endmark))
+        badmarks = string.join(map(re.escape, map(remove_prefix, badmarks)), '|')
+        prefix = re.escape(prefix)
+
+        if not badmarks:
+            badmarks = '(?=a)b'         # never matches
+        regexp = ( r'(?:\r?\n)?^%s(?:(?P<endmark>%s)|%s|(?P<badmark>%s))' %
+                   (prefix, endmark, mark, badmarks) )
+
+        self.max_mark_len = max(map(len, marks)) + 2
+        self.mark_re = re.compile(regexp, re.M)
+        self.markline_re = re.compile(regexp + r'.*\n', re.M)
+
+    def pop(self):
+        """Restore the state saved by the matching push()."""
+        try:
+            ( self.sep, self.startpos, self.endpos, self.limit,
+              self.mark_re, self.markline_re, self.max_mark_len
+              ) = self.stack.pop()
+        except IndexError:
+            raise Error('bad MultiFile.pop() call')
+        self.done = 0
+
+    def is_data(self, line):
+        """Return true if 'line' does not start with '--'."""
+        return line[:2] != '--'
+
+    def section_divider(self, str):
+        """Return the text that starts a line separating two parts."""
+        return "--" + str
+
+    def end_marker(self, str):
+        """Return the text that starts the line ending the last part."""
+        return "--" + str + "--"
+
+#End of multifile.py
+
Index: 2_0_3.1/Mailman/Handlers/PlainText.py
--- 2_0_3.1/Mailman/Handlers/PlainText.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki ()
+++ 0.4(w)/Mailman/Handlers/PlainText.py Wed, 04 Apr 2001 09:27:33 -0700 dairiki (mailman/k/6_PlainText. 1.1 664)
@@ -0,0 +1,68 @@
+# Copyright (C) 2001 by Geoffrey T. Dairiki
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""Force message into plain text format.
+"""
+
+
+def process(mlist, msg, msgdata):
+    """Replace msg's content with a plain-text rendering of itself.
+
+    Does nothing unless mlist.force_plain_text is set, and skips messages
+    whose msgdata has 'isdigest' or 'fasttrack' set.  Otherwise the message
+    is run through a PlaintextMimeWriter, every header of the converted
+    message is stored back into msg, msg.body is replaced with the
+    converted text, and msgdata['_dirty'] is set.
+
+    """
+
+    if not mlist.force_plain_text or msgdata.get('isdigest'):
+        return
+
+    # Paranoia:
+    #  We should never see messages with 'fasttrack' set.
+    #  (They get delivered via DeliverToUser(), and therefore we shouldn't
+    #  be in their pipeline.)
+    if msgdata.get('fasttrack'):
+        return
+
+
+    # Mark the message as dirty so that its text will be forced to disk next
+    # time it's queued.
+    msgdata['_dirty'] = 1
+
+    import mimetools
+    from Mailman.FilteringMimeWriter import write_message
+    from Mailman import PlaintextMimeWriter
+    from cStringIO import StringIO
+
+    class MyMimeWriter(PlaintextMimeWriter.PlaintextMimeWriter):
+        # Don't include any headers in the output
+        # (except for the bare minimum of MIME headers)
+        def _keepheader(self, header):
+            return None
+
+    inbuf = StringIO(str(msg))
+    outbuf = StringIO()
+    write_message(mimetools.Message(inbuf),
+                  MyMimeWriter(outbuf))
+    inbuf.close()
+    outbuf.seek(0)
+    plaintext = mimetools.Message(outbuf)
+
+    for hdr in plaintext.keys():
+        msg[hdr] = plaintext[hdr]
+    msg.body = outbuf.read()