Index: 2_0_3.1/Mailman/Handlers/HandlerAPI.py
--- 2_0_3.1/Mailman/Handlers/HandlerAPI.py Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/i/37_HandlerAPI 1.1 664)
+++ 0.4(w)/Mailman/Handlers/HandlerAPI.py Wed, 04 Apr 2001 07:55:43 -0700 dairiki (mailman/i/37_HandlerAPI 1.2 664)
@@ -56,6 +56,7 @@
# this pipeline of handler modules.
LIST_PIPELINE = ['SpamDetect',
'Approve',
+ 'PlainText',
'Replybot',
'Hold',
'Cleanse',
Index: 2_0_3.1/Mailman/Version.py
--- 2_0_3.1/Mailman/Version.py Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/j/27_Version.py 1.1 664)
+++ 0.4(w)/Mailman/Version.py Wed, 04 Apr 2001 08:38:30 -0700 dairiki (mailman/j/27_Version.py 1.3 664)
@@ -36,7 +36,7 @@
(REL_LEVEL << 4) | (REL_SERIAL << 0))
# config.db schema version number
-DATA_FILE_VERSION = 21
+DATA_FILE_VERSION = (21, 'plaintext_patch')
# qfile/*.db schema version number
QFILE_SCHEMA_VERSION = 2
Index: 2_0_3.1/Mailman/MailList.py
--- 2_0_3.1/Mailman/MailList.py Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/j/34_MailList.p 1.1 664)
+++ 0.4(w)/Mailman/MailList.py Tue, 27 Mar 2001 19:16:52 -0800 dairiki (mailman/j/34_MailList.p 1.2 664)
@@ -291,6 +291,7 @@
self.dont_respond_to_post_requests = 0
self.advertised = mm_cfg.DEFAULT_LIST_ADVERTISED
self.max_num_recipients = mm_cfg.DEFAULT_MAX_NUM_RECIPIENTS
+ self.force_plain_text = mm_cfg.DEFAULT_FORCE_PLAIN_TEXT
self.max_message_size = mm_cfg.DEFAULT_MAX_MESSAGE_SIZE
self.web_page_url = mm_cfg.DEFAULT_URL
self.owner = [admin]
@@ -541,6 +542,24 @@
" limits except routine list moderation and spam"
" filters, for which notices are not sent. This"
" option overrides ever sending the notice."),
+
+ ('force_plain_text', mm_cfg.Radio, ('No', 'Yes'), 0,
+ 'Convert all mail to the list into plain text format?',
+
+ "If you enable this option, all posts to the list will be"
+ " converted to plain text:"
+ "
"
+ "- Any MIME content not of type 'text/html' or"
+ " 'text/plain' will be deleted."
+ "
- Any uuencoded data will be deleted."
+ "
- HTML content will be converted to plain text."
+ "
- Multipart messages will be flattened into a single"
+ " message."
+ "
"
+ ""
+ "Note: Currently, there are bugs in the"
+ " handling of non-ASCII HTML entities when the message is"
+ " in a charset other than iso-8859-1."),
('max_message_size', mm_cfg.Number, 7, 0,
'Maximum length in Kb of a message body. Use 0 for no limit.'),
Index: 2_0_3.1/Mailman/Defaults.py.in
--- 2_0_3.1/Mailman/Defaults.py.in Tue, 27 Mar 2001 14:03:02 -0800 dairiki (mailman/j/44_Defaults.p 1.1 664)
+++ 0.4(w)/Mailman/Defaults.py.in Tue, 27 Mar 2001 18:40:54 -0800 dairiki (mailman/j/44_Defaults.p 1.2 664)
@@ -253,6 +253,8 @@
# allowed?
DEFAULT_LIST_ADVERTISED = 1
DEFAULT_MAX_NUM_RECIPIENTS = 10
+# Should mail posted to the list be coerced into plain text?
+DEFAULT_FORCE_PLAIN_TEXT = 0
DEFAULT_MAX_MESSAGE_SIZE = 40 # KB
# These format strings will be expanded w.r.t. the dictionary for the
Index: 2_0_3.1/Mailman/richtext.py
--- 2_0_3.1/Mailman/richtext.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki ()
+++ 0.4(w)/Mailman/richtext.py Wed, 04 Apr 2001 07:46:51 -0700 dairiki (mailman/k/3_richtext.p 1.1 664)
@@ -0,0 +1,408 @@
+# Copyright (C) 2001 by Geoffrey T. Dairiki
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+"""Classes for parsing text/richtext and text/enriched content.
+
+This module contains two public classes, RichtextParser and EnrichedTextParser,
+which can be used for parsing text. The API of these classes follows that
+of HTMLParser.
+
+SEE ALSO
+
+ HTMLParser
+
+"""
+
+import string
+import re
+from formatter import AS_IS
+
+class _ParserBase:
+    """Basic parser functionality.
+
+    Text passed to feed() is split into runs of plain text and
+    formatting commands of the form '<name>' or '</name>'.  Plain text
+    is passed to handle_text(), which must be supplied by a subclass.
+    A command is dispatched to the method start_NAME() or end_NAME()
+    (with any '-' in the name replaced by '_'), or, when no such
+    method exists, to unknown_starttag() or unknown_endtag().
+
+    """
+
+    # This regexp is suitable for richtext.  It matches a (possibly
+    # empty) run of text followed by exactly one command.
+    CMD = re.compile('(?P<text>.*?)(?P<command></?[-a-z0-9]{1,40}>)', re.I | re.S)
+    # Length of the longest string the 'command' group can match:
+    # '</', 40 name characters and '>'.
+    MAX_CMD_LEN = 43
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        """Discard any buffered input and forget all open commands."""
+        self.buf = ''
+        self.stack = []
+
+    def close(self):
+        """Process whatever input is still buffered."""
+        self.__parse('flush')
+
+    def feed(self, str):
+        """Add STR to the input and process as much as possible."""
+        self.buf = self.buf + str
+        self.__parse()
+
+    def __parse(self, flush = 0):
+        """Dispatch the text and commands found in the buffer.
+
+        Unless FLUSH is true, the last MAX_CMD_LEN - 1 characters are
+        kept in the buffer, since they may be the beginning of a
+        command which is completed by the next call to feed().
+
+        """
+        buf, i = self.buf, 0
+        while i < len(buf):
+            m = self.CMD.match(buf, i)
+            if not m:
+                break
+            i = m.end()
+            if m.group('text'):
+                self.handle_text(m.group('text'))
+            self.handle_command(m.group('command'))
+
+        if flush:
+            keep = 0
+        else:
+            keep = self.MAX_CMD_LEN - 1
+        end = len(buf) - keep
+        if i < end:
+            # No command can start before END, so this is plain text.
+            self.handle_text(buf[i:end])
+            i = end
+        self.buf = buf[i:]
+
+    def handle_command(self, command):
+        """Dispatch COMMAND (including its angle brackets)."""
+        command = string.lower(command)
+        if command[:2] == '</':
+            self.end(command[2:-1])
+        else:
+            self.start(command[1:-1])
+
+    def start(self, command):
+        """Open COMMAND and call its start_ method."""
+        self.stack.append(command)
+        method = 'start_' + string.replace(command, '-', '_')
+        try:
+            f = getattr(self, method)
+        except AttributeError:
+            self.unknown_starttag(command)
+        else:
+            f()
+
+    def end(self, command):
+        """Close COMMAND, and every command opened since it.
+
+        An end command which matches no open command is ignored.
+
+        """
+        if not command in self.stack:
+            return                      # Unmatched -- ignore
+        while 1:
+            c = self.stack.pop()
+            method = 'end_' + string.replace(c, '-', '_')
+
+            try:
+                f = getattr(self, method)
+            except AttributeError:
+                # Report the command actually being closed, which is
+                # not COMMAND when closing an inner command implicitly.
+                self.unknown_endtag(c)
+            else:
+                f()
+
+            if c == command:
+                break
+
+    def unknown_starttag(self, command):
+        pass
+    def unknown_endtag(self, command):
+        pass
+
+
+class _ParserCommon(_ParserBase):
+    """Commands common to richtext and enriched text.
+
+    Each command is translated into calls on the formatter object
+    given to the constructor.  Fonts are pushed as 4-tuples of
+    (size, italic, bold, fixed-width).
+
+    """
+
+    def __init__(self, formatter):
+        _ParserBase.__init__(self)
+        self.formatter = formatter
+        # Font sizes of the currently open <smaller>/<larger> commands.
+        self.sizestack = [ None ]
+
+    def smaller(self, size = None):
+        """Get next smaller font size."""
+        return AS_IS
+
+    def larger(self, size = None):
+        """Get next larger font size."""
+        return AS_IS
+
+    # Font control:
+    def start_bold(self):
+        self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
+    def end_bold(self):
+        self.formatter.pop_font()
+    def start_italic(self):
+        self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
+    def end_italic(self):
+        self.formatter.pop_font()
+    def start_fixed(self):
+        self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
+    def end_fixed(self):
+        self.formatter.pop_font()
+
+    def start_smaller(self):
+        newsize = self.smaller(self.sizestack[-1])
+        self.sizestack.append(newsize)
+        self.formatter.push_font((newsize, AS_IS, AS_IS, AS_IS))
+    def end_smaller(self):
+        self.sizestack.pop()
+        self.formatter.pop_font()
+    def start_larger(self):
+        newsize = self.larger(self.sizestack[-1])
+        self.sizestack.append(newsize)
+        # Use the size chosen by larger(), as start_smaller() does with
+        # smaller().  When larger() expresses no preference (as the
+        # default implementation does) fall back to 'large'.
+        if newsize is AS_IS:
+            fontsize = 'large'
+        else:
+            fontsize = newsize
+        self.formatter.push_font((fontsize, AS_IS, AS_IS, AS_IS))
+    def end_larger(self):
+        self.sizestack.pop()
+        self.formatter.pop_font()
+
+    # Text style
+    def start_underline(self):
+        self.formatter.push_style('underline')
+    def end_underline(self):
+        self.formatter.pop_style()
+
+    def start_excerpt(self):
+        # An excerpt is a separate, italic paragraph with its own margin.
+        self.formatter.end_paragraph(1)
+        self.start_italic()
+        self.formatter.push_margin('excerpt')
+    def end_excerpt(self):
+        self.formatter.end_paragraph(1)
+        self.end_italic()
+        self.formatter.pop_margin()
+
+    # Justification
+    def start_center(self):
+        self.formatter.push_alignment('center')
+    def end_center(self):
+        self.formatter.pop_alignment()
+    def start_flushleft(self):
+        self.formatter.push_alignment('left')
+    def end_flushleft(self):
+        self.formatter.pop_alignment()
+    def start_flushright(self):
+        self.formatter.push_alignment('right')
+    def end_flushright(self):
+        self.formatter.pop_alignment()
+    def start_flushboth(self):
+        self.formatter.push_alignment('justify')
+    def end_flushboth(self):
+        self.formatter.pop_alignment()
+
+class RichtextParser(_ParserCommon):
+    """A parser for richtext.
+
+    This class parses textual data in the text/richtext format defined
+    in RFC 1341.
+
+    The API is the same as that of HTMLParser.  Output is through a
+    formatter object.
+
+    SEE ALSO
+
+        HTMLParser.
+
+    BUGS
+
+        The following richtext commands are ignored by this parser:
+
+            subscript, superscript
+            outdent, outdentright
+            samepage, heading, footing
+            iso-8859-*, us-ascii
+
+    """
+    def __init__(self, formatter):
+        _ParserCommon.__init__(self, formatter)
+        # True while inside a <comment>; everything but the comment
+        # commands themselves is then discarded.
+        self.in_comment = 0
+
+    def start(self, command):
+        if self.in_comment and command != 'comment':
+            return
+        _ParserCommon.start(self, command)
+
+    def end(self, command):
+        if self.in_comment and command != 'comment':
+            return
+        _ParserCommon.end(self, command)
+
+    def handle_text(self, text):
+        if not self.in_comment:
+            self.formatter.add_flowing_data(text)
+
+    def start_no_op(self):
+        pass
+    def end_no_op(self):
+        pass
+
+    def start_comment(self):
+        self.in_comment = 1
+    def end_comment(self):
+        # Still in a comment if an enclosing <comment> remains open.
+        self.in_comment = 'comment' in self.stack
+
+    # The next three commands stand alone: close them immediately,
+    # since no end tag is allowed.
+    def start_lt(self):
+        self.formatter.add_flowing_data('<')
+        self.end('lt')
+    def start_nl(self):
+        self.formatter.add_literal_data('\n')
+        self.end('nl')
+    def start_np(self):
+        # New page -> horizontal rule
+        self.formatter.add_hor_rule()
+        self.end('np')
+
+    def start_paragraph(self):
+        self.formatter.end_paragraph(1)
+    def end_paragraph(self):
+        self.formatter.end_paragraph(1)
+
+    def start_signature(self):
+        # A signature is set in italics below a horizontal rule.
+        self.formatter.add_hor_rule()
+        self.start_italic()
+    def end_signature(self):
+        self.formatter.end_paragraph(1)
+        self.end_italic()
+
+    # Margin control
+    def start_indent(self):
+        self.formatter.push_margin('indent')
+    def end_indent(self):
+        self.formatter.pop_margin()
+    def start_indentright(self):
+        self.formatter.push_margin('indentright')
+    def end_indentright(self):
+        self.formatter.pop_margin()
+
+
+class EnrichedTextParser(_ParserCommon):
+    """A parser for text/enriched.
+
+    This class parses textual data in the text/enriched format defined
+    in RFC 1896.
+
+    The API is the same as that of HTMLParser.  Output is through a
+    formatter object.
+
+    A command is not started as soon as it is seen: it is deferred
+    until it is known whether a <param>...</param> follows, so that
+    the start_ method can examine the parameter in self.param.
+
+    SEE ALSO
+
+        HTMLParser.
+
+    BUGS
+
+        The following commands from RFC 1896 are ignored by this parser:
+
+            FontFamily, Color, Lang
+
+        Paragraph breaking/new-line handling is not quite right.
+
+    """
+    # As in the base class, but names may be 60 characters long and
+    # '<<' (a literal '<') counts as a command.
+    CMD = re.compile('(?P<text>.*?)(?P<command></?[-a-z0-9]{1,60}>|<<)', re.I | re.S)
+    MAX_CMD_LEN = 63
+
+    def __init__(self, formatter):
+        _ParserCommon.__init__(self, formatter)
+        self.nofill = 0
+        self.deferred = None            # command waiting to be started
+        self.param = None               # text of <param>, while inside one
+        self.pistack = []               # undo functions for <paraindent>
+
+    def handle_command(self, command):
+        if command == '<<':
+            self.handle_text('<')
+        elif self.param is not None:
+            # Inside <param>: everything up to </param> is parameter text.
+            if string.lower(command) == '</param>':
+                self.__start_deferred()
+            else:
+                self.param = self.param + command
+        else:
+            _ParserCommon.handle_command(self, command)
+
+    def __start_deferred(self):
+        """Start the pending command (if any) and forget its parameter."""
+        if self.deferred:
+            _ParserCommon.start(self, self.deferred)
+        self.deferred = None
+        self.param = None
+
+    def start(self, command):
+        if command == 'param':
+            self.param = ''
+        else:
+            self.__start_deferred()
+            self.deferred = command
+
+    def end(self, command):
+        self.__start_deferred()
+        _ParserCommon.end(self, command)
+
+    _re_NLS = re.compile('(.*?)(\n+)')
+
+    def handle_text(self, text):
+        if self.param is not None:
+            self.param = self.param + text
+            return
+
+        self.__start_deferred()
+
+        text = string.replace(text, '\r', '')
+        if not self.nofill:
+            # A single newline becomes a space; a run of N newlines
+            # becomes N - 1 newlines.
+            def replace_nls(m):
+                match = m.group()
+                if len(match) > 1:
+                    return match[1:]
+                else:
+                    return ' '
+            text = re.sub('[\n]+', replace_nls, text)
+
+        # Pass the newlines which remain to the formatter as literal
+        # data, and everything else as flowing data.
+        i = 0
+        while i < len(text):
+            m = self._re_NLS.match(text, i)
+            if not m:
+                self.formatter.add_flowing_data(text[i:])
+                break
+            i = m.end()
+            if m.group(1):
+                self.formatter.add_flowing_data(m.group(1))
+            self.formatter.add_literal_data(m.group(2))
+
+    def start_nofill(self):
+        self.nofill = 1
+    def end_nofill(self):
+        # Still unfilled if an enclosing <nofill> remains open.
+        self.nofill = 'nofill' in self.stack
+
+    def start_paraindent(self):
+        self.formatter.end_paragraph(0)
+        # self.param is None when <paraindent> has no <param>; treat
+        # that like an unrecognized parameter instead of crashing.
+        param = string.strip(string.lower(self.param or ''))
+        if param == 'left':
+            self.formatter.push_margin('indent')
+            self.pistack.append(self.formatter.pop_margin)
+        elif param == 'right':
+            self.formatter.push_margin('rightindent')
+            self.pistack.append(self.formatter.pop_margin)
+        elif param == 'in':
+            self.formatter.push_style('indent')
+            self.pistack.append(self.formatter.pop_style)
+        elif param == 'out':
+            self.formatter.push_style('hangingindent')
+            self.pistack.append(self.formatter.pop_style)
+        else:
+            # Nothing to undo in end_paraindent().
+            self.pistack.append(None)
+    def end_paraindent(self):
+        endfunc = self.pistack.pop()
+        if endfunc:
+            endfunc()
+        self.formatter.end_paragraph(0)
+
+# Some tests:
+if __name__ == '__main__':
+    import formatter
+    import sys
+
+    # Render text/enriched read from stdin as plain text on stdout.
+    writer = formatter.DumbWriter(sys.stdout)
+    formatter = formatter.AbstractFormatter(writer)
+    parser = EnrichedTextParser(formatter)
+
+    line = sys.stdin.readline()
+    while line:
+        parser.feed(line)
+        line = sys.stdin.readline()
+    parser.close()
+
Index: 2_0_3.1/Mailman/FilteringMimeWriter.py
--- 2_0_3.1/Mailman/FilteringMimeWriter.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki ()
+++ 0.4(w)/Mailman/FilteringMimeWriter.py Wed, 04 Apr 2001 09:31:11 -0700 dairiki (mailman/k/4_FilteringM 1.2 664)
@@ -0,0 +1,671 @@
+# Copyright (C) 2001 by Geoffrey T. Dairiki
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#FIXME: fix comments
+#FIXME: test with no space after colon.
+
+"""Tools for filtering MIME messages."""
+
+import string
+import re
+import mimetools
+import rfc822
+from Mailman.pythonlib import multifile
+import StringIO
+import cStringIO
+import MimeWriter
+
+def is_raw_encoding(encoding):
+    """Tell whether ENCODING leaves the data untransformed.
+
+    ENCODING is the value of a MIME Content-Transfer-Encoding header,
+    or a false value if there is none.  Returns true iff data in that
+    encoding needs no decoding, i.e. there is no encoding or it is one
+    of '7bit', '8bit' or 'binary' (in any case).
+
+    """
+    if not encoding:
+        return 1
+    return string.lower(encoding) in ('7bit', '8bit', 'binary')
+
+def decode(infp, outfp, encoding):
+    """Wrapper for mimetools.decode().
+
+    Mimetools.decode() fails on the raw encodings (the ones which are
+    not really encoded).  This copies the data unchanged for those,
+    and so works on any recognized encoding.
+
+    """
+    if not is_raw_encoding(encoding):
+        mimetools.decode(infp, outfp, encoding)
+    else:
+        mimetools.copybinary(infp, outfp)
+
+def encode(infp, outfp, encoding):
+    """Wrapper for mimetools.encode().
+
+    Mimetools.encode() fails on the raw encodings (the ones which are
+    not really encoded).  This copies the data unchanged for those,
+    and so works on any recognized encoding.
+
+    """
+    if not is_raw_encoding(encoding):
+        mimetools.encode(infp, outfp, encoding)
+    else:
+        mimetools.copybinary(infp, outfp)
+
+# Matches any character outside the 7-bit ASCII range.
+_re_NONBINARY = re.compile(r'[\x80-\xff]')
+
+def is_ascii(str):
+    """Does the string contain only ASCII characters?"""
+    if _re_NONBINARY.search(str):
+        return 0
+    return 1
+
+# Any number of lines ending in LF or CR-LF, then an optional final
+# line without terminator.  No line may contain NUL, a bare CR or LF,
+# or a non-ASCII character, nor be longer than 200 characters.
+_re_7BITBODY = re.compile(r'(?:[^\x00\r\n\x80-\xff]{0,200}\r?\n)*'
+                          r'[^\x00\r\n\x80-\xff]{0,200}'
+                          r'$')
+
+def is_valid_7bit_data(body):
+    """Check whether BODY contains data suitable for '7bit' MIME encoding.
+
+    The body of MIME messages using the '7bit' transfer-encoding is
+    subject to the following limitations:
+      o Only ASCII characters are allowed.
+      o No NUL (0x00) characters are allowed.
+      o CR and LF can only occur as part of CR-LF pairs.  (We
+        extend this to allow bare LF's to separate lines.)
+      o Lines can be a maximum of 998 characters long (not counting
+        the terminating CR-LF).  We are stricter, and allow at most
+        200 characters per line.
+
+    Returns a true value (a match object) if BODY qualifies, and None
+    otherwise.
+
+    """
+    verdict = _re_7BITBODY.match(body)
+    return verdict
+
+# Runs of characters expected in text: CR, LF, TAB and printable ASCII.
+_re_TEXTCHARS = re.compile(r'[\r\n\t\x20-\x7e]+')
+
+def looks_binary(body):
+    """Check whether BODY looks like binary data.
+
+    BODY is taken to be binary when more than a quarter of its
+    characters are something other than CR, LF, TAB or printable
+    ASCII; otherwise it is assumed to be textual.
+
+    """
+    leftovers = _re_TEXTCHARS.sub('', body)
+    return len(leftovers) > len(body) / 4
+
+class Plist:
+    """An ordered list of MIME header parameters.
+
+    Wraps a list of (name, value) pairs, like the plist arguments of
+    MimeWriter.  Parameters can be looked up, set and deleted by name
+    using subscripting; names are compared without regard to case.
+    Looking up a missing name raises IndexError.
+
+    str() of a Plist is the text to append to a header value: each
+    parameter on a continuation line of its own, introduced by ';'.
+
+    """
+    def __init__(self, plist = None):
+        if plist is None:
+            plist = []
+        # Keep a private copy, so that neither the caller's list nor a
+        # list shared between instances is ever modified through us.
+        self.plist = list(plist)
+
+    def __str__(self):
+        params = []
+        for key, val in self.plist:
+            # Only a non-empty value free of specials and white space
+            # may be written as a bare token.  Anything else must be a
+            # quoted string, within which '"', '\' and line breaks are
+            # escaped with a backslash.
+            if not val or re.search(r'[][()<>@,;:\\"/?= \t\r\n]', val):
+                val = '"%s"' % re.sub(r'(["\\\n\r])', r'\\\1', val)
+            params.append(';\n\t%s=%s' % (key, val))
+        return string.join(params, '')
+
+    def get(self, key, default = None):
+        """Return the value of parameter KEY, or DEFAULT if not set."""
+        try:
+            return self[key]
+        except IndexError:
+            return default
+
+    def __len__(self):
+        return len(self.plist)
+
+    def __getitem__(self, key):
+        key = string.lower(key)
+        for param, val in self.plist:
+            if string.lower(param) == key:
+                return val
+        raise IndexError
+
+    def __setitem__(self, key, val):
+        # Replace any existing value; the new one goes first.
+        del self[key]
+        self.plist.insert(0, (key, val))
+
+    def __delitem__(self, key):
+        # Deleting a parameter which is not set is not an error.
+        # (KEY is passed as a default argument because nested
+        # functions cannot see the locals of the enclosing function.)
+        def out_key (pair, key = string.lower(key)):
+            param, val = pair
+            return string.lower(param) != key
+        self.plist = filter(out_key, self.plist)
+
+def message_headers(message):
+    """Get a list of all headers of an rfc822.Message object.
+
+    Returns a list of pairs, (header, value), in the order in which
+    the headers appear in MESSAGE.  Header is the name of the header
+    (case preserved, not lower-cased), and value is everything after
+    the colon, with leading white space stripped and any continuation
+    lines appended.  (Line breaks within the value are kept.)
+
+    MESSAGE itself is not modified.
+
+    """
+    # Work on a copy: reversing message.headers itself would leave the
+    # caller's message with its headers in reverse order.
+    rheaders = message.headers[:]
+    rheaders.reverse()
+
+    # Walking backwards, continuation lines are seen before the line
+    # they continue; collect them in HEAD until that line turns up.
+    headers, head = [], ''
+    for line in rheaders:
+        head = line + head
+        if head[:1] in (' ', '\t'):
+            continue
+        parts = string.split(head, ':', 1)
+        if len(parts) == 2:
+            headers.append((parts[0], string.lstrip(parts[1])))
+        # (A line without a colon is not a header; drop it.)
+        head = ''
+    headers.reverse()
+    return headers
+
+
+class DigestMessage(mimetools.Message):
+    """A Message whose content-type defaults to 'message/rfc822'.
+
+    This is the message type of the subparts of a 'multipart/digest'.
+
+    """
+    def __init__(self, fp, seekable = 1):
+        mimetools.Message.__init__(self, fp, seekable)
+        if self.getheader('content-type') is not None:
+            return
+        # A bit of a hack: substitute our own default, and parse the
+        # type over again.
+        self.typeheader = 'message/rfc822'
+        self.parsetype()
+
+def write_message(message, mimewriter):
+    """Write MESSAGE using MIMEWRITER.
+
+    The MESSAGE (which should be a mimetools.Message) is written to
+    the MIMEWRITER (which should be a MimeWriter).  The headers are
+    copied first (Mime-Version and Content-Type being regenerated),
+    then the body.  The subparts of a multipart message are each
+    written, recursively, through mimewriter.nextpart().
+
+    Raises RuntimeError if a multipart MESSAGE has no boundary
+    parameter.
+
+    """
+
+    def plist(message):
+        """Get MimeWriter style plist from mimetools.Message."""
+        def has_value(param):
+            # A malformed parameter without '=' cannot be split into
+            # name and value; such parameters are dropped.
+            return string.find(param, '=') >= 0
+
+        def split_param(param):
+            pname, pval = string.split(param, '=', 1)
+            return (string.lower(pname), rfc822.unquote(pval))
+
+        return map(split_param, filter(has_value, message.getplist()))
+
+
+    def discards_data(fp):
+        """Determine whether file object ignores data written to it.
+
+        The startbody and startmultipartbody methods of a writer may
+        return dummy file-like objects which just discard any data
+        written to them.  Such objects have a true 'discards_data'
+        attribute.
+
+        This function detects those dummy file objects, so that
+        copying data to them can be skipped.
+
+        """
+        try:
+            return fp.discards_data
+        except AttributeError:
+            return 0
+
+    # Write the headers
+    for head, val in message_headers(message):
+        if string.lower(head) not in ('mime-version', 'content-type'):
+            mimewriter.addheader(head, val)
+    mimewriter.addheader('Mime-Version', '1.0 (Plaintext.py)')
+
+    if message.seekable:
+        message.rewindbody()
+
+    if message.getmaintype() == 'multipart':
+        # Multipart message
+        boundary = message.getparam('boundary')
+        if not boundary:
+            raise RuntimeError, 'No boundary for multipart data'
+
+        # Coerce input file to MultiFile
+        if isinstance(message.fp, multifile.MultiFile):
+            infp = message.fp
+        else:
+            infp = multifile.MultiFile(message.fp)
+        infp.push(boundary)
+
+        # The boundary is passed separately from the other parameters.
+        params = filter(lambda p: p[0] != 'boundary', plist(message))
+        outfp = mimewriter.startmultipartbody(message.getsubtype(), boundary, params)
+
+        if discards_data(outfp):
+            infp.pop()
+        else:
+            if message.getsubtype() == 'digest':
+                message_type = DigestMessage
+            else:
+                message_type = mimetools.Message
+
+            mimetools.copybinary(infp, outfp) # copy preamble
+            while infp.next():
+                subwriter = mimewriter.nextpart()
+                subpart = message_type(infp, 0)
+                write_message(subpart, subwriter)
+            mimewriter.lastpart()
+            infp.pop()
+            mimetools.copybinary(infp, outfp) # Copy trailer.
+    else:
+        # Single part message
+        outfp = mimewriter.startbody(message.gettype(), plist(message))
+        if not discards_data(outfp):
+            mimetools.copybinary(message.fp, outfp)
+
+
+    mimewriter.flushheaders()
+
+class FilteringMimeWriter(MimeWriter.MimeWriter):
+    #FIXME: fix comments
+    """A class for filtering MIME output.
+
+    DESCRIPTION
+
+        This is a sub-class of MimeWriter, and shares the same basic
+        interface.
+
+        It is designed to be easily sub-classed to perform useful filtering
+        of MIME data.
+
+        Unlike MimeWriter, it does not write the body to the output file
+        as it is produced.  The body is collected (and decoded according
+        to any Content-Transfer-Encoding: header which was added) in a
+        buffer.  Only flushheaders() writes the headers and the body,
+        the body being re-encoded in an encoding chosen to suit it.
+
+
+    API DIFFERENCES FROM MimeWriter
+
+        Always call flushheaders()
+
+            You must always call the flushheaders() method.  You should
+            call it once, only after you've finished all output (body as
+            well as headers) to the FilteringMimeWriter.
+
+    WARNINGS
+
+        For the output to be valid MIME you must add a Mime-Version:
+        header to the outer entity (this is true for the plain MimeWriter
+        as well.)  A value of '1.0' will work fine.
+
+
+    """
+    def __init__(self, fp):
+        MimeWriter.MimeWriter.__init__(self, fp)
+        self._reset()
+
+    def _reset(self):
+        """Forget everything known about the current entity."""
+        # Transfer-encoding of the data which will be written to the body.
+        self.input_encoding = None
+        # Buffer for the body, once startbody() or startmultipartbody()
+        # has been called.
+        self.body = None
+        # Writer for the subpart currently being written, if any.
+        self.subpart = None
+
+    def addheader(self, header, value):
+        """Add a header to the entity.
+
+        A Content-Transfer-Encoding: header is not output; it is taken
+        to describe the data about to be written to the body.  Other
+        headers are output unless rejected by _keepheader().
+
+        """
+        lchead = string.lower(header)
+
+        assert not self.body, 'addheader() after startbody()'
+
+        # One shouldn't add a content-type to a regular MimeWriter,
+        # since the MimeWriter generates one itself.
+        assert lchead != 'content-type', "Attempt to add Content-Type: header to MimeWriter"
+
+        if lchead == 'content-transfer-encoding':
+            self.input_encoding = string.strip(value)
+        elif self._keepheader(header):
+            MimeWriter.MimeWriter.addheader(self, header, value)
+
+    # A list of headers to always accept.
+    # This list can also include header prefixes (with trailing '*')
+    # which will match any header beginning with that prefix.
+    INCLUDE_HEADERS = ('content-description',)
+    # A list of headers to ignore.  (Same format as above.)
+    EXCLUDE_HEADERS = ()                #('content-*',)
+
+    def _keepheader(self, header):
+        """Tell whether HEADER is to be included in the output.
+
+        INCLUDE_HEADERS takes precedence over EXCLUDE_HEADERS; a header
+        matched by neither is kept.
+
+        """
+        def _match(header, pattern_list):
+            # Return the first pattern in PATTERN_LIST matching HEADER,
+            # or None if there is none.
+            for pat in pattern_list:
+                if pat[-1] == '*':
+                    if header[:len(pat)-1] == pat[:-1]:
+                        return pat
+                elif header == pat:
+                    return pat
+            return None
+
+        header = string.lower(header)
+        if _match(header, self.INCLUDE_HEADERS):
+            return 1
+        return not _match(header, self.EXCLUDE_HEADERS)
+
+    def flushheaders(self):
+        """Finish the entity, and write its headers and body."""
+        if self.is_multipart():
+            # Terminate the multipart body, if that wasn't done yet.
+            if self.subpart:
+                self.lastpart()
+        else:
+            assert not self.subpart
+            if self.body:
+                self.body.flush()
+
+        self._flushheaders()
+
+    def _flushheaders(self):
+        """Write headers and encoded body, then reset the writer."""
+        body = self.body
+        if body:
+            encoding = self._get_encoding_for_body()
+            self._add_mime_headers(encoding = encoding)
+
+        # MimeWriter.flushheaders() empties self._headers, so find out
+        # first whether there are any.
+        have_headers = self._headers
+        MimeWriter.MimeWriter.flushheaders(self)
+
+        if body:
+            if have_headers:
+                # Blank line between headers and body.
+                self._fp.write("\n")
+            body.seek(0)
+            encode(body, self._fp, encoding)
+            body.close()
+
+        self._reset()
+
+    def _get_encoding_for_body(self):
+        """Get MIME transfer-encoding suitable for encoding the body.
+
+        Returns '7bit' if the body is valid as such.  Otherwise
+        'base64' if the body looks like binary data, and
+        'quoted-printable' if it looks like text.
+
+        """
+        if self.body.is_valid_7bit_data():
+            return '7bit'
+        if self.body.looks_binary():
+            return 'base64'
+        return 'quoted-printable'
+
+    def _add_mime_headers(self, encoding = 'binary'):
+        """Add Content-Type: and Content-Transfer-Encoding: headers."""
+        # Add mime headers for body
+        body = self.body
+        if body:
+            MimeWriter.MimeWriter.addheader(self, "Content-Type",
+                                            body.content_type + str(body.plist))
+            if encoding:
+                MimeWriter.MimeWriter.addheader(self, "Content-Transfer-Encoding",
+                                                encoding)
+
+    def startbody(self, ctype, plist=[], prefix=1):
+        """Start the body of a single part entity.
+
+        Returns a file-like object to write the body to.  The data
+        written should be encoded as declared by the
+        Content-Transfer-Encoding: header added before (if any); it is
+        decoded as it is written.
+
+        PREFIX is accepted for compatibility with MimeWriter, and is
+        not used.
+
+        """
+        assert ctype
+        assert not self.body, 'can only startbody once'
+
+        plist = Plist(plist)
+        self.body = self._get_body(ctype, plist)
+
+        # No need to decode data which is going to be discarded.
+        decoder = self._get_decoder(self.input_encoding)
+        if decoder and not self.body.discards_data:
+            self.body = decoder(self.body, self.input_encoding)
+
+        return self.body
+
+    def _get_body(self, ctype, plist):
+        """Create the buffer for a body of type CTYPE."""
+        return MimeBody(ctype, plist, annotate = self._annotate)
+
+    def _get_decoder(self, encoding):
+        """Get the filter class for decoding ENCODING, or None if none is needed."""
+        if is_raw_encoding(encoding):
+            return None
+        elif string.lower(encoding) in ('uuencode', 'x-uuencode', 'uue', 'x-uue'):
+            return _UUDecoder
+        return _DefaultDecoder
+
+    def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
+        """Start the body of a multipart entity.
+
+        Returns a file-like object to which the preamble can be
+        written.  Use nextpart() to start each subpart.
+
+        PREFIX is accepted for compatibility with MimeWriter, and is
+        not used.
+
+        """
+        assert subtype
+        assert not self.body, 'can only startmultipartbody once'
+        plist = Plist(plist)
+        plist['boundary'] = boundary
+        self.body = self._get_multipart_body(subtype, plist)
+        return self.body
+
+    def _get_multipart_body(self, subtype, plist):
+        """Create the buffer for a multipart body."""
+        return MultipartBody(subtype, plist = plist, annotate = self._annotate)
+
+    def nextpart(self):
+        """Start a new subpart, and return the writer for it."""
+        assert self.is_multipart()
+
+        if self.subpart:
+            self.subpart.flushheaders() # finish previous part
+
+        self.body.add_separator()
+
+        # The subpart is written into the body of this entity.
+        self.subpart = self.body.SubpartWriter(self.body)
+        return self.subpart
+
+    def lastpart(self):
+        """Finish the last subpart, and add the closing boundary."""
+        assert self.is_multipart()
+
+        if self.subpart:
+            self.subpart.flushheaders() # finish previous part
+            self.subpart = None
+
+        self.body.add_separator(final = 1)
+
+    def is_multipart(self):
+        """Tell whether a multipart body has been started."""
+        body = self.body
+        if not body:
+            return 0
+        # Multipart bodies are recognized by their SubpartWriter attribute.
+        try:
+            body.SubpartWriter
+        except AttributeError:
+            return 0
+        return 1
+
+
+    def _annotate(self, str):
+        """Add a comment to the header of the message.
+
+        A header with name 'X-Plaintext' and value STR is added to
+        the output message.
+
+        This method can be called even after output to the body
+        has started.
+
+        """
+        from mimify import mime_encode_header
+        # Use mime_encode_header to allow non-ascii
+        # (FIXME: assumes iso-8859-1)
+        MimeWriter.MimeWriter.addheader(self, 'X-Plaintext',
+                                        mime_encode_header(str))
+
+
+class MimeBody(StringIO.StringIO):
+    """A buffer for the (decoded) body of a MIME entity.
+
+    This is a StringIO which also records the content-type of the
+    body (lower-cased, in self.content_type) and the parameters of
+    the content-type (in self.plist).
+
+    ANNOTATE, if given, is a function of one argument.  It is called
+    with the text of each comment added by add_comment().
+
+    """
+
+    # Data written to a MimeBody is kept.
+    discards_data = 0
+
+    def __init__(self, ctype, plist = None, annotate = None):
+        StringIO.StringIO.__init__(self)
+        self.content_type = string.lower(ctype)
+        # Make a new Plist for each body, rather than sharing a single
+        # default instance between all of them.
+        if plist is None:
+            plist = Plist()
+        self.plist = plist
+        if annotate:
+            self.annotate = annotate
+
+    def annotate(self, str):
+        """Fallback used when no ANNOTATE function was given."""
+        pass
+
+    def writelines(self, list):
+        self.write(string.join(list, ''))
+
+    def getvalue(self):
+        self.flush()
+        return StringIO.StringIO.getvalue(self)
+
+    def is_valid_7bit_data(self):
+        return is_valid_7bit_data(self.getvalue())
+
+    def looks_binary(self):
+        return looks_binary(self.getvalue())
+
+    def is_ascii(self):
+        return is_ascii(self.getvalue())
+
+    def add_linebreak(self):
+        """End the current line, unless at the beginning of a line."""
+        body = self.getvalue()
+        if body and body[-1] != '\n':
+            self.write('\n')
+
+    def add_comment(self, str):
+        """Add the comment STR, on a line of its own, to the body.
+
+        The comment is also passed to the annotate function.
+
+        """
+        self.annotate(str)
+        self.add_linebreak()
+        self.write('[ %s ]\n' % str)
+
+
+class MultipartBody(MimeBody):
+    """A buffer for the body of a multipart entity.
+
+    PLIST must include the 'boundary' parameter.
+
+    """
+    def __init__(self, subtype, plist=Plist(), annotate = None):
+        ctype = 'multipart/' + subtype
+        MimeBody.__init__(self, ctype, plist = plist, annotate = annotate)
+        self.boundary = plist['boundary']
+
+    def add_separator(self, final = 0):
+        """Write a boundary line; the closing one if FINAL is true."""
+        if final:
+            separator = '\n--%s--\n' % self.boundary
+        else:
+            separator = '\n--%s\n' % self.boundary
+        self.write(separator)
+
+    # The type of writer used for the subparts of this body.
+    class SubpartWriter(FilteringMimeWriter):
+        EXCLUDE_HEADERS = ('mime-version',) + FilteringMimeWriter.EXCLUDE_HEADERS
+
+
+################################################################
+class BodyFilter:
+    """A file-like object which writes to the body of a FilteringMimeWriter.
+
+    This object implements output file semantics.  Data written to it
+    is passed on, through output(), to the body object it wraps.
+    Attributes not defined here are taken from that body.
+
+    Subclasses filter the data by overriding write() and flush().
+
+    """
+
+    def __init__(self, body):
+        self.body = body
+
+    def __getattr__(self, attr):
+        # "Inherit" methods from underlying body.  The value is saved
+        # in the instance, so each attribute is looked up just once.
+        value = getattr(self.body, attr)
+        self.__dict__[attr] = value
+        return value
+
+    def output(self, str):
+        """Pass (filtered) data on to the body."""
+        self.body.write(str)
+
+    # The following methods should be overloaded
+    def write(self, str):
+        self.output(str)
+
+    def writelines(self, lines):
+        data = string.join(lines, '')
+        self.write(data)
+
+    def flush(self):
+        self.flushoutput()
+
+    def flushoutput(self):
+        """Flush the body."""
+        self.body.flush()
+
+
+class LineFilter(BodyFilter):
+    """A BodyFilter which processes its input in whole lines.
+
+    The data written is passed to handle_chunk(), which subclasses
+    must define, in chunks which consist of one or more complete
+    lines.
+
+    """
+    def __init__(self, body):
+        BodyFilter.__init__(self, body)
+        # Input after the last newline seen so far.
+        self.buf = ''
+
+    def write(self, str):
+        cut = string.rfind(str, '\n') + 1
+        if cut:
+            # Hand over all complete lines; keep what follows them.
+            self.handle_chunk(self.buf + str[:cut])
+            self.buf = str[cut:]
+        else:
+            self.buf = self.buf + str
+
+    def flushinput(self):
+        """Process a final line which was not terminated by a newline."""
+        if not self.buf:
+            return
+        self.handle_chunk(self.buf + '\n')
+        self.buf = ''
+
+    def flush(self):
+        self.flushinput()
+        self.flushoutput()
+
+class UUFilter(LineFilter):
+    """A LineFilter which tells uuencoded data from other text.
+
+    The input is searched for the 'begin' and 'end' lines of
+    uuencoded files.  Subclasses override these methods:
+
+        handle_begin(filename)  called for each 'begin' line
+        handle_uudata(data)     called with the lines after a 'begin'
+        handle_end()            called for each 'end' line
+        handle_text(data)       called with all other lines
+
+    """
+    def __init__(self, body):
+        LineFilter.__init__(self, body)
+        # The handler for data: handle_text or handle_uudata.
+        self.handle_data = self.handle_text
+
+    # Matches a whole 'begin' or 'end' line.  (Chunks always end with
+    # a newline, so there is no need to match at the end of the data.)
+    SPLIT = re.compile(r'^(begin[ \t]+[0-7]{3,5}.*?|end)[ \t]*\n', re.M)
+
+    def handle_chunk(self, chunk):
+        # Since SPLIT has one group, split() yields the data and the
+        # begin/end lines alternately, starting (and ending) with data.
+        hunks = self.SPLIT.split(chunk)
+        self.handle_data(hunks.pop(0))
+        assert len(hunks) % 2 == 0
+        while hunks:
+            begin = re.match(r'begin\s+[0-7]{3,5}\s*(?P<name>.*)', hunks.pop(0))
+            if begin:
+                self.handle_begin(begin.group('name'))
+                self.handle_data = self.handle_uudata
+            else:
+                # NOTE(review): a line reading 'end' outside of any
+                # uuencoded data gets here too, and is dropped from
+                # the text -- confirm whether that is intended.
+                self.handle_end()
+                self.handle_data = self.handle_text
+            self.handle_data(hunks.pop(0))
+
+    def flush(self):
+        self.flushinput()
+        # Supply the 'end' if the input ends within uuencoded data.
+        if self.handle_data == self.handle_uudata:
+            self.handle_end()
+            self.handle_data = self.handle_text
+        self.flushoutput()
+
+    def handle_begin(self, filename): pass
+    def handle_end(self): pass
+    def handle_uudata(self, line): pass
+    def handle_text(self, line): pass
+
+
+
+################################################################
+class _DefaultDecoder(LineFilter):
+    """Filter which decodes what is written, using mimetools.decode().
+
+    If the data cannot be decoded, a comment saying so is added to
+    the body, and the rest of the data is discarded.
+
+    """
+    def __init__(self, body, encoding):
+        LineFilter.__init__(self, body)
+        self.encoding = encoding
+        # Set once decoding has failed.
+        self.done = 0
+
+    def handle_chunk(self, chunk):
+        if self.done:
+            return
+
+        import binascii
+
+        # A write-only "file" which passes data to the decoder's output.
+        class OutputThrough:
+            def __init__(self, decoder):
+                self.write = decoder.output
+
+        infp = cStringIO.StringIO(chunk)
+        outfp = OutputThrough(self)
+        try:
+            try:
+                mimetools.decode(infp, outfp, self.encoding)
+            except (ValueError, binascii.Error), detail:
+                # ValueError: the encoding is unknown to mimetools.
+                # binascii.Error: the data is malformed (e.g. bad
+                # base64), which must not abort the whole message.
+                self.add_comment("%s, discarding content" % detail)
+                self.done = 1
+        finally:
+            infp.close()
+
+class _UUDecoder(UUFilter):
+    """Filter which decodes data in the 'uuencode' transfer-encoding.
+
+    Only the first uuencoded file found in the data is decoded;
+    whatever follows its 'end' line is dropped.  ENCODING is accepted
+    so that the constructor matches that of the other decoders; it is
+    not used.
+
+    """
+    def __init__(self, body, encoding):
+        UUFilter.__init__(self, body)
+        # Set when the 'end' line has been seen.
+        self.done = 0
+
+    def handle_end(self):
+        self.done = 1
+
+    def handle_uudata(self, chunk):
+        import binascii
+        if self.done:
+            return
+        # Decode line by line, skipping empty lines.
+        for line in filter(None, string.split(chunk, '\n')):
+            self.output(binascii.a2b_uu(line))
+
+
+
+################################################################
+
+# Some tests:
+if __name__ == '__main__':
+    import sys
+
+    # Checks of the 7bit validator: (sample, whether it is valid).
+    for sample, valid in (('abd', 1),
+                          ('abd\r\n', 1),
+                          ('abd\r\ndef', 1),
+                          ('ab\x00', 0),
+                          ('abd\rdef', 0)):
+        if valid:
+            assert is_valid_7bit_data(sample)
+        else:
+            assert not is_valid_7bit_data(sample)
+
+    # Filter each message file named on the command line to stdout.
+    pname = sys.argv.pop(0)
+    for file in sys.argv:
+        msg = mimetools.Message(open(file, 'r'))
+        write_message(msg, FilteringMimeWriter(sys.stdout))
Index: 2_0_3.1/Mailman/PlaintextMimeWriter.py
--- 2_0_3.1/Mailman/PlaintextMimeWriter.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki ()
+++ 0.4(w)/Mailman/PlaintextMimeWriter.py Wed, 04 Apr 2001 07:45:23 -0700 dairiki (mailman/k/2_Plaintext. 1.3 664)
@@ -0,0 +1,440 @@
+# Copyright (C) 2001 by Geoffrey T. Dairiki
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#FIXME: fix comments
+#FIXME: test with no space after colon.
+
+"""Tools for converting MIME messages into plain text."""
+
+import string
+import re
+import mimetools
+import cStringIO
+import FilteringMimeWriter
+
+################################################################
+class PlaintextMimeWriter(FilteringMimeWriter.FilteringMimeWriter):
+ """A class for converting MIME output to plain text.
+
+ DESCRIPTION
+
+ This is a sub-class of MimeWriter, and shares the same basic
+ interface.
+
+ The output is written as a (single-part) MIME message of type
+ 'text/plain'. The following transformations are made:
+
+ Any uuencoded files are removed from plain text content.
+
+ HTML, richtext, and enriched text content is converted to
+ plain text.
+
+ Multipart content is flattened. (All but (at most) one
+ subpart is discarded from 'multipart/alternative'
+ entities.)
+
+ All other content is deleted (and replaced by a diagnostic
+ message.)
+
+ In addition, the transfer-encoding's of the various subparts
+ are handled correctly. All parts are converted to a uniform
+ encoding --- either '7bit', if possible, or
+ 'quoted-printable'.
+
+ If different parts use conflicting character sets, a warning
+ is generated. Currently no attempt is made to recode the
+ data.
+
+ API DIFFERENCES FROM MimeWriter
+
+ Always call flushheaders()
+
+ You must always call the flushheaders() method. You should
+ call it once, only after you've finished all output (body as
+ well as headers) to the PlaintextMimeWriter.
+
+ Optimization of Discarded Entity Bodies
+
+ Startbody(), and startmultipartbody() return file-like objects
+ which have an extra public attribute: discards_data. If this
+ attribute has a true value, then you may safely omit writing
+ the body content (as anything you write will be ignored anyway.)
+
+ NEW METHODS
+
+ annotate(cmnt)
+
+ Add a comment to the headers of the message.
+
+ comment(cmnt)
+
+ Add a comment to both the body and headers of the message.
+
+ WARNINGS
+
+ For the output to be valid MIME you must add a MIME-Version:
+ header to the outer entity (this is true for the plain MimeWriter
+ as well.) A value of '1.0' will work fine.
+
+ BUGS
+
+ There are some problems with the handling of character sets,
+ particularly those other than ISO-8859-1 (or ASCII).
+ Currently:
+
+ The HTML to text converter converts entities to their
+ ISO-8859-1 equivalents, without regard for the declared charset
+ of the document.
+
+ The HTML to text converter doesn't check META tags for
+ charset declarations.
+
+ PlaintextMimeWriter.annotate(str) encodes headers assuming
+ ISO-8859-1 data.
+
+ I'm sure there are other problems as well.
+
+ """
+
+ EXCLUDE_HEADERS = ('content-*',)
+
+ def _get_filter(self, ctype):
+     """Return the filter class to use for content of type `ctype'.
+
+     The content type is matched case-insensitively.  Types without a
+     dedicated filter get _BinaryFilter.
+     """
+     # Built on each call: the filter classes are defined further down
+     # in this module.
+     filters = {'text/plain': _PlainTextFilter,
+                'text/enriched': _EnrichedTextFilter,
+                'text/richtext': _RichtextFilter,
+                'text/html': _HTMLFilter,
+                'message/rfc822': _RFC822Filter}
+     return filters.get(string.lower(ctype), _BinaryFilter)
+
+ def _get_body(self, ctype, plist):
+     """Return the body object for a single-part entity of type `ctype'.
+
+     A MimeBody is wrapped in a _TrailingSpaceStripper, which in turn is
+     wrapped in the filter chosen by _get_filter() (if any).
+     """
+     raw_body = FilteringMimeWriter.MimeBody(ctype, plist,
+                                             annotate = self._annotate)
+     stripped = _TrailingSpaceStripper(raw_body)
+     filter_class = self._get_filter(ctype)
+     if not filter_class:
+         return stripped
+     return filter_class(stripped)
+
+ def _get_multipart_body(self, subtype, plist):
+     """Return the body object for a multipart entity.
+
+     'alternative' gets a PlaintextAlternativeBody; every other subtype
+     gets a PlaintextMultipartBody.
+     """
+     if subtype != 'alternative':
+         body_class = PlaintextMultipartBody
+     else:
+         body_class = PlaintextAlternativeBody
+     return body_class(subtype, plist, annotate = self._annotate)
+
+
+class PlaintextMultipartBody(FilteringMimeWriter.MultipartBody):
+    """Multipart body which is written out as one 'text/plain' body.
+
+    The 'boundary' parameter is dropped.  add_separator() arranges for
+    a '--' line to be prepended to the next string written.
+    """
+    def __init__(self, subtype, plist, annotate):
+        FilteringMimeWriter.MultipartBody.__init__(self, subtype, plist, annotate)
+        del self.plist['boundary']
+        self.content_type = 'text/plain'
+        self.sep = None          # separator text waiting to be written
+
+    def write(self, str):
+        pending = self.sep
+        if pending:
+            # Emit the pending separator together with this output.
+            self.sep = None
+            str = pending + str
+        FilteringMimeWriter.MultipartBody.write(self, str)
+
+    def add_separator(self, final = 0):
+        if final:
+            self._lastpart()
+        self.add_linebreak()
+        self.sep = '--\n'
+
+    def _lastpart(self):
+        """Hook called by add_separator() for the final separator."""
+        pass
+
+    class SubpartWriter(PlaintextMimeWriter):
+        """Writer for the subparts of a PlaintextMultipartBody."""
+        EXCLUDE_HEADERS = ('mime-version',) + PlaintextMimeWriter.EXCLUDE_HEADERS
+
+        def _get_encoding_for_body(self):
+            return 'binary'
+
+        def _add_mime_headers(self, **params):
+            pass
+
+        def _extract_annotations(self):
+            """Remove any headers generated by self.annotate().
+
+            Returns a list of the annotations.
+
+            """
+            kept = []
+            notes = []
+            for line in self._headers:
+                name, value = string.split(line, ':', 1)
+                if name != 'X-Plaintext':
+                    kept.append(line)
+                else:
+                    notes.append(value)
+            self._headers = kept
+            return notes
+
+        def _flushheaders(self):
+            own_body = self.body
+            parent = self._fp
+
+            # Move annotations from subpart to parent part
+            for note in self._extract_annotations():
+                parent.annotate(note)
+
+            if own_body:
+                charset = own_body.plist.get('charset')
+                parent_charset = parent.plist.get('charset')
+
+                if charset and not own_body.is_ascii():
+                    if parent_charset:
+                        if parent_charset != charset:
+                            # FIXME: recode content?
+                            parent.add_comment("Warning: charset mismatch '%s' != '%s'" %
+                                               (charset, parent_charset))
+                    else:
+                        # The parent takes the charset of the first
+                        # non-ASCII subpart which declares one.
+                        parent.plist['charset'] = charset
+
+            PlaintextMimeWriter._flushheaders(self)
+
+# Body for 'multipart/alternative' content.  At most one subpart is kept:
+# the one whose body has the highest `plaintextness' attribute.  The kept
+# subpart's headers are only really flushed from _lastpart().
+class PlaintextAlternativeBody(PlaintextMultipartBody):
+ def __init__(self, subtype, plist, annotate):
+ PlaintextMultipartBody.__init__(self, subtype, plist, annotate)
+ # SubpartWriter of the alternative currently selected, if any.
+ self.kept_part = None
+
+ # Finish the body: flush the selected alternative for real, or note that
+ # nothing could be kept.
+ def _lastpart(self):
+ if self.kept_part:
+ self.add_comment('Picked %s from multipart/alternative'
+ % self.kept_part.body.content_type)
+ self.kept_part.real_flushheaders()
+ self.kept_part = None
+ else:
+ self.add_comment('Deleted binary multipart/alternative')
+
+ # Return true if `subbody' should replace the currently kept part.
+ # Bodies without a `plaintextness' attribute are never wanted; otherwise
+ # the body is wanted if nothing is kept yet or if its plaintextness is
+ # strictly greater than that of the kept part's body.
+ def wants(self, subbody):
+ try:
+ plaintextness = subbody.plaintextness
+ except AttributeError:
+ return 0
+ if not self.kept_part:
+ return 1
+ return plaintextness > self.kept_part.body.plaintextness
+
+ class SubpartWriter(PlaintextMultipartBody.SubpartWriter):
+ # The inherited flushheaders(), used by _lastpart() on the kept part.
+ def real_flushheaders(self):
+ PlaintextMultipartBody.SubpartWriter.flushheaders(self)
+
+ # Deferred flush: instead of flushing, register this writer as the
+ # parent's kept part if its body is wanted, else close the body.
+ def flushheaders(self):
+ body = self.body
+ parent_body = self._fp
+ kept_part = parent_body.kept_part
+ # Already the selected alternative: nothing more to do.
+ if kept_part and kept_part is self:
+ return
+
+ if body and parent_body.wants(self.body):
+ parent_body.kept_part = self
+ elif body:
+ body.close()
+
+ # Start the body as usual, but substitute a NullBody when the parent
+ # does not want the resulting body.
+ def startbody(self, ctype, plist=[], prefix=1):
+ #FIXME:
+ parent_body = self._fp
+ self.body = PlaintextMultipartBody.SubpartWriter.startbody(self, ctype,
+ plist, prefix)
+ if not parent_body.wants(self.body):
+ self.body.close()
+ self.body = NullBody(ctype, plist)
+ return self.body
+
+ # A multipart nested directly inside the alternative always gets a
+ # NullMultipartBody; `boundary' and `prefix' are ignored.
+ def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
+ return NullMultipartBody(subtype, plist)
+
+class NullBody(FilteringMimeWriter.MimeBody):
+    """A MimeBody whose write() throws its argument away."""
+
+    # True value: anything written to this body is ignored anyway.
+    discards_data = 1
+
+    def write(self, str):
+        return None
+
+#FIXME: needs testing. and cleanup
+# A multipart body which discards everything: its own data (through
+# NullBody) and, via SubpartWriter, the bodies of all nested parts.
+class NullMultipartBody(NullBody):
+ # NOTE(review): the default `plist' is a single Plist instance created at
+ # class-definition time and shared by every call that omits the argument
+ # -- confirm that it is never modified.
+ def __init__(self, subtype, plist=FilteringMimeWriter.Plist(), annotate = None):
+ NullBody.__init__(self, 'multipart/' + subtype, plist = plist, annotate = annotate)
+ # Separators are not written.
+ def add_separator(self, final = 0):
+ pass
+ class SubpartWriter(PlaintextMimeWriter):
+ # Pattern '*': presumably excludes every header -- TODO confirm
+ # against the header matching done in FilteringMimeWriter.
+ EXCLUDE_HEADERS = ('*',)
+ # NOTE(review): startbody() and startmultipartbody() below do not
+ # assign self.body, so this relies on the base class having set it
+ # -- confirm.
+ def flushheaders(self):
+ self.body.close()
+ self._reset()
+ # Every subpart body is a NullBody; `prefix' is ignored.
+ def startbody(self, ctype, plist=[], prefix=1):
+ return NullBody(ctype, plist)
+ # Nested multiparts are discarded as well; `boundary' and `prefix'
+ # are ignored.
+ def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
+ return NullMultipartBody(subtype, plist)
+
+################################################################
+class ParserFilter(FilteringMimeWriter.BodyFilter):
+    """Base class for filters built on formatter-driven parsers.
+
+    Data written to the filter is fed to an instance of `parser_class'
+    (for example an HTMLParser).  The parser drives an AbstractFormatter
+    on top of a DumbWriter (78 columns) which writes to self.output().
+    """
+
+    class _OutputThrough:
+        """Minimal file-like object whose write() is filter.output."""
+        def __init__(self, filter):
+            self.write = filter.output
+
+    def __init__(self, body, parser_class):
+        from formatter import AbstractFormatter, DumbWriter
+
+        FilteringMimeWriter.BodyFilter.__init__(self, body)
+        writer = DumbWriter(self._OutputThrough(self), maxcol = 78)
+        formatter = AbstractFormatter(writer)
+        self.parser = parser_class(formatter)
+
+    def write(self, str):
+        self.parser.feed(str)
+
+    def flush(self):
+        # Close the parser, end the output with a newline, then flush.
+        self.parser.close()
+        self.output("\n")
+        self.flushoutput()
+
+
+
+class _TrailingSpaceStripper(FilteringMimeWriter.LineFilter):
+    """Line filter removing spaces and tabs in front of line endings."""
+    def handle_chunk(self, chunk):
+        stripped = re.sub(r'[ \t]+(\r?\n)', r'\1', chunk)
+        self.output(stripped)
+
+class _PlainTextFilter(FilteringMimeWriter.UUFilter):
+    """Pass text through; delete uuencoded files from the body.
+
+    A comment naming the file is added for each uuencoded file.
+    """
+    plaintextness = 10
+
+    def __init__(self, body):
+        FilteringMimeWriter.UUFilter.__init__(self, body)
+        self.content_type = 'text/plain'
+
+    def handle_begin(self, filename):
+        comment = "Deleted uuencoded file '%s'" % filename
+        self.add_comment(comment)
+
+    def handle_text(self, chunk):
+        self.output(chunk)
+
+class _HTMLFilter(ParserFilter):
+    """Convert HTML to plain text.
+
+    HTML is written to this filter; plain text is written to the
+    underlying message body.
+
+    BUGS (FIXME:)
+
+    Currently, all entities in the HTML are converted to ISO-8859-1
+    characters.
+
+    """
+    plaintextness = 0
+
+    def __init__(self, body):
+        import htmllib
+
+        class HTMLParser(htmllib.HTMLParser):
+            def start_div(self, attrs):
+                """Force a line break at the start of each <div>."""
+                self.formatter.end_paragraph(0)
+
+        ParserFilter.__init__(self, body, HTMLParser)
+        self.add_comment("Converted text/html to text/plain")
+        self.content_type = 'text/plain'
+
+    def output(self, str):
+        # Convert non-breaking spaces (&nbsp;, i.e. '\xa0') to plain spaces
+        plain = string.replace(str, '\xa0', ' ')
+        ParserFilter.output(self, plain)
+
+class _EnrichedTextFilter(ParserFilter):
+    """Convert text/enriched content to plain text.
+
+    Parsing is done by richtext.EnrichedTextParser.
+    """
+    plaintextness = 3
+
+    def __init__(self, body):
+        import richtext
+        ParserFilter.__init__(self, body, richtext.EnrichedTextParser)
+        self.add_comment("Converted text/enriched to text/plain")
+        self.content_type = 'text/plain'
+
+class _RichtextFilter(ParserFilter):
+    """Convert text/richtext content to plain text.
+
+    Parsing is done by richtext.RichtextParser.
+    """
+    plaintextness = 1
+
+    def __init__(self, body):
+        import richtext
+        ParserFilter.__init__(self, body, richtext.RichtextParser)
+        self.add_comment("Converted text/richtext to text/plain")
+        self.content_type = 'text/plain'
+
+class _RFC822Filter(FilteringMimeWriter.BodyFilter):
+    """Filter an embedded RFC 822 message.
+
+    Everything written is buffered.  On flush() the buffered message
+    is parsed and re-written to the underlying body by a
+    PlaintextMimeWriter subclass which drops a number of headers.
+
+    FIXME: This is a bit of a hack. It would be much more efficient to
+    filter the headers as they're written, feed them to our mimewriter,
+    and pass the body straight through to the mimewriter.
+
+    """
+    plaintextness = -1
+
+    def __init__(self, body):
+        FilteringMimeWriter.BodyFilter.__init__(self, body)
+        self.buf = cStringIO.StringIO()    # None once flushed
+        self.content_type = 'text/plain'
+
+    def write(self, str):
+        if self.buf:
+            self.buf.write(str)
+
+    def flush(self):
+        class MyMimeWriter(PlaintextMimeWriter):
+            EXCLUDE_HEADERS = ('return-path', 'received', 'errors-to',
+                               'envelope-to', 'delivery-date', 'message-id',
+                               'x-*', 'mime-version', 'list-*'
+                               ) + PlaintextMimeWriter.EXCLUDE_HEADERS
+
+            def _get_encoding_for_body(self):
+                return 'binary'
+
+            def _add_mime_headers(self, **params):
+                pass
+
+        buf = self.buf
+        if buf:
+            # Re-read the buffered message from its beginning.
+            buf.seek(0)
+            FilteringMimeWriter.write_message(mimetools.Message(buf),
+                                              MyMimeWriter(self.body))
+            buf.close()
+            self.buf = None
+        self.flushoutput()
+
+
+class _BinaryFilter(FilteringMimeWriter.BodyFilter):
+    """Filter binary files.
+
+    All content is deleted.  A comment giving the name (the 'name'
+    parameter, or 'Content' if there is none) and the content type of
+    the deleted part is added when the filter is created.
+
+    """
+    discards_data = 1
+
+    def __init__(self, body):
+        FilteringMimeWriter.BodyFilter.__init__(self, body)
+        notice = "%s of type %s deleted" % (
+            body.plist.get('name', default = 'Content'), body.content_type)
+        self.add_comment(notice)
+        self.content_type = 'text/plain'
+
+    def write(self, str):
+        return None
+
+    def writelines(self, lines):
+        return None
+
+
+
+# Some tests:
+if __name__ == '__main__':
+    import sys
+
+    # Convert every message file named on the command line to plain
+    # text on stdout.
+    pname = sys.argv.pop(0)
+    for path in sys.argv:
+        infp = open(path, 'r')
+        try:
+            msg = mimetools.Message(infp)
+            writer = PlaintextMimeWriter(sys.stdout)
+            FilteringMimeWriter.write_message(msg, writer)
+        finally:
+            # Close each input explicitly rather than relying on
+            # garbage collection.
+            infp.close()
+
Index: 2_0_3.1/Mailman/pythonlib/multifile.py
--- 2_0_3.1/Mailman/pythonlib/multifile.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki ()
+++ 0.4(w)/Mailman/pythonlib/multifile.py Wed, 04 Apr 2001 07:44:58 -0700 dairiki (mailman/k/5_multifile. 1.1 664)
@@ -0,0 +1,365 @@
+"""A readline()-style interface to the parts of a multipart message.
+
+The MultiFile class makes each part of a multipart message "feel" like
+an ordinary file, as long as you use fp.readline(). Allows recursive
+use, for nested multipart messages. Probably best used together
+with module mimetools.
+
+Suggested use:
+
+real_fp = open(...)
+fp = MultiFile(real_fp)
+
+'read some lines from fp'
+fp.push(separator)
+while 1:
+ 'read lines from fp until it returns an empty string' (A)
+ if not fp.next(): break
+fp.pop()
+'read remaining lines from fp until it returns an empty string'
+
+The latter sequence may be used recursively at (A).
+It is also allowed to use multiple push()...pop() sequences.
+
+If seekable is given as 0, the class code will not do the bookkeeping
+it normally attempts in order to make seeks relative to the beginning of the
+current file part. This may be useful when using MultiFile with a non-
+seekable stream object.
+"""
+
+# 2001-04-03: Geoffrey T. Dairiki
+#
+# This is a re-implementation of the stock python multifile.py
+#
+# The main changes:
+#
+# 1. Efficiency:
+#
+# This version supports calling the read() method with an argument.
+# (In many cases, I've found that reading a MultiFile line by line
+# is just too slow --- remember multipart messages often contain
+# large binary attachments.)
+#
+# This version performs reads on the underlying input stream in
+# larger chunks as well, and uses a regular expression search to
+# search for separator lines.
+#
+# 2. Buglets fixed
+#
+# The original version has a buglet regarding its handling of the
+# newline which precedes a separator line. According to RFC 2046,
+# section 5.1.1 the newline preceding a separator is part of the
+# separator, not part of the preceding content. The old version
+# of multifile.py treats the newline as part of the content. Thus,
+# it introduces a spurious empty line at the end of each content.
+#
+# Matching of the separators: RFC 2046, section 5.1.1 also states,
+# that if the beginning of a line matches the separator, it is a
+# separator. The old code ignores only trailing white space when
+# looking for a separator line. This code ignores trailing anything,
+# on the separator line.
+
+
+import string
+import re
+
+__all__ = ["MultiFile", "Error", "IllegalSeek", "UnexpectedEOF", "EndmarkMissing"]
+
+class Error(RuntimeError):
+    """Base class of the exceptions defined by this module."""
+
+
+class IllegalSeek(Error):
+    """Raised by tell() and seek() on a MultiFile which is not seekable."""
+
+    def __init__(self, detail = "Illegal seek on multifile"):
+        Error.__init__(self, detail)
+
+
+class UnexpectedEOF(Error):
+    """Raised when the underlying file has no more data to give."""
+
+    def __init__(self, detail = "Sudden EOF on multifile"):
+        Error.__init__(self, detail)
+
+
+class EndmarkMissing(Error):
+    """Raised when a mark of an enclosing separator turns up first."""
+
+_END_OF_STRING = re.compile(r'\Z')
+
+class MultiFile:
+
+ def __init__(self, fp, seekable=1):
+     """Wrap the file object `fp'.
+
+     If `seekable' is true the starting position is taken from
+     fp.tell(); should that call fail, the MultiFile is marked as not
+     seekable and positions are counted from 0.
+     """
+     self.fp = fp
+     self.readahead = ''         # Data read from fp but not yet returned
+     self.stack = []             # State saved by push()
+     self.done = 0               # End marker passed?
+
+     # Position in underlying file
+     self.pos = 0
+     self.seekable = seekable
+     if self.seekable:
+         try:
+             self.pos = fp.tell()
+         except:
+             self.seekable = 0
+
+     self.startpos = self.pos    # Start of current 'file'
+     self.endpos = None          # End of current 'file', if known
+     self.limit = None           # min(enclosing endpos's)
+     self.sep = None             # current boundary string
+
+     never_matches = re.compile('(?=a)b')
+     # Matches current separator, current end marker,
+     # or any enclosing separators or end markers:
+     self.mark_re = never_matches
+     # Same as above, but includes trailing cruft through newline.
+     self.markline_re = never_matches
+     # Maximum length of a match for self.mark_re.
+     self.max_mark_len = 0
+
+ def tell(self):
+     """Return the position relative to the start of the current part.
+
+     Raises IllegalSeek if the MultiFile is not seekable.
+     """
+     if self.seekable:
+         return self.pos - self.startpos
+     raise IllegalSeek
+
+ def seek(self, pos, whence=0):
+     """Move to `pos' within the current part.
+
+     `whence' is 0 (relative to the start of the part, the default),
+     1 (relative to the current position) or 2 (relative to the end of
+     the part).
+
+     Raises IllegalSeek if the MultiFile is not seekable and ValueError
+     if the target lies before the start of the part.  After a failed
+     seek the read position is unchanged.
+     """
+     if not self.seekable:
+         raise IllegalSeek
+
+     # Figure out where end of current file is.
+     if self.endpos is None:
+         curpos = self.pos
+         try:
+             # Reading to the end of the part sets self.endpos.
+             while self.read(4096): pass
+         finally:
+             # Undo the scan completely.  The underlying file must be
+             # repositioned too: otherwise a seek which fails below (or
+             # a scan which raises) would leave self.pos and the file
+             # position out of step.
+             self.pos = curpos
+             self.readahead = ''
+             self.fp.seek(curpos)
+
+     if whence == 1:
+         newpos = self.pos + pos
+     elif whence == 2:
+         newpos = self.endpos + pos
+     else:
+         newpos = self.startpos + pos
+
+     if newpos < self.startpos:
+         raise ValueError, "Seek past beginning of file"
+
+     self.pos = newpos
+     self.fp.seek(self.pos)
+     self.readahead = ''
+
+ def __read_more(self, size = 8192):
+     """Append up to `size' more characters from fp to the read-ahead.
+
+     Raises UnexpectedEOF if the underlying file returns no data.
+     """
+     hunk = self.fp.read(size)
+     if hunk:
+         self.readahead = self.readahead + hunk
+     else:
+         raise UnexpectedEOF
+
+ def __do_read(self, size):
+     """Read until the read-ahead holds at least `size' characters.
+
+     UnexpectedEOF from __read_more() is passed on to the caller.
+     """
+     assert size >= 0
+     need = size - len(self.readahead)
+     while need > 0:
+         self.__read_more(need)
+         need = size - len(self.readahead)
+
+ def __try_to_read(self, size):
+     """Like __do_read(), but tolerate running out of input.
+
+     Returns None when `size' characters are buffered.  When the
+     underlying file ends first, returns a match object positioned at
+     the end of the read-ahead.
+     """
+     try:
+         self.__do_read(size)
+     except UnexpectedEOF:
+         return _END_OF_STRING.search(self.readahead)
+     return None
+
+ # Fill self.readahead for the next read and return the number of buffered
+ # characters which belong to the current part: at most `size' of them, or
+ # everything up to the end of the part when `size' is negative.
+ # Records the end of the part in self.endpos once it has been located.
+ # Raises EndmarkMissing when the mark found belongs to an enclosing
+ # separator; UnexpectedEOF from the read helpers may propagate.
+ def __fillbuf(self, size):
+ if self.endpos is not None:
+ # If we know where the end is, no need to search for marks.
+ # Just read in the data.
+ nleft = self.endpos - self.pos
+ if size >= 0:
+ nleft = min(size, nleft)
+ if nleft <= 0:
+ return 0
+ self.__do_read(nleft)
+ return nleft
+
+ if size < 0:
+ # Read until mark found
+ if self.sep is None:
+ # No separator pushed: the part extends to the end of the file.
+ self.readahead = self.readahead + self.fp.read()
+ eof = _END_OF_STRING.search(self.readahead)
+ else:
+ eof = self.mark_re.search(self.readahead)
+ while not eof:
+ self.__read_more()
+ eof = self.mark_re.search(self.readahead)
+
+ else:
+ # Never read beyond the end of an enclosing part.
+ if self.limit is not None:
+ size = min(size, self.limit - self.pos)
+ # Read enough to include mark if there is one
+ eof = self.__try_to_read(size + self.max_mark_len)
+ if self.sep is not None:
+ # NOTE(review): the search stops at index `size', so a mark which
+ # starts before `size' but extends past it is presumably not
+ # recognised by this call -- confirm that this is intended.
+ eof = self.mark_re.search(self.readahead, 0, size)
+
+ if eof:
+ # 'badmark' is set when the mark belongs to an enclosing separator.
+ if (self.sep is not None) and (eof.group('badmark') is not None):
+ raise EndmarkMissing, "Missing endmarker (sep = '%s')" % self.sep
+ # The part ends where the mark (or the end of input) begins.
+ size = eof.start()
+ self.endpos = self.pos + size
+ else:
+ size = min(size, len(self.readahead))
+ return size
+
+ def read(self, size = -1):
+     """Read at most `size' characters from the current part.
+
+     With a negative `size' (the default) the rest of the part is
+     read.  Returns '' at the end of the part.
+     """
+     count = self.__fillbuf(size)
+     hunk, self.readahead = self.readahead[:count], self.readahead[count:]
+     self.pos = self.pos + count
+     return hunk
+
+ # Read one line, including its newline, from the current part.
+ # Returns '' once the known end of the part has been reached.
+ def readline(self):
+ if self.endpos is not None and self.pos >= self.endpos:
+ return ''
+ try:
+ while 1:
+ # string.find() gives -1 when there is no newline yet, so
+ # line_len is 0 (false) in that case.
+ line_len = string.find(self.readahead, '\n') + 1
+ if line_len:
+ # read() may return less if a mark is found within the line.
+ return self.read(line_len)
+ self.__read_more()
+ except UnexpectedEOF:
+ # Without a separator the end of the file ends the part, so the
+ # unterminated last line is returned; otherwise running out of
+ # input is an error.
+ if self.sep is None:
+ return self.read()
+ raise
+
+ def readlines(self):
+     """Return the rest of the current part as a list of lines.
+
+     Every line keeps its newline; a final line without a newline is
+     included as it is.
+     """
+     pieces = string.split(self.read(), '\n')
+     tail = pieces.pop()
+     lines = []
+     for piece in pieces:
+         lines.append(piece + '\n')
+     if tail:
+         lines.append(tail)
+     return lines
+
+ # Test whether the read-ahead starts with a mark.  Returns None if not;
+ # otherwise returns a match object covering the whole marker line.
+ def __at_mark(self):
+ # Make sure enough is buffered to hold the longest possible mark.
+ self.__try_to_read(self.max_mark_len)
+ mark = self.mark_re.match(self.readahead)
+ if mark:
+ # Find end of marker line
+ try:
+ while 1:
+ mark = self.markline_re.match(self.readahead)
+ if mark:
+ break
+ self.__read_more(256)
+ except UnexpectedEOF:
+ # The input ended before a newline: take the rest of the input as
+ # the remainder of the marker line.
+ mark = re.match(self.mark_re.pattern + r".*\Z", self.readahead)
+ assert mark
+ return mark
+
+
+ # Skip to the start of the next part.  Returns 1 when positioned at the
+ # start of another part.  Returns 0 when the end marker has been passed,
+ # when no separator is pushed, or when the mark found belongs to an
+ # enclosing separator.
+ def next(self):
+ if self.done:
+ return 0
+
+ # Skip whatever is left of the current part; this also sets self.endpos.
+ while self.read(4096):
+ pass
+
+ if self.sep is None:
+ # no marks on stack
+ return 0
+
+ # If the position lies beyond the end of the part, go back to the mark.
+ if self.pos > self.endpos:
+ assert self.seekable
+ self.pos = self.endpos
+ self.fp.seek(self.pos)
+ self.readahead = ''
+ mark = self.__at_mark()
+ assert mark
+
+ # A mark of an enclosing separator: left unconsumed for the caller.
+ if mark.group('badmark') is not None:
+ return 0
+
+ # Consume the marker line; the next part starts right after it.
+ mark_len = mark.end()
+ self.readahead = self.readahead[mark_len:]
+ self.pos = self.pos + mark_len
+ self.startpos = self.pos
+ if mark.group('endmark') is not None:
+ # End marker: no further parts at this level.
+ self.endpos = self.pos
+ self.done = 1
+ return 0
+ else:
+ self.endpos = None
+ return 1
+
+ # Start reading parts delimited by the boundary string `sep'.  The current
+ # state is saved on self.stack, to be restored by pop().
+ # Raises Error when called beyond the end of the current part, after the
+ # end marker has been passed, or while positioned at a mark; raises
+ # ValueError when `sep' is None.
+ def push(self, sep):
+ if self.endpos is not None and self.pos > self.endpos:
+ raise Error, 'bad MultiFile.push() call'
+ if self.done or self.__at_mark():
+ raise Error, 'bad MultiFile.push() call'
+ if sep is None:
+ raise ValueError, 'bad separator'
+
+ self.stack.append( (self.sep, self.startpos, self.endpos, self.limit,
+ self.mark_re, self.markline_re, self.max_mark_len) )
+
+ # A known end of the enclosing part limits how far the new parts can
+ # extend.
+ if self.endpos is not None:
+ def not_None(x):
+ return x is not None
+ self.limit = min(filter(not_None, [self.endpos, self.limit]))
+
+ self.sep = sep
+ self.startpos, self.endpos = self.pos, None
+
+ self.__compute_regexps()
+
+ def __compute_regexps(self):
+     """Rebuild mark_re, markline_re and max_mark_len for self.sep.
+
+     The regexps match the section divider and the end marker of the
+     current separator, and those of the enclosing separators saved on
+     self.stack.  In a match, the named group 'endmark' is set for the
+     current end marker and 'badmark' for a mark of an enclosing
+     separator; neither is set for the current section divider.
+     """
+     def common_prefix(items):
+         prefix = items[0]
+         for item in items[1:]:
+             while prefix != item[:len(prefix)]:
+                 prefix = prefix[:-1]
+         return prefix
+
+     assert self.sep is not None
+
+     mark = self.section_divider(self.sep)
+     endmark = self.end_marker(self.sep)
+
+     # Separators of the enclosing parts.  stack[0] is skipped: it was
+     # saved by the outermost push(), normally with no separator set.
+     badmarks = []
+     for sep in map(lambda x:x[0], self.stack[1:]):
+         badmarks.append(self.end_marker(sep))
+         badmarks.append(self.section_divider(sep))
+
+     marks = [mark, endmark] + badmarks
+     prefix = common_prefix(marks)
+
+     # The common prefix is matched once; only the differing tails go
+     # into the alternation.
+     def remove_prefix(x, p=prefix):
+         return x[len(p):]
+
+     mark = re.escape(remove_prefix(mark))
+     endmark = re.escape(remove_prefix(endmark))
+     badmarks = string.join(map(re.escape, map(remove_prefix, badmarks)), '|')
+     prefix = re.escape(prefix)
+
+     if not badmarks:
+         badmarks = '(?=a)b' # never matches
+     # The groups must be named: callers test group('endmark') and
+     # group('badmark') on the resulting match objects.
+     regexp = ( r'(?:\r?\n)?^%s(?:(?P<endmark>%s)|%s|(?P<badmark>%s))' %
+                (prefix, endmark, mark, badmarks) )
+
+     # + 2 allows for the optional line ending in front of the mark.
+     self.max_mark_len = max(map(len, marks)) + 2
+     self.mark_re = re.compile(regexp, re.M)
+     self.markline_re = re.compile(regexp + r'.*\n', re.M)
+
+ def pop(self):
+     """Restore the state saved by the matching push().
+
+     Raises Error if there is no saved state.
+     """
+     if not self.stack:
+         raise Error, 'bad MultiFile.pop() call'
+     saved = self.stack.pop()
+     ( self.sep, self.startpos, self.endpos, self.limit,
+       self.mark_re, self.markline_re, self.max_mark_len
+       ) = saved
+     self.done = 0
+
+ def is_data(self, line):
+     """Return true unless `line' starts with '--'."""
+     return line[:2] != '--'
+
+ def section_divider(self, str):
+     """Return the line which separates parts for boundary `str'."""
+     divider = "--" + str
+     return divider
+
+ def end_marker(self, str):
+     """Return the line which ends the last part for boundary `str'."""
+     marker = "--" + str + "--"
+     return marker
+
+#End of multifile.py
+
Index: 2_0_3.1/Mailman/Handlers/PlainText.py
--- 2_0_3.1/Mailman/Handlers/PlainText.py Wed, 04 Apr 2001 10:06:21 -0700 dairiki ()
+++ 0.4(w)/Mailman/Handlers/PlainText.py Wed, 04 Apr 2001 09:27:33 -0700 dairiki (mailman/k/6_PlainText. 1.1 664)
@@ -0,0 +1,59 @@
+# Copyright (C) 2001 by Geoffrey T. Dairiki
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""Force message into plain text format.
+"""
+
+
+def process(mlist, msg, msgdata):
+    """Replace `msg' by its plain text rendition.
+
+    Nothing is done unless mlist.force_plain_text is set, nor for
+    messages which msgdata marks as 'isdigest' or 'fasttrack'.
+    Otherwise the message is run through a PlaintextMimeWriter; the
+    headers of the result are stored into `msg' and its body replaces
+    msg.body.
+    """
+    if not mlist.force_plain_text:
+        return
+    if msgdata.get('isdigest'):
+        return
+
+    # Paranoia:
+    # We should never see messages with 'fasttrack' set.
+    # (They get delivered via DeliverToUser(), and therefore we shouldn't
+    # be in their pipeline.)
+    if msgdata.get('fasttrack'):
+        return
+
+    # Mark the message as dirty so that its text will be forced to disk next
+    # time it's queued.
+    msgdata['_dirty'] = 1
+
+    import mimetools
+    from Mailman.FilteringMimeWriter import write_message
+    from Mailman import PlaintextMimeWriter
+    from cStringIO import StringIO
+
+    class MyMimeWriter(PlaintextMimeWriter.PlaintextMimeWriter):
+        # Don't include any headers in the output
+        # (except for the bare minimum of MIME headers)
+        def _keepheader(self, header):
+            return None
+
+    source = StringIO(str(msg))
+    converted = StringIO()
+    write_message(mimetools.Message(source), MyMimeWriter(converted))
+    source.close()
+
+    # Parsing the headers leaves `converted' positioned at the body.
+    converted.seek(0)
+    plaintext = mimetools.Message(converted)
+    for hdr in plaintext.keys():
+        msg[hdr] = plaintext[hdr]
+    msg.body = converted.read()