Index: Scrubber.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/Handlers/Scrubber.py,v
retrieving revision 2.18.2.3
diff -u -r2.18.2.3 Scrubber.py
--- Scrubber.py	8 Feb 2003 07:13:50 -0000	2.18.2.3
+++ Scrubber.py	24 Feb 2003 03:07:30 -0000
@@ -32,6 +32,7 @@
 from email.Utils import parsedate
 from email.Parser import HeaderParser
 from email.Generator import Generator
+from email import message_from_string
 
 from Mailman import mm_cfg
 from Mailman import Utils
@@ -190,7 +191,7 @@
                 # lists.
                 omask = os.umask(002)
                 try:
-                    url = save_attachment(mlist, part, dir, filter_html=0)
+                    url, size = save_attachment(mlist, part, dir, filter_html=0)
                 finally:
                     os.umask(omask)
                 del part['content-type']
@@ -201,7 +202,11 @@
             else:
                 # HTML-escape it and store it as an attachment, but make it
                 # look a /little/ bit prettier. :(
-                payload = Utils.websafe(part.get_payload(decode=1))
+                try:
+                    payload = Utils.websafe(part.get_payload(decode=1))
+                except binascii.Error:
+                    # Error in base64 decoding. It'll surely look ugly.
+                    payload = Utils.websafe(part.get_payload())
                 # For whitespace in the margin, change spaces into
                 # non-breaking spaces, and tabs into 8 of those.  Then use a
                 # mono-space font.  Still looks hideous to me, but then I'd
@@ -216,7 +221,7 @@
                 del part['content-transfer-encoding']
                 omask = os.umask(002)
                 try:
-                    url = save_attachment(mlist, part, dir, filter_html=0)
+                    url, size = save_attachment(mlist, part, dir, filter_html=0)
                 finally:
                     os.umask(omask)
                 del part['content-type']
@@ -229,13 +234,12 @@
             submsg = part.get_payload(0)
             omask = os.umask(002)
             try:
-                url = save_attachment(mlist, part, dir)
+                url, size = save_attachment(mlist, part, dir)
             finally:
                 os.umask(omask)
             subject = submsg.get('subject', _('no subject'))
             date = submsg.get('date', _('no date'))
             who = submsg.get('from', _('unknown sender'))
-            size = len(str(submsg))
             del part['content-type']
             part.set_payload(_("""\
 An embedded message was scrubbed...
@@ -249,12 +253,10 @@
         # attachment that would have to be separately downloaded.  Pipermail
         # will transform the url into a hyperlink.
         elif not part.is_multipart():
-            payload = part.get_payload(decode=1)
             ctype = part.get_type()
-            size = len(payload)
             omask = os.umask(002)
             try:
-                url = save_attachment(mlist, part, dir)
+                url, size = save_attachment(mlist, part, dir)
             finally:
                 os.umask(omask)
             desc = part.get('content-description', _('not available'))
@@ -270,6 +272,9 @@
 Url : %(url)s
 """), lcset)
         outer = 0
+    # TK: We (Japanese) need to stringify and re-generate the message
+    #     instance because multiple charsets are used.
+    msg = message_from_string(str(msg))
     # We still have to sanitize multipart messages to flat text because
     # Pipermail can't handle messages with list payloads.  This is a kludge;
     # def (n) clever hack ;).
@@ -286,8 +291,11 @@
         # BAW: Martin's original patch suggested we might want to try
         # generalizing to utf-8, and that's probably a good idea (eventually).
         text = []
-        for part in msg.get_payload():
+        for part in msg.walk():
             # All parts should be scrubbed to text/plain by now.
+            # ... except multipart containers of embedded messages; skip them.
+            if part.get_content_maintype() == 'multipart':
+                continue
             partctype = part.get_content_type()
             if partctype <> 'text/plain':
                 text.append(_('Skipped content of type %(partctype)s'))
@@ -296,6 +304,9 @@
                 t = part.get_payload(decode=1)
             except binascii.Error:
                 t = part.get_payload()
+            if not t:
+                # somehow we got an empty payload, skip this
+                continue
             partcharset = part.get_content_charset()
             if partcharset and partcharset <> charset:
                 try:
@@ -340,7 +351,12 @@
     fsdir = os.path.join(mlist.archive_dir(), dir)
     makedirs(fsdir)
     # Figure out the attachment type and get the decoded data
-    decodedpayload = msg.get_payload(decode=1)
+    try:
+        decodedpayload = msg.get_payload(decode=1)
+    except binascii.Error:
+        # Failed to decode base64.  Save the undecoded payload anyway.
+        # TBD: or should we log a warning and discard it instead?
+        decodedpayload = msg.get_payload()
     # BAW: mimetypes ought to handle non-standard, but commonly found types,
     # e.g. image/jpg (should be image/jpeg).  For now we just store such
     # things as application/octet-streams since that seems the safest.
@@ -431,6 +447,7 @@
         submsg = msg.get_payload()
         # BAW: I'm sure we can eventually do better than this. :(
         decodedpayload = Utils.websafe(str(submsg))
+    size = len(decodedpayload)
     fp = open(path, 'w')
     fp.write(decodedpayload)
     fp.close()
@@ -440,4 +457,4 @@
     if baseurl[-1] <> '/':
         baseurl += '/'
     url = baseurl + '%s/%s%s%s' % (dir, filebase, extra, ext)
-    return url
+    return (url, size)