Index: Scrubber.py =================================================================== RCS file: /cvsroot/mailman/mailman/Mailman/Handlers/Scrubber.py,v retrieving revision 2.18.2.3 diff -u -r2.18.2.3 Scrubber.py --- Scrubber.py 8 Feb 2003 07:13:50 -0000 2.18.2.3 +++ Scrubber.py 24 Feb 2003 03:07:30 -0000 @@ -32,6 +32,7 @@ from email.Utils import parsedate from email.Parser import HeaderParser from email.Generator import Generator +from email import message_from_string from Mailman import mm_cfg from Mailman import Utils @@ -190,7 +191,7 @@ # lists. omask = os.umask(002) try: - url = save_attachment(mlist, part, dir, filter_html=0) + url, size = save_attachment(mlist, part, dir, filter_html=0) finally: os.umask(omask) del part['content-type'] @@ -201,7 +202,11 @@ else: # HTML-escape it and store it as an attachment, but make it # look a /little/ bit prettier. :( - payload = Utils.websafe(part.get_payload(decode=1)) + try: + payload = Utils.websafe(part.get_payload(decode=1)) + except binascii.Error: + # Error in base64 decoding. It'll surely look ugly. + payload = Utils.websafe(part.get_payload()) # For whitespace in the margin, change spaces into # non-breaking spaces, and tabs into 8 of those. Then use a # mono-space font. Still looks hideous to me, but then I'd @@ -216,7 +221,7 @@ del part['content-transfer-encoding'] omask = os.umask(002) try: - url = save_attachment(mlist, part, dir, filter_html=0) + url, size = save_attachment(mlist, part, dir, filter_html=0) finally: os.umask(omask) del part['content-type'] @@ -229,13 +234,12 @@ submsg = part.get_payload(0) omask = os.umask(002) try: - url = save_attachment(mlist, part, dir) + url, size = save_attachment(mlist, part, dir) finally: os.umask(omask) subject = submsg.get('subject', _('no subject')) date = submsg.get('date', _('no date')) who = submsg.get('from', _('unknown sender')) - size = len(str(submsg)) del part['content-type'] part.set_payload(_("""\ An embedded message was scrubbed... 
@@ -249,12 +253,10 @@ # attachment that would have to be separately downloaded. Pipermail # will transform the url into a hyperlink. elif not part.is_multipart(): - payload = part.get_payload(decode=1) ctype = part.get_type() - size = len(payload) omask = os.umask(002) try: - url = save_attachment(mlist, part, dir) + url, size = save_attachment(mlist, part, dir) finally: os.umask(omask) desc = part.get('content-description', _('not available')) @@ -270,6 +272,9 @@ Url : %(url)s """), lcset) outer = 0 + # TK: We (Japanese) need to stringify and re-generate the message + # instance because multiple charsets are used. + msg = message_from_string(str(msg)) # We still have to sanitize multipart messages to flat text because # Pipermail can't handle messages with list payloads. This is a kludge; # def (n) clever hack ;). @@ -286,8 +291,11 @@ # BAW: Martin's original patch suggested we might want to try # generalizing to utf-8, and that's probably a good idea (eventually). text = [] - for part in msg.get_payload(): + for part in msg.walk(): # All parts should be scrubbed to text/plain by now. + # ... or embedded multipart messages, so walk(), not get_payload() + if part.get_content_maintype() == 'multipart': + continue partctype = part.get_content_type() if partctype <> 'text/plain': text.append(_('Skipped content of type %(partctype)s')) @@ -296,6 +304,9 @@ t = part.get_payload(decode=1) except binascii.Error: t = part.get_payload() + if not t: + # somehow we got an empty payload, skip this + continue partcharset = part.get_content_charset() if partcharset and partcharset <> charset: try: @@ -340,7 +351,12 @@ fsdir = os.path.join(mlist.archive_dir(), dir) makedirs(fsdir) # Figure out the attachment type and get the decoded data - decodedpayload = msg.get_payload(decode=1) + try: + decodedpayload = msg.get_payload(decode=1) + except binascii.Error: + # Failed to decode base64. Save the undecoded payload anyway. + # TBD: or log a warning and discard it? 
+ decodedpayload = msg.get_payload() # BAW: mimetypes ought to handle non-standard, but commonly found types, # e.g. image/jpg (should be image/jpeg). For now we just store such # things as application/octet-streams since that seems the safest. @@ -431,6 +447,7 @@ submsg = msg.get_payload() # BAW: I'm sure we can eventually do better than this. :( decodedpayload = Utils.websafe(str(submsg)) + size = len(decodedpayload) fp = open(path, 'w') fp.write(decodedpayload) fp.close() @@ -440,4 +457,4 @@ if baseurl[-1] <> '/': baseurl += '/' url = baseurl + '%s/%s%s%s' % (dir, filebase, extra, ext) - return url + return (url, size)