下面把主要的代码贴一下:
初始化,定义邮件服务器
# IMAP endpoint used by the client: Gmail's IMAP service on port 993.
self.IMAP_SERVER = 'imap.gmail.com'
self.IMAP_PORT = 993
# Connection handle; None until an IMAP4_SSL client is assigned to it.
self.M = None
# Data part of the latest login reply. Must be an assignment: a bare
# `self.response` expression creates nothing and raises AttributeError.
self.response = None
self.mailboxes = []
登录,选择mailbox:
# Open an SSL-wrapped IMAP connection to the configured host and port.
self.M = imaplib.IMAP4_SSL(self.IMAP_SERVER, self.IMAP_PORT)
# login() returns a (status, data) pair; the data part is kept on the instance.
rc, self.response = self.M.login(username, password)
# select() without arguments opens the default mailbox (INBOX).
# NOTE(review): `m` is presumably the client instance as seen by the caller,
# whereas the two lines above run inside the class - confirm.
tye, data = m.M.select()
邮件搜索:
# Search the selected mailbox with the IMAP BODY criterion and `datapath` as
# the search string; the first argument (charset) is left as None.
# The call returns a status value and the matching message numbers.
ret, msgnums = m.M.search(None, 'BODY', datapath)
获取邮件信息:
# Fetch the message identified by `id`, requesting the RFC822 data item.
status, response = self.M.fetch(id,"(RFC822)")
# The raw message text is the second item of the first response entry.
mailText = response[0][1]
# Parse the raw text into a message object.
mail_message = email.message_from_string(mailText)
# Decode the Subject header and convert it to a unicode string.
subject = unicode(email.Header.make_header(email.Header.decode_header(mail_message['subject'])))
#print "subject_________:" +subject
# parseaddr() returns a (name, address) pair; only the address is kept.
mail_from = email.utils.parseaddr(mail_message["from"])[1]
mail_to = email.utils.parseaddr(mail_message["to"])[1]
# NOTE(review): this local name hides any `time` module import inside the
# enclosing function - confirm nothing later in that function needs it.
time = mail_message['Date']
# Print a short summary: date, sender, recipient and subject.
print '['+mail_message['Date']+']'+'\n'+'From:'+mail_from+ ' To:'+mail_to+'\n'+'Subject:'+subject+'\n'
# Return the result of get_first_text_block() together with the subject,
# the sender address and the raw Date header value.
return self.get_first_text_block(mail_message), subject, mail_from, time
maintype = email_message_instance.get_content_maintype() 返回邮件内容的主类型:若为 text 就比较好处理;若为 multipart,则还需要遍历 email_message_instance 的各个部分,并根据各自的类型分别处理。
email.message_from_string(mailText) 返回一个 Message 对象,里面包含了邮件的基本信息。
邮件里比较蛋疼的是字符串编码的问题,毕竟大家的邮件格式都不一样,有些是unicode,有些是utf-8,有些是gb2312,还有附件,图片等多种格式,
当然这次也只处理了文本,暂时没有需求去处理附件和图片这些。我都是统一将字符转成unicode去处理的。
字符串处理的时候,可以使用 chardet 检测字符串的编码;读写文件的时候,可以用 codecs 指定读写所用的字符集。
补充几个例子
Example 1
From project georegistry, under directory georegistry/lib, in source file smtp.py.
Score: 13
vote
vote
def sendMessage(fromByValue, toByValue, subject, body, headerByName=None):
    """Send a message using SMTP.

    Args:
        fromByValue: mapping describing the sender; the keys read are
            'nickname', 'email', 'smtp', 'username' and 'password'.
        toByValue: mapping describing the recipient; the keys read are
            'nickname' and 'email'.
        subject: value of the subject header.
        body: message payload.
        headerByName: optional mapping of extra header names to values.

    Raises:
        SMTPError: when sendmail() fails with socket.error.
    """
    # Prepare
    message = email.message.Message()
    message.add_header('from', email.utils.formataddr((fromByValue['nickname'], fromByValue['email'])))
    message.add_header('to', email.utils.formataddr((toByValue['nickname'], toByValue['email'])))
    message.add_header('subject', subject)
    message.set_payload(body)
    if headerByName:
        for key, value in headerByName.items():
            message.add_header(key, value)
    # Connect to server: plain SMTP for localhost, SMTP over SSL (port 465) otherwise
    if fromByValue['smtp'] == 'localhost':
        server = smtplib.SMTP('localhost')
    else:
        server = smtplib.SMTP_SSL(fromByValue['smtp'], 465)
    # Everything after the connection is opened runs under finally, so a
    # failed login no longer leaves the connection open.
    try:
        # Authenticate only when a username is configured (empty or None skips it)
        if fromByValue['username']:
            server.login(fromByValue['username'], fromByValue['password'])
        # Send mail; only socket errors raised by sendmail are wrapped
        try:
            server.sendmail(fromByValue['email'], toByValue['email'], message.as_string())
        except socket.error as error:
            raise SMTPError(error)
    finally:
        server.quit()
Example 2
From project appengine-python3-master, under directory google/appengine/tools/devappserver2/admin, in source file mail_request_handler_test.py.
Score: 10
vote
vote
# Checks that MailRequestHandler._send() hands the serialized message to the
# dispatcher as a POST request with a message/rfc822 content type.
def test_send(self):
# Stub out the class-level `dispatcher` attribute so the handler can be built.
self.mox.StubOutWithMock(mail_request_handler.MailRequestHandler,
'dispatcher')
handler = mail_request_handler.MailRequestHandler(None, None)
# Give this handler a mock dispatcher and record the single expected call.
handler.dispatcher = self.mox.CreateMock(dispatcher.Dispatcher)
handler.dispatcher.add_request(
method='POST',
relative_url='URL',
headers=[('Content-Type', 'message/rfc822')],
body='mail message',
source_ip='0.1.0.20')
# Record that the message mock's as_string() returns the expected body text.
message = self.mox.CreateMock(email.message.Message)
message.as_string().AndReturn('mail message')
# Switch from recording to replay, run the code under test, then verify
# that every recorded expectation was met.
self.mox.ReplayAll()
handler._send('URL', message)
self.mox.VerifyAll()
Example 3
From project python-sipsimple-master, under directory sipsimple/streams/applications, in source file chat.py.
Score: 8
vote
vote
def __str__(self):
    """Serialize this message as header lines followed by a MIME part.

    The header section holds From, To, cc, Subject (plus one line per
    subject translation), DateTime and Required, then the additional
    headers. A namespace gets an ``NS:`` line the first time a header uses
    a prefix not yet bound to it. Every header line is encoded with the
    'cpim-headers' codec and lines are joined with CRLF. The body is placed
    in a ``Message`` whose type is ``self.content_type``; a unicode body is
    sent as UTF-8 with a matching charset parameter.
    """
    lines = []
    if self.sender:
        lines.append(u'From: %s' % self.sender)
    lines.extend(u'To: %s' % recipient for recipient in self.recipients)
    lines.extend(u'cc: %s' % recipient for recipient in self.courtesy_recipients)
    if self.subject:
        lines.append(u'Subject: %s' % self.subject)
    if self.subject is not None:
        lines.extend(u'Subject:;lang=%s %s' % (lang, translation)
                     for lang, translation in self.subject.translations.iteritems())
    if self.timestamp:
        lines.append(u'DateTime: %s' % self.timestamp)
    if self.required:
        lines.append(u'Required: %s' % ','.join(self.required))

    # Prefix -> namespace bindings announced so far; the empty prefix starts
    # out bound to the standard namespace.
    declared = {u'': self.standard_namespace}
    for header in self.additional_headers:
        namespace = header.namespace
        prefix = namespace.prefix
        if declared.get(prefix, None) != namespace:
            if prefix:
                declaration = u'NS: %s <%s>' % (prefix, namespace)
            else:
                declaration = u'NS: <%s>' % namespace
            lines.append(declaration)
            declared[prefix] = namespace
        if prefix:
            lines.append(u'%s.%s: %s' % (prefix, header.name, header.value))
        else:
            lines.append(u'%s: %s' % (header.name, header.value))

    # The trailing empty entry makes the joined header text end with CRLF.
    lines.append(u'')
    encoded_headers = '\r\n'.join(line.encode('cpim-headers') for line in lines)

    payload = Message()
    payload.set_type(self.content_type)
    if isinstance(self.body, unicode):
        payload.set_param('charset', 'utf-8')
        payload.set_payload(self.body.encode('utf-8'))
    else:
        payload.set_payload(self.body)
    return '\r\n'.join((encoded_headers, payload.as_string()))
Example 4
From project odoo, under directory addons/mail, in source file mail_thread.py.
Score: 8
vote
vote
def _message_extract_payload(self, message, save_original=False):
"""Extract body as HTML and attachments from the mail message.

:param message: the message object to extract from
:param bool save_original: when true, the serialized message is added to
    the attachments as 'original_email.eml'
:return: a ``(body, attachments)`` pair, where ``attachments`` is a list of
    ``(filename, payload)`` tuples
"""
attachments = []
body = u''
# Optionally keep the complete serialized message as the first attachment.
if save_original:
attachments.append(('original_email.eml', message.as_string()))
# Not multipart, or the Content-Type header mentions text/: decode the
# payload of the message itself.
if not message.is_multipart() or 'text/' in message.get('content-type', ''):
encoding = message.get_content_charset()
body = message.get_payload(decode=True)
body = tools.ustr(body, encoding, errors='replace')
if message.get_content_type() == 'text/plain':
# text/plain -> <pre/>
body = tools.append_content_to_html(u'', body, preserve=True)
else:
# Becomes True once a multipart/alternative container is met in the walk.
alternative = False
for part in message.walk():
if part.get_content_type() == 'multipart/alternative':
alternative = True
if part.get_content_maintype() == 'multipart':
continue # skip container
# part.get_filename returns decoded value if able to decode, coded otherwise.
# original get_filename is not able to decode iso-8859-1 (for instance).
# therefore, iso encoded attachements are not able to be decoded properly with get_filename
# code here partially copy the original get_filename method, but handle more encoding
filename=part.get_param('filename', None, 'content-disposition')
# Fall back to the 'name' parameter when no 'filename' parameter is present.
if not filename:
filename=part.get_param('name', None)
if filename:
if isinstance(filename, tuple):
# RFC2231
filename=email.utils.collapse_rfc2231_value(filename).strip()
else:
filename=decode(filename)
encoding = part.get_content_charset() # None if attachment
# 1) Explicit Attachments -> attachments
if filename or part.get('content-disposition', '').strip().startswith('attachment'):
attachments.append((filename or 'attachment', part.get_payload(decode=True)))
continue
# 2) text/plain -> <pre/>
# Once an alternative has been seen, plain text is only used while body is still empty.
if part.get_content_type() == 'text/plain' and (not alternative or not body):
body = tools.append_content_to_html(body, tools.ustr(part.get_payload(decode=True),
encoding, errors='replace'), preserve=True)
# 3) text/html -> raw
elif part.get_content_type() == 'text/html':
html = tools.ustr(part.get_payload(decode=True), encoding, errors='replace')
# Once an alternative has been seen, HTML replaces the body; otherwise it is appended.
if alternative:
body = html
else:
body = tools.append_content_to_html(body, html, plaintext=False)
# 4) Anything else -> attachment
else:
attachments.append((filename or 'attachment', part.get_payload(decode=True)))
return body, attachments
Example 5
From project openerp-ktv, under directory openerp/addons/mail, in source file mail_message.py.
Score: 5
vote
vote
def parse_message(self, message, save_original=False):
    """Parses a string or email.message.Message representing an
       RFC-2822 email, and returns a generic dict holding the
       message details.

       :param message: the message to parse
       :type message: email.message.Message | string | unicode
       :param bool save_original: whether the returned dict
           should include an ``original`` entry with the base64
           encoded source of the message.
       :rtype: dict
       :return: A dict with the following structure, where each
                field may not be present if missing in original
                message::

                { 'message-id': msg_id,
                  'subject': subject,
                  'from': from,
                  'to': to,
                  'cc': cc,
                  'headers' : { 'X-Mailer': mailer,
                                #.. all X- headers...
                              },
                  'subtype': msg_mime_subtype,
                  'body_text': plaintext_body,
                  'body_html': html_body,
                  'attachments': [('file1', 'bytes'),
                                  ('file2', 'bytes')],
                  # ...
                  'original': source_of_email,
                }

                The dict also carries ``id`` (same value as
                ``message-id``), and when the matching header exists:
                ``content-type``, ``reply``, ``date`` (formatted as
                ``%Y-%m-%d %H:%M:%S``), ``encoding``, ``references``
                and ``in-reply-to``. ``body`` and ``sub_type`` mirror
                ``body_text`` and ``subtype`` for backwards compatibility.
    """
    msg_txt = message
    if isinstance(message, str):
        msg_txt = email.message_from_string(message)

    # Warning: message_from_string doesn't always work correctly on unicode,
    # we must use utf-8 strings here :-(
    if isinstance(message, unicode):
        message = message.encode('utf-8')
        msg_txt = email.message_from_string(message)

    message_id = msg_txt.get('message-id', False)
    msg = {}

    if save_original:
        # save original, we need to be able to read the original email sometimes
        msg['original'] = message.as_string() if isinstance(message, Message) \
            else message
        msg['original'] = base64.b64encode(msg['original'])  # binary fields are b64

    if not message_id:
        # Very unusual situation, but we should be fault-tolerant here
        message_id = time.time()
        msg_txt['message-id'] = message_id
        _logger.info('Parsing Message without message-id, generating a random one: %s', message_id)

    fields = msg_txt.keys()
    msg['id'] = message_id
    msg['message-id'] = message_id

    if 'Subject' in fields:
        msg['subject'] = decode(msg_txt.get('Subject'))

    if 'Content-Type' in fields:
        msg['content-type'] = msg_txt.get('Content-Type')

    if 'From' in fields:
        msg['from'] = decode(msg_txt.get('From') or msg_txt.get_unixfrom())

    if 'To' in fields:
        msg['to'] = decode(msg_txt.get('To'))

    # Delivered-To, when present, overrides the value taken from To.
    if 'Delivered-To' in fields:
        msg['to'] = decode(msg_txt.get('Delivered-To'))

    if 'CC' in fields:
        msg['cc'] = decode(msg_txt.get('CC'))

    if 'Cc' in fields:
        msg['cc'] = decode(msg_txt.get('Cc'))

    if 'Reply-To' in fields:
        msg['reply'] = decode(msg_txt.get('Reply-To'))

    if 'Date' in fields:
        try:
            date_hdr = decode(msg_txt.get('Date'))
            parsed_date = dateutil.parser.parse(date_hdr, fuzzy=True)
            if parsed_date.utcoffset() is None:
                # naive datetime, so we arbitrarily decide to make it
                # UTC, there's no better choice. Should not happen,
                # as RFC2822 requires timezone offset in Date headers.
                stored_date = parsed_date.replace(tzinfo=pytz.utc)
            else:
                stored_date = parsed_date.astimezone(pytz.utc)
        except Exception:
            _logger.warning('Failed to parse Date header %r in incoming mail '
                            'with message-id %r, assuming current date/time.',
                            msg_txt.get('Date'), message_id)
            # NOTE(review): this fallback is naive local time while parsed
            # dates are normalised to UTC - confirm this is intended.
            stored_date = datetime.datetime.now()
        msg['date'] = stored_date.strftime("%Y-%m-%d %H:%M:%S")

    if 'Content-Transfer-Encoding' in fields:
        msg['encoding'] = msg_txt.get('Content-Transfer-Encoding')

    if 'References' in fields:
        msg['references'] = msg_txt.get('References')

    if 'In-Reply-To' in fields:
        msg['in-reply-to'] = msg_txt.get('In-Reply-To')

    # Collect every header whose name starts with 'X-'.
    msg['headers'] = {}
    msg['subtype'] = 'plain'
    for name, value in msg_txt.items():
        if name.startswith('X-'):
            msg['headers'][name] = value

    if not msg_txt.is_multipart() or 'text/plain' in msg.get('content-type', ''):
        encoding = msg_txt.get_content_charset()
        body = tools.ustr(msg_txt.get_payload(decode=True), encoding, errors='replace')
        if 'text/html' in msg.get('content-type', ''):
            msg['body_html'] = body
            msg['subtype'] = 'html'
            body = tools.html2plaintext(body)
        msg['body_text'] = tools.ustr(body, encoding, errors='replace')

    attachments = []
    if msg_txt.is_multipart() or 'multipart/alternative' in msg.get('content-type', ''):
        body = ""
        if 'multipart/alternative' in msg.get('content-type', ''):
            msg['subtype'] = 'alternative'
        else:
            msg['subtype'] = 'mixed'
        for part in msg_txt.walk():
            # Containers carry no payload of their own.
            if part.get_content_maintype() == 'multipart':
                continue

            encoding = part.get_content_charset()
            filename = part.get_filename()
            if part.get_content_maintype() == 'text':
                content = part.get_payload(decode=True)
                # A text part with a filename is recorded as an attachment
                # and is still considered for the body below.
                if filename:
                    attachments.append((filename, content))
                content = tools.ustr(content, encoding, errors='replace')
                if part.get_content_subtype() == 'html':
                    msg['body_html'] = content
                    msg['subtype'] = 'html'  # html version prevails
                    body = tools.ustr(tools.html2plaintext(content))
                    # Drop literal '&#13;' (carriage-return) entities still
                    # present in the converted text. The literal had been
                    # split across two lines, which is a syntax error.
                    body = body.replace('&#13;', '')
                elif part.get_content_subtype() == 'plain':
                    body = content
            elif part.get_content_maintype() in ('application', 'image'):
                if filename:
                    attachments.append((filename, part.get_payload(decode=True)))
                else:
                    # Unnamed application/image payloads are appended to the body text.
                    res = part.get_payload(decode=True)
                    body += tools.ustr(res, encoding, errors='replace')

        msg['body_text'] = body
    msg['attachments'] = attachments

    # for backwards compatibility:
    msg['body'] = msg['body_text']
    msg['sub_type'] = msg['subtype'] or 'plain'
    return msg