email.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. from email import message_from_bytes
  2. import email.header
  3. import os
  4. import re
  5. import datetime
  6. import calendar
  7. class HTML:
  8. def __init__(self, body):
  9. self.body = body
  10. self.type = "text/html"
  11. class PLAIN:
  12. def __init__(self, body):
  13. self.body = body
  14. self.type = "text/plain"
  15. class FILE:
  16. def __init__(self, filename, byte: bytes, content_type: str, content_disposition: str):
  17. self.filename = filename
  18. self.size = len(byte) / 1024 / 1024 # 换算得到mb
  19. self.content_type = content_type
  20. self.content_disposition = content_disposition
  21. if self.size >= 0.1:
  22. self.size_str = f"{self.size:.2f}MB"
  23. elif self.size * 1024 > 0.1:
  24. self.size_str = f"{self.size * 1024:.2f}KB"
  25. else:
  26. self.size_str = f"{int(self.size * 1024 * 1024):d}B"
  27. class Mail:
  28. date_pattern = re.compile(
  29. r"[A-Za-z]+, "
  30. r"([0-9]{1,2}) "
  31. r"([A-Za-z]+) "
  32. r"([0-9]{4}) "
  33. r"([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2}) "
  34. r"([\s\S]*)"
  35. )
  36. time_zone_pattern = re.compile(r"([+-])([0-9]{2})00")
  37. def __init__(self, num: str, data: bytes):
  38. self.__date_ = None
  39. self.byte = data
  40. self.num = num
  41. @property
  42. def msg_data(self): # 有需要的时候才加载
  43. if self.__date_:
  44. return self.__date_
  45. self.__date_ = message_from_bytes(self.byte)
  46. return self.__date_
  47. @property
  48. def from_addr(self):
  49. if not self.msg_data['From']:
  50. return ""
  51. return str(email.header.make_header(email.header.decode_header(self.msg_data['From'])))
  52. @property
  53. def date(self):
  54. if not self.msg_data['Date']:
  55. return datetime.datetime(2022, 1, 1)
  56. date = str(email.header.make_header(email.header.decode_header(self.msg_data['Date'])))
  57. res = self.date_pattern.match(str(date)).groups()
  58. time = datetime.datetime(int(res[2]),
  59. list(calendar.month_abbr).index(res[1]),
  60. int(res[0]),
  61. int(res[3]),
  62. int(res[4]),
  63. int(res[5]))
  64. timezone = self.time_zone_pattern.match(res[6])
  65. if timezone:
  66. if timezone.groups()[0] == '-':
  67. time += datetime.timedelta(hours=int(timezone.groups()[1]))
  68. else:
  69. time -= datetime.timedelta(hours=int(timezone.groups()[1]))
  70. time += datetime.timedelta(hours=8) # 转换为北京时间
  71. return time
  72. @property
  73. def title(self):
  74. if not self.msg_data['Subject']:
  75. return ""
  76. return (str(email.header.make_header(email.header.decode_header(self.msg_data['Subject'])))
  77. .replace('\n', '')
  78. .replace('\r', ''))
  79. @property
  80. def body(self):
  81. return self.__get_body(self.msg_data)
  82. def __get_body(self, msg):
  83. if msg.is_multipart():
  84. res = ""
  85. for i in msg.get_payload():
  86. res += self.__get_body(i)
  87. return res
  88. else:
  89. msg_type = msg.get_content_type()
  90. if msg_type == "text/plain":
  91. return "text/plain:\n" + msg.get_payload(decode=True).decode('utf-8') + "\n"
  92. elif msg_type == "text/html":
  93. return "text/html:\n" + msg.get_payload(decode=True).decode('utf-8') + "\n"
  94. else:
  95. return ""
  96. @property
  97. def body_list(self):
  98. return self.__get_body_list(self.msg_data)
  99. def __get_body_list(self, msg):
  100. if msg.is_multipart():
  101. res = []
  102. for i in msg.get_payload():
  103. son = self.__get_body_list(i)
  104. if son is not None:
  105. res += son
  106. return res
  107. else:
  108. msg_type = msg.get_content_type()
  109. if msg_type == "text/plain":
  110. return [PLAIN(msg.get_payload(decode=True).decode('utf-8'))]
  111. elif msg_type == "text/html":
  112. return [HTML(msg.get_payload(decode=True).decode('utf-8'))]
  113. else:
  114. return None
  115. def save_file(self, file_dir: str):
  116. return self.__get_files(self.msg_data, file_dir)
  117. @staticmethod
  118. def __get_files(msg, file_dir: str):
  119. create = False
  120. for part in msg.walk():
  121. if not create:
  122. os.makedirs(file_dir, exist_ok=True)
  123. create = True
  124. if part.get_content_maintype() == 'multipart':
  125. continue
  126. if part.get('Content-Disposition') is None:
  127. continue
  128. filename = part.get_filename()
  129. if filename:
  130. filepath = os.path.join(file_dir, filename)
  131. with open(filepath, 'wb') as f:
  132. f.write(part.get_payload(decode=True))
  133. @property
  134. def file_list(self):
  135. res = []
  136. for part in self.msg_data.walk():
  137. if part.get_content_maintype() == 'multipart':
  138. continue
  139. if part.get('Content-Disposition') is None:
  140. continue
  141. filename = part.get_filename()
  142. if filename:
  143. res.append(FILE(filename, part.get_payload(decode=True),
  144. part.get_content_type(), part.get('Content-Disposition')))
  145. return res
  146. def get_file(self, filename) -> "(bytes, str, str) | (None, None, None)":
  147. for part in self.msg_data.walk():
  148. if part.get_content_maintype() == 'multipart':
  149. continue
  150. if part.get('Content-Disposition') is None:
  151. continue
  152. if filename == part.get_filename():
  153. return part.get_payload(decode=True), part.get_content_type(), part.get('Content-Disposition')
  154. return None, None, None
  155. def __lt__(self, other: "Mail"):
  156. return self.date < other.date
  157. def __eq__(self, other: "Mail"):
  158. return self.date == other.date
  159. def __le__(self, other: "Mail"):
  160. return self.date <= other.date
  161. def __str__(self):
  162. return f"{self.num} {self.title} {self.from_addr} {self.date}"