Ai+Python 邮件分析小轮子

admin

146030
文章

119
评论

2024年6月20日19:17:27评论17 views字数 7349阅读24分29秒阅读模式

邮件分析小轮子

诚然，沙箱固然好用，但如果你处在不出网的环境，或者沙箱无法识别邮件内的二维码，该怎么办？还是得人工一封一封地看！

此轮子快速解决你的分析需求

Ai+Python 邮件分析小轮子

Ai+Python 邮件分析小轮子

import re
import os
import email
import socket
from email import policy
from email.header import decode_header
from email.parser import BytesParser
from email.utils import parsedate_to_datetime

import ipinfo
from PIL import Image
from pyzbar.pyzbar import decode
import hashlib
import requests
import json
import chardet
import zipfile
import py7zr
import rarfile

# Regular expression matching http/https URLs.
# The literal had been split in two by stray page text, which left an
# unterminated string; it is rejoined here with the optional "s" restored.
url_pattern = re.compile(
    r'https?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)
# Regular expression matching dotted-quad IPv4-looking tokens.
# The backslashes (\b, \d, \.) are restored: without them the pattern
# looked for the literal letters "b" and "d" and never matched an address.
# It does not check that each octet is within 0-255.
ip_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')

def decode_qr_code(image_path):
    """Look for QR codes in an image file and print each decoded payload.

    Best-effort helper: any failure (unreadable image, decoder error,
    payload that is not valid UTF-8) is reported on stdout instead of being
    raised, so one bad attachment does not stop the analysis.

    :param image_path: path of the image file to scan
    :return: None; results are only printed
    """
    try:
        # The context manager releases the file handle as soon as decoding is
        # done; the original left the image open.
        with Image.open(image_path) as img:
            decoded_objects = decode(img)
        if decoded_objects:
            for obj in decoded_objects:
                print(f"QR Code Data: {obj.data.decode('utf-8')}")
        else:
            print(f"No QR code found in {image_path}")
    except Exception as e:
        # Deliberately broad: this is a per-attachment boundary and the
        # error is always surfaced to the analyst.
        print(f"Error decoding QR code: {e}")

def calculate_hashes(file_path):
    """Return the MD5 and SHA-256 hex digests of a file.

    The file is read in 8 KiB blocks so large files are never loaded into
    memory in one piece.

    :param file_path: path of the file to hash
    :return: dict with the keys 'MD5' and 'SHA-256' mapped to hex digests
    """
    md5_state = hashlib.md5()
    sha256_state = hashlib.sha256()
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(8192), b''):
            md5_state.update(block)
            sha256_state.update(block)
    return {
        'MD5': md5_state.hexdigest(),
        'SHA-256': sha256_state.hexdigest(),
    }

def get_password(emlbody_string):
    """Ask the DeepSeek chat-completions API to extract an archive password from mail text.

    The mail text is appended to a fixed Chinese instruction ("identify the
    extraction password in the following text and output only the password")
    and sent as the system message; the model's reply is treated as the
    password.

    NOTE(review): the mail body is untrusted input and is sent verbatim to a
    third-party service as part of the prompt - confirm this is acceptable
    for the mails being analysed.

    :param emlbody_string: decoded mail body text
    :return: the model's reply with spaces removed and surrounding
        whitespace stripped
    :raises requests.RequestException: on connection errors, timeouts or a
        non-2xx HTTP status
    :raises KeyError, IndexError: if the response JSON lacks the expected
        ``choices[0].message.content`` structure
    """
    url = "https://api.deepseek.com/chat/completions"

    payload = json.dumps({
        "messages": [
            {
                "content": '识别以下语句的解压密码，输出密码，不需要回复其他文字：' + emlbody_string,
                "role": "system"
            },
            {
                "content": "Hi",
                "role": "user"
            }
        ],
        "model": "deepseek-coder",
        "frequency_penalty": 0,
        "max_tokens": 2048,
        "presence_penalty": 0,
        "stop": None,
        "stream": False,
        "temperature": 1,
        "top_p": 1,
        "logprobs": False,
        "top_logprobs": None
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Authorization': 'Bearer 【你的key，自己去注册，反正免费的】'
    }

    # requests never times out by default; without an explicit timeout a
    # stalled connection would hang the whole analysis. (connect, read) seconds.
    response = requests.post(url, headers=headers, data=payload, timeout=(10, 60))
    # Surface HTTP failures (bad key, quota, server error) as an HTTPError
    # instead of an opaque KeyError on the missing 'choices' field below.
    response.raise_for_status()

    data = response.json()

    # The reply text lives in the first choice's message.
    content = data['choices'][0]['message']['content']

    print('成功匹配密码：' + content)
    # Drop spaces inside the reply and any leading/trailing newline the model
    # added, so the value can be handed straight to an archive library.
    content = content.replace(" ", "").strip()
    return content

def extract_archive(file_path, password, output_dir):
    """Extract a .zip, .7z or .rar archive into ``output_dir``.

    The archive type is chosen from the file extension (case-insensitive).
    A path with any other extension is left untouched and the function
    returns without doing anything.

    :param file_path: path of the archive
    :param password: archive password as ``str``; ``None`` or ``""`` means
        "no password"
    :param output_dir: directory to extract into
    :return: None
    """
    lowered = file_path.lower()
    # Normalise "no password" so None no longer crashes on .encode() and an
    # empty string is not passed on as a real password.
    secret = password or None
    if lowered.endswith('.zip'):
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            # zipfile expects the password as bytes.
            zip_ref.extractall(path=output_dir, pwd=secret.encode() if secret else None)
    elif lowered.endswith('.7z'):
        with py7zr.SevenZipFile(file_path, mode='r', password=secret) as z:
            z.extractall(path=output_dir)
    elif lowered.endswith('.rar'):
        # .rar added to match the archive types handled elsewhere in this file.
        with rarfile.RarFile(file_path, 'r') as rar_ref:
            rar_ref.extractall(path=output_dir, pwd=secret)

def _decode_mime_header(raw_value):
    """Decode an RFC 2047 encoded header value into one plain string ('' for None)."""
    if raw_value is None:
        return ""
    text = str(raw_value)
    try:
        chunks = decode_header(text)
    except email.errors.HeaderParseError:
        # Malformed encoded-word: show the raw text rather than abort.
        return text
    decoded = []
    for chunk, charset in chunks:
        if isinstance(chunk, bytes):
            try:
                decoded.append(chunk.decode(charset or "ascii", errors="replace"))
            except LookupError:
                # Unknown charset label in the header.
                decoded.append(chunk.decode("utf-8", errors="replace"))
        else:
            decoded.append(chunk)
    return "".join(decoded)


def _decode_text_part(part):
    """Return a leaf part's payload as text: declared charset, then chardet's guess, then UTF-8."""
    raw = part.get_payload(decode=True) or b""
    declared = part.get_content_charset()
    if declared:
        try:
            return raw.decode(declared)
        except (UnicodeDecodeError, LookupError):
            pass  # declared charset is wrong or unknown; fall through to detection
    guessed = chardet.detect(raw)["encoding"]
    if guessed:
        try:
            return raw.decode(guessed)
        except (UnicodeDecodeError, LookupError):
            pass  # detection was wrong; fall through to the lossy default
    return raw.decode("utf-8", errors="replace")


def _extract_attached_archive(archive_fp, dest_dir, password):
    """Unpack a saved .zip/.7z/.rar attachment into dest_dir, using password only when required."""
    suffix = os.path.splitext(archive_fp)[1].lower()
    try:
        if suffix == ".zip":
            with zipfile.ZipFile(archive_fp, "r") as zf:
                # Bit 0 of the general-purpose flags marks an encrypted member.
                encrypted = any(info.flag_bits & 0x1 for info in zf.infolist())
                if not encrypted:
                    zf.extractall(dest_dir)
                elif password:
                    # zipfile expects the password as bytes.
                    zf.extractall(dest_dir, pwd=password.encode())
                else:
                    print("压缩包已加密，但未获取到解压密码：", archive_fp)
        elif suffix == ".7z":
            try:
                with py7zr.SevenZipFile(archive_fp, "r") as zf:
                    zf.extractall(dest_dir)
            except py7zr.exceptions.PasswordRequired:
                if not password:
                    print("压缩包已加密，但未获取到解压密码：", archive_fp)
                    return
                with py7zr.SevenZipFile(archive_fp, "r", password=password) as zf:
                    zf.extractall(dest_dir)
        elif suffix == ".rar":
            try:
                with rarfile.RarFile(archive_fp, "r") as rf:
                    rf.extractall(dest_dir)
            except rarfile.PasswordRequired:
                if not password:
                    print("压缩包已加密，但未获取到解压密码：", archive_fp)
                    return
                with rarfile.RarFile(archive_fp, "r") as rf:
                    rf.extractall(dest_dir, pwd=password)
    except Exception as exc:
        # Deliberately broad: attachments are hostile input (corrupt archive,
        # wrong password, unsupported encryption). Report and keep analysing.
        print(f"解压失败 {archive_fp}: {exc}")


def parse_eml(eml_fp, attr_dir):
    """
    Parse an .eml file and print an analysis report.

    Prints the file hashes, subject, sender, recipients, date and the first
    IPv4-looking token found in the Received headers. Then walks the leaf
    MIME parts: attachments are saved to ``attr_dir``, hashed, scanned for
    QR codes (images) and queued for extraction (archives); text parts are
    printed (truncated) and searched for URLs. Archives are extracted after
    the walk so that a password derived from the plain-text body is available
    whatever the order of the parts.

    :params eml_fp: path of the .eml file
    :params attr_dir: directory where attachments are saved and extracted
    """
    # Hash of the mail file itself.
    print(f"邮件 {eml_fp} 的哈希值:")
    for algo, digest in calculate_hashes(eml_fp).items():
        print(f"{algo}: {digest}")

    os.makedirs(attr_dir, exist_ok=True)

    # Read as bytes: a mail is not guaranteed to be in the locale's text
    # encoding, and text mode could fail or corrupt 8-bit content.
    with open(eml_fp, "rb") as file:
        msg = email.message_from_bytes(file.read())

    # Subject (may be absent or split into several encoded chunks).
    print("主题：", _decode_mime_header(msg["Subject"]))

    # Sender: parse the raw header first, then decode only the display name,
    # so an encoded display name cannot inject a fake "<address>".
    raw_from_name, from_addr = email.utils.parseaddr(str(msg["From"] or ""))
    print("发件人邮箱：", from_addr)
    print("发件人名称：", _decode_mime_header(raw_from_name))

    # Recipients from To / Cc / Bcc (each header may occur more than once).
    recipient_headers = []
    for field in ("To", "Cc", "Bcc"):
        recipient_headers.extend(str(value) for value in msg.get_all(field, []))
    print("收件人信息：")
    for name, addr in email.utils.getaddresses(recipient_headers):
        print(f"{_decode_mime_header(name)} <{addr}>")

    # Date header; None is printed when it is missing or unparsable.
    received_date = None
    date_header = msg["Date"]
    if date_header:
        try:
            received_date = parsedate_to_datetime(str(date_header))
        except (TypeError, ValueError):
            received_date = None
    print("接收时间：", received_date)

    # First IPv4-looking token in the Received headers, in file order.
    # NOTE(review): Received headers are normally prepended by each hop, so
    # the first one is the hop nearest the recipient - confirm this is the
    # address wanted as "sender IP".
    sender_ip = None
    for received in msg.get_all('received', []):
        match = ip_pattern.search(str(received))
        if match:
            sender_ip = match.group(0)
            break
    if sender_ip:
        print("发件人IP地址：", sender_ip)
    else:
        print("无法获取发件人IP地址")

    password = None   # archive password recovered from the plain-text body
    archives = []     # saved archive attachments, extracted after the walk

    # Body and attachments: only leaf parts carry data.
    for par in msg.walk():
        if par.is_multipart():
            continue
        raw_name = par.get_filename()
        if raw_name:
            # Attachment. Keep only the final path component so a crafted
            # name such as "../../x" cannot write outside attr_dir.
            attr_name = os.path.basename(_decode_mime_header(raw_name).replace("\\", "/"))
            if attr_name in ("", ".", ".."):
                attr_name = "attachment"
            print("附件名:", attr_name)
            attr_data = par.get_payload(decode=True) or b""
            attr_fp = os.path.join(attr_dir, attr_name)
            with open(attr_fp, 'wb') as f_write:
                f_write.write(attr_data)
            print(f"附件 {attr_name} 的哈希值:")
            for algo, digest in calculate_hashes(attr_fp).items():
                print(f"{algo}: {digest}")
            lowered = attr_name.lower()
            # Images are scanned for QR codes.
            if lowered.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                decode_qr_code(attr_fp)
            # Archives are extracted once the whole message has been read.
            if lowered.endswith(('.zip', '.7z', '.rar')):
                archives.append(attr_fp)
        else:
            content_type = par.get_content_type()
            if content_type == "text/plain":
                body = _decode_text_part(par)
                print("邮件正文：", body[:10])
                # Ask for the password once; a failed lookup must not abort
                # the rest of the report.
                if not password and body.strip():
                    try:
                        password = get_password(body)
                    except (requests.RequestException, KeyError, IndexError, ValueError) as exc:
                        print("密码识别失败：", exc)
                for url in url_pattern.findall(body):
                    print("URL found in body:", url)
            elif par.get_content_maintype() == "text":
                # HTML (or other textual) body.
                html_body = _decode_text_part(par)
                print("HTML正文：", html_body[:100])
                for url in url_pattern.findall(html_body):
                    print("URL found in HTML body:", url)
            else:
                # Unnamed binary part: printing it as text would be noise.
                print("跳过非文本内容：", content_type)
        print("-" * 60)

    for archive_fp in archives:
        _extract_attached_archive(archive_fp, attr_dir, password)

# Arguments: 1. target .eml file  2. directory to extract attachments into.
# Both may be given on the command line; the original hard-coded values are
# kept as defaults.

if __name__ == "__main__":
    import sys

    eml_path = sys.argv[1] if len(sys.argv) > 1 else "3.eml"
    # Raw string: "\p" and "\S" are invalid escape sequences in a normal
    # literal and trigger a SyntaxWarning on recent Python versions. The
    # resulting path is unchanged.
    out_dir = sys.argv[2] if len(sys.argv) > 2 else r"E:\python\邮件分析\Safedi"
    parse_eml(eml_path, out_dir)

原文始发于微信公众号（硅步security）：Ai+Python 邮件分析小轮子

免责声明：文章中涉及的程序（方法）可能带有攻击性，仅供安全研究与教学之用。读者若将其信息用于其他用途，由读者承担全部法律及连带责任，本站不承担任何法律及连带责任。如有问题可邮件联系（建议使用企业邮箱或有效邮箱，避免邮件被拦截；联系方式见首页），望知悉。

左青龙
微信扫一扫

右白虎
微信扫一扫

Ai+Python 邮件分析小轮子

远程开发引起的隐形风险与合规守护

Perl 语言基础入门

Rust标准黑客工具设计实现分析

插件前台任意文件读取漏洞复现与分析 (CVE-2025-2294)

汇编语言Day07

JavaWeb代码审计实战开源系统学习心得

【知识回顾】RedTeam 工具开发原则 - 基础知识

脚本小子进阶之路——Shell编程简介

javaFx 教程二

实战某凤网站导致的代码审计

发表评论

在线咨询

微信