Skip to content

Commit c9e6ad0

Browse files
authored
done
1 parent e1c70ea commit c9e6ad0

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed

main.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import os
2+
import re
3+
import email
4+
from email import policy
5+
from email.parser import BytesParser
6+
from email.header import decode_header
7+
from datetime import datetime
8+
from bs4 import BeautifulSoup
9+
from docx import Document
10+
11+
# Folder scanned for .eml files and path of the generated Word report.
folder_path = "data"
doc_path = "emails.docx"

# Column titles of the report table; they double as keys of every row dict.
COLUMNS = (
    "Дата/время",
    "Отправитель (от кого)",
    "Получатель (кому)",
    "Содержание письма / Тема",
    "Названия вложений",
)


def header_text(value):
    """Return a header value as ``str``; a missing header becomes ``""``.

    ``msg[...]`` yields ``None`` for absent headers, and assigning ``None``
    to a python-docx cell raises ``TypeError``, so normalise it here.
    """
    return "" if value is None else str(value)


def format_date(date_header):
    """Format a ``Date`` header as ``DD.MM.YYYY HH:MM``.

    Messages parsed with ``policy.default`` expose the already parsed value
    as ``date_header.datetime``; it keeps the sender's wall-clock time, which
    is what the report shows.  Falls back to the raw header text when the
    date could not be parsed and to ``""`` when the header is missing.
    """
    if date_header is None:
        return ""
    parsed = getattr(date_header, "datetime", None)
    if parsed is None:
        return str(date_header)
    return parsed.strftime("%d.%m.%Y %H:%M")


def clean_text(text):
    """Drop blank lines, trim each line and collapse whitespace runs."""
    return "\n".join(
        re.sub(r"\s{2,}", " ", line.strip())
        for line in text.splitlines()
        if line.strip()
    )


def extract_body(msg):
    """Return the cleaned text body of *msg* (plain preferred, then HTML)."""
    body = msg.get_body(preferencelist=("plain", "html"))
    if body is None:
        return ""
    try:
        content = body.get_content()
    except LookupError:
        # Unknown charset declared by the sender: decode best-effort instead
        # of aborting the whole run because of a single message.
        raw = body.get_payload(decode=True) or b""
        content = raw.decode("utf-8", errors="replace")
    # Convert HTML to plain text.
    if body.get_content_type() == "text/html":
        content = BeautifulSoup(content, "html.parser").get_text()
    return clean_text(content)


def parse_email(eml_path):
    """Parse one .eml file into a report row (dict keyed by ``COLUMNS``)."""
    with open(eml_path, "rb") as file:
        msg = BytesParser(policy=policy.default).parse(file)

    names = (part.get_filename() for part in msg.iter_attachments())
    attachments = [name for name in names if name]
    subject = header_text(msg["Subject"])
    body_content = extract_body(msg)

    return {
        "Дата/время": format_date(msg["Date"]),
        "Отправитель (от кого)": header_text(msg["From"]),
        "Получатель (кому)": header_text(msg["To"]),
        "Содержание письма / Тема": f"Тема: {subject}\n\n{body_content}\n\n",
        "Названия вложений": ", ".join(attachments) if attachments else " ",
    }


def collect_emails(folder):
    """Parse every ``.eml`` file in *folder* and return the report rows.

    File names are matched case-insensitively and processed in sorted order
    so the report is deterministic across platforms.
    """
    return [
        parse_email(os.path.join(folder, filename))
        for filename in sorted(os.listdir(folder))
        if filename.lower().endswith(".eml")
    ]


def build_report(rows, path):
    """Write *rows* as a bordered table into a Word document at *path*."""
    doc = Document()
    doc.add_heading('Список писем', 0)

    table = doc.add_table(rows=1, cols=len(COLUMNS))
    table.style = 'Table Grid'

    # Header row.
    for cell, title in zip(table.rows[0].cells, COLUMNS):
        cell.text = title

    # One table row per e-mail.
    for row in rows:
        for cell, title in zip(table.add_row().cells, COLUMNS):
            cell.text = row[title]

    doc.save(path)


def main():
    """Read all e-mails from ``folder_path`` and save the Word report."""
    output_data = collect_emails(folder_path)
    print(f"\nВсего обработано писем: {len(output_data)}")

    build_report(output_data, doc_path)

    print(f"\nФайл Word с таблицей сохранён: {doc_path}")
    print(f"Обработано {len(output_data)} писем.")


if __name__ == "__main__":
    main()

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pandas
2+
beautifulsoup4
3+
python-docx

start.bat

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
@echo off
rem Bootstrap script: makes sure Python is available (installing 3.11 when it
rem is missing), creates/activates a local virtual environment, installs the
rem dependencies from requirements.txt and runs main.py.
setlocal

echo Checking if Python is installed...
python --version >nul 2>&1
rem The install steps live at top level (reached via goto) instead of inside a
rem parenthesized block: %errorlevel% and %PATH% inside a block are expanded
rem once, when the whole block is parsed, so the checks would see stale values.
if %errorlevel% equ 0 goto python_ready

echo Python not found. Downloading and installing Python 3.11...
powershell -Command "Invoke-WebRequest -Uri https://www.python.org/ftp/python/3.11.4/python-3.11.4-amd64.exe -OutFile python-installer.exe"
if not exist python-installer.exe (
    echo Error downloading the Python installer.
    pause
    exit /b 1
)

echo Installing Python in silent mode...
python-installer.exe /quiet InstallAllUsers=1 PrependPath=1 Include_test=0
if %errorlevel% neq 0 (
    echo Error during Python installation.
    pause
    exit /b 1
)
echo Deleting installer file...
del python-installer.exe
rem Rebuild PATH from the machine-wide registry value so the freshly installed
rem python.exe is found without opening a new console.
set "PATH=%SystemRoot%\system32;%SystemRoot%;%SystemRoot%\System32\Wbem;%SYSTEMROOT%\System32\WindowsPowerShell\v1.0\"
for /f "usebackq tokens=2,* skip=2" %%A in (`reg query "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /v PATH`) do set "PATH=%PATH%;%%B"

:python_ready

if not exist "venv\Scripts\python.exe" (
    echo Creating virtual environment...
    python -m venv venv
) else (
    echo Virtual environment already exists.
)

echo Activating virtual environment...
call venv\Scripts\activate

echo Installing/updating dependencies from requirements.txt...
python -m pip install --upgrade pip
python -m pip install -r requirements.txt

echo Running main script...
python main.py

echo.
echo Script has finished. Press any key to exit.
pause

0 commit comments

Comments
 (0)