Add cli, top sender calculation functionality
This commit is contained in:
parent
df23e520fa
commit
88899dfbd4
@ -3,13 +3,9 @@ name = "maildirclean"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Add your description here"
|
description = "Add your description here"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
authors = [
|
authors = [{ name = "Alex Selimov", email = "alex@alexselimov.com" }]
|
||||||
{ name = "Alex Selimov", email = "alex@alexselimov.com" }
|
|
||||||
]
|
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
dependencies = [
|
dependencies = ["pandas>=2.2.3"]
|
||||||
"pandas>=2.2.3",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
maildirclean = "maildirclean:main"
|
maildirclean = "maildirclean:main"
|
||||||
@ -19,6 +15,4 @@ requires = ["hatchling"]
|
|||||||
build-backend = "hatchling.build"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = ["pytest>=8.3.5"]
|
||||||
"pytest>=8.3.5",
|
|
||||||
]
|
|
||||||
|
@ -1,2 +1,5 @@
|
|||||||
def main() -> None:
|
from .cli import cli
|
||||||
print("Hello from maildirclean!")
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
return cli()
|
||||||
|
70
src/maildirclean/cli.py
Normal file
70
src/maildirclean/cli.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .maildir import parse_maildir
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments() -> argparse.Namespace:
|
||||||
|
"""Parse command line arguments
|
||||||
|
|
||||||
|
Returns: Namespace object corresponding to parsed arguments
|
||||||
|
|
||||||
|
"""
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description="Analyze email metadata from a maildir directory"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"maildir", type=str, help="Path to the maildir directory to analyze"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--top",
|
||||||
|
"-t",
|
||||||
|
type=int,
|
||||||
|
default=5,
|
||||||
|
help="Number of top senders to display (default: 5)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--verbose", "-v", action="store_true", help="Enable verbose output"
|
||||||
|
)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
def cli() -> int:
    """Run the command line interface.

    Parses the command line, loads the maildir and prints the most frequent
    senders together with the display names each of them used.

    Returns: 1 when the given path is not a directory, otherwise 0 (also when
        no senders were found)

    """
    max_names = 5  # Limit on how many display names are listed per sender

    args = parse_arguments()

    maildir_path = Path(args.maildir)
    # is_dir() is already False for a path that does not exist, so a separate
    # exists() check is not needed.
    if not maildir_path.is_dir():
        print(f"Error: {args.maildir} is not a valid directory", file=sys.stderr)
        return 1

    if args.verbose:
        print(f"Analyzing emails in {maildir_path}...")

    maildir = parse_maildir(maildir_path)
    if args.verbose:
        print(f"Found {len(maildir._df)} emails")

    top_senders = maildir.get_top_n_senders(args.top)

    if not top_senders:
        print("No senders found in the maildir", file=sys.stderr)
        return 0

    result = []
    for i, sender in enumerate(top_senders, 1):
        names_str = ", ".join(sender.names[:max_names])
        if len(sender.names) > max_names:
            names_str += f" and {len(sender.names) - max_names} more"

        result.append(f"{i}. {sender.email} - Names used: {names_str}")

    output = "\n".join(
        [f"Top {len(top_senders)} senders in {maildir_path}:", "=" * 40, *result]
    )

    print(output)

    return 0
|
@ -1,8 +1,11 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .utility import first_match_or_empty, to_datetime_safe
|
||||||
|
|
||||||
|
|
||||||
METADATA_SCHEMA = sorted(["Path", "From", "Date"])
|
METADATA_SCHEMA = sorted(["path", "from", "date"])
|
||||||
|
|
||||||
|
|
||||||
def make_email_metadata(email_path: str | Path) -> dict[str, str]:
|
def make_email_metadata(email_path: str | Path) -> dict[str, str]:
|
||||||
@ -15,13 +18,17 @@ def make_email_metadata(email_path: str | Path) -> dict[str, str]:
|
|||||||
Returns: Dict containing the required metadata
|
Returns: Dict containing the required metadata
|
||||||
"""
|
"""
|
||||||
key_is_set = {key: False for key in METADATA_SCHEMA}
|
key_is_set = {key: False for key in METADATA_SCHEMA}
|
||||||
metadata = {"Path": str(email_path)}
|
metadata = {"path": str(email_path)}
|
||||||
key_is_set["Path"] = True
|
key_is_set["path"] = True
|
||||||
|
|
||||||
with open(email_path, "r") as f:
|
with open(email_path, "rb") as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
try:
|
try:
|
||||||
k, v = [val.strip() for val in line.split(":", maxsplit=1)]
|
k, v = [
|
||||||
|
val.strip()
|
||||||
|
for val in line.decode(errors="ignore").split(":", maxsplit=1)
|
||||||
|
]
|
||||||
|
k = k.lower()
|
||||||
if k in METADATA_SCHEMA:
|
if k in METADATA_SCHEMA:
|
||||||
metadata[k] = v
|
metadata[k] = v
|
||||||
key_is_set[k] = True
|
key_is_set[k] = True
|
||||||
@ -63,10 +70,17 @@ class MailDir:
|
|||||||
Stores the metadata associated with all local emails.
|
Stores the metadata associated with all local emails.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
name_regex = r"^(.*?)(?=<)"
|
||||||
|
email_regex = r"<?([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)>?"
|
||||||
|
|
||||||
KEYS_AND_FUNCS = {
|
KEYS_AND_FUNCS = {
|
||||||
"Name": lambda df: df["From"].map(lambda x: x.split("<")[0].strip('" ')),
|
"name": lambda df: df["from"].map(
|
||||||
"Email": lambda df: df["From"].map(lambda x: x.split("<")[1].strip(">")),
|
lambda x: first_match_or_empty(MailDir.name_regex, x).strip('" ')
|
||||||
"Date": lambda df: pd.to_datetime(df["Date"]),
|
),
|
||||||
|
"email": lambda df: df["from"].map(
|
||||||
|
lambda x: first_match_or_empty(MailDir.email_regex, x).strip("")
|
||||||
|
),
|
||||||
|
"date": lambda df: df["date"].map(lambda x: to_datetime_safe(x)),
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, email_metadata: list[dict[str, str]]):
|
def __init__(self, email_metadata: list[dict[str, str]]):
|
||||||
@ -93,9 +107,9 @@ class MailDir:
|
|||||||
|
|
||||||
senders = [
|
senders = [
|
||||||
TopSender(
|
TopSender(
|
||||||
email, list(self._df.loc[self._df["Email"] == email, "Name"].unique())
|
email, list(self._df.loc[self._df["email"] == email, "name"].unique())
|
||||||
)
|
)
|
||||||
for email in self._df["Email"].value_counts().iloc[0:n].index
|
for email in self._df["email"].value_counts().iloc[0:n].index
|
||||||
]
|
]
|
||||||
|
|
||||||
return senders
|
return senders
|
||||||
|
28
src/maildirclean/utility.py
Normal file
28
src/maildirclean/utility.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def first_match_or_empty(pattern: str, text: str) -> str:
    """Get the first match for the provided pattern or "" if there is none.

    This is a wrapper to facilitate usage of re.search in lambda expressions

    Args:
        pattern: Pattern to search for. When it defines capture groups the
            first group is returned, otherwise the whole match.
        text: Text that will be searched. Anything that is not a string
            (e.g. None or a NaN placeholder) is treated as having no match.

    Returns: First pattern match or ""

    """
    # re.search raises TypeError on non-string input; a value that cannot be
    # searched simply has no match.
    if not isinstance(text, str):
        return ""

    match = re.search(pattern, text)
    if match is None:
        return ""

    # Patterns without a capture group have no group(1); use the full match.
    found = match.group(1) if match.re.groups else match.group(0)
    # An optional group that did not participate yields None; keep the
    # promised str return type.
    return found or ""
|
||||||
|
|
||||||
|
|
||||||
|
def to_datetime_safe(datetime_str: str):
    """Parse a date string without raising on malformed input.

    Args:
        datetime_str: Date text to parse

    Returns: The value produced by pd.to_datetime, or pd.NaT when the text
        cannot be parsed

    """
    try:
        # errors="coerce" already turns unparseable text into NaT.
        return pd.to_datetime(datetime_str, format="mixed", errors="coerce")
    except (ValueError, AttributeError):
        # Use the same "missing" marker as the coerce path instead of
        # inventing the current time as the date.
        return pd.NaT
|
@ -125,13 +125,13 @@ def sample_email_dir(tmp_dir):
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_email_metadata():
|
def sample_email_metadata():
|
||||||
return [
|
return [
|
||||||
{"From": "John Doe <john.doe@example.com>", "Date": "2025-01-01"},
|
{"from": "John Doe <john.doe@example.com>", "date": "2025-01-01"},
|
||||||
{"From": "John Doe <john.doe@example.com>", "Date": "2025-01-02"},
|
{"from": "John Doe <john.doe@example.com>", "date": "2025-01-02"},
|
||||||
{"From": "Johnny Doe <john.doe@example.com>", "Date": "2025-01-03"},
|
{"from": "Johnny Doe <john.doe@example.com>", "date": "2025-01-03"},
|
||||||
{"From": "J. Doe <john.doe@example.com>", "Date": "2025-01-04"},
|
{"from": "J. Doe <john.doe@example.com>", "date": "2025-01-04"},
|
||||||
{"From": "Jane Smith <jane.smith@example.com>", "Date": "2025-01-05"},
|
{"from": "Jane Smith <jane.smith@example.com>", "date": "2025-01-05"},
|
||||||
{"From": "Jane S. <jane.smith@example.com>", "Date": "2025-01-06"},
|
{"from": "Jane S. <jane.smith@example.com>", "date": "2025-01-06"},
|
||||||
{"From": "Alex Johnson <alex.johnson@example.com>", "Date": "2025-01-07"},
|
{"from": "Alex Johnson <alex.johnson@example.com>", "date": "2025-01-07"},
|
||||||
{"From": "Alex J. <alex.johnson@example.com>", "Date": "2025-01-08"},
|
{"from": "Alex J. <alex.johnson@example.com>", "date": "2025-01-08"},
|
||||||
{"From": "Sarah Williams <sarah@example.com>", "Date": "2025-01-09"},
|
{"from": "Sarah Williams <sarah@example.com>", "date": "2025-01-09"},
|
||||||
]
|
]
|
||||||
|
@ -8,20 +8,20 @@ def test_email_parsing(test_email):
|
|||||||
|
|
||||||
metadata = make_email_metadata(test_email)
|
metadata = make_email_metadata(test_email)
|
||||||
|
|
||||||
assert metadata["From"] == '"John Doe" <sender@example.com>'
|
assert metadata["from"] == '"John Doe" <sender@example.com>'
|
||||||
assert metadata["Date"] == "Wed, 16 Apr 2025 12:23:35 -0400"
|
assert metadata["date"] == "Wed, 16 Apr 2025 12:23:35 -0400"
|
||||||
assert metadata["Path"] == str(test_email)
|
assert metadata["path"] == str(test_email)
|
||||||
|
|
||||||
|
|
||||||
def test_maildir_creation(test_email):
|
def test_maildir_creation(test_email):
|
||||||
maildir = MailDir([make_email_metadata(test_email)])
|
maildir = MailDir([make_email_metadata(test_email)])
|
||||||
|
|
||||||
metadata = maildir._df.iloc[0]
|
metadata = maildir._df.iloc[0]
|
||||||
assert metadata["From"] == '"John Doe" <sender@example.com>'
|
assert metadata["from"] == '"John Doe" <sender@example.com>'
|
||||||
assert metadata["Name"] == "John Doe"
|
assert metadata["name"] == "John Doe"
|
||||||
assert metadata["Email"] == "sender@example.com"
|
assert metadata["email"] == "sender@example.com"
|
||||||
assert metadata["Date"] == pd.to_datetime("Wed, 16 Apr 2025 12:23:35 -0400")
|
assert metadata["date"] == pd.to_datetime("Wed, 16 Apr 2025 12:23:35 -0400")
|
||||||
assert metadata["Path"] == str(test_email)
|
assert metadata["path"] == str(test_email)
|
||||||
|
|
||||||
|
|
||||||
def test_get_top_n_senders(sample_email_metadata):
|
def test_get_top_n_senders(sample_email_metadata):
|
||||||
@ -94,9 +94,9 @@ def test_parse_maildir(sample_email_dir):
|
|||||||
maildir = parse_maildir(sample_email_dir)
|
maildir = parse_maildir(sample_email_dir)
|
||||||
|
|
||||||
assert len(maildir._df) == 3
|
assert len(maildir._df) == 3
|
||||||
assert "test@something.org" in list(maildir._df["Email"])
|
assert "test@something.org" in list(maildir._df["email"])
|
||||||
assert "not_a_test@something.org" in list(maildir._df["Email"])
|
assert "not_a_test@something.org" in list(maildir._df["email"])
|
||||||
|
|
||||||
assert "Test" in list(maildir._df["Name"])
|
assert "Test" in list(maildir._df["name"])
|
||||||
assert "Not a Test" in list(maildir._df["Name"])
|
assert "Not a Test" in list(maildir._df["name"])
|
||||||
assert "Test2" in list(maildir._df["Name"])
|
assert "Test2" in list(maildir._df["name"])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user