# maildir_clean/tests/test_maildir.py
#
# Source-listing metadata carried over from the file viewer:
#   103 lines
#   3.2 KiB
#   Python

import pandas as pd
from fixtures import *
from maildirclean.maildir import make_email_metadata, MailDir, TopSender, parse_maildir
def test_email_parsing(test_email):
    """Check the fields make_email_metadata reports for the sample email.

    The ``test_email`` fixture is compared via ``str(test_email)`` against
    the ``path`` entry; the ``from`` and ``date`` entries are compared to
    the raw header strings.
    """
    parsed = make_email_metadata(test_email)

    expected_fields = {
        "from": '"John Doe" <sender@example.com>',
        "date": "Wed, 16 Apr 2025 12:23:35 -0400",
        "path": str(test_email),
    }
    for field, wanted in expected_fields.items():
        assert parsed[field] == wanted
def test_maildir_creation(test_email):
    """Build a MailDir from one parsed email and inspect its first row.

    Besides the raw ``from`` and ``path`` values, the row is expected to
    expose ``name`` and ``email`` columns and a ``date`` equal to the
    pandas-parsed timestamp of the header string.
    """
    single_entry = [make_email_metadata(test_email)]
    first_row = MailDir(single_entry)._df.iloc[0]

    expected_row = {
        "from": '"John Doe" <sender@example.com>',
        "name": "John Doe",
        "email": "sender@example.com",
        "date": pd.to_datetime("Wed, 16 Apr 2025 12:23:35 -0400"),
        "path": str(test_email),
    }
    for column, wanted in expected_row.items():
        assert first_row[column] == wanted
def test_get_top_n_senders(sample_email_metadata):
    """Ask for the top two senders and check their addresses and name sets."""
    ranked = MailDir(sample_email_metadata).get_top_n_senders(2)

    assert len(ranked) == 2

    # (address, display names) in expected rank order. Per the original
    # test notes, john.doe has 4 emails and jane.smith has 2.
    # NOTE(review): the ordering test suggests another sender also has
    # 2 emails, so second place may rely on tie-breaking -- confirm.
    expected_ranking = (
        ("john.doe@example.com", {"John Doe", "Johnny Doe", "J. Doe"}),
        ("jane.smith@example.com", {"Jane Smith", "Jane S."}),
    )
    for position, (address, display_names) in enumerate(expected_ranking):
        sender = ranked[position]
        assert sender.email == address
        assert set(sender.names) == display_names
def test_get_top_n_senders_with_empty_data():
    """A MailDir built from no emails yields no top senders."""
    empty_maildir = MailDir([])

    result = empty_maildir.get_top_n_senders(5)

    # Nothing to rank, so the result must have length zero.
    assert len(result) == 0
def test_get_top_n_senders_with_n_greater_than_unique_senders(sample_email_metadata):
    """Requesting more senders than exist returns every unique sender once."""
    all_addresses = {
        "john.doe@example.com",
        "jane.smith@example.com",
        "alex.johnson@example.com",
        "sarah@example.com",
    }

    # Ask for 10 although only 4 unique addresses are expected.
    result = MailDir(sample_email_metadata).get_top_n_senders(10)

    assert len(result) == 4
    assert {entry.email for entry in result} == all_addresses
def test_get_top_n_senders_ordering(sample_email_metadata):
    """Senders come back ordered by how many emails they sent."""
    # Expected rank order with the per-sender counts noted by the
    # original test.
    # NOTE(review): jane.smith and alex.johnson are both listed with
    # 2 emails, so their relative order depends on how ties are broken
    # -- confirm this is deterministic.
    expected_order = [
        "john.doe@example.com",  # 4 emails
        "jane.smith@example.com",  # 2 emails
        "alex.johnson@example.com",  # 2 emails
        "sarah@example.com",  # 1 email
    ]

    ranked = MailDir(sample_email_metadata).get_top_n_senders(4)
    observed_order = [entry.email for entry in ranked]

    assert observed_order == expected_order
def test_parse_maildir(sample_email_dir):
    """Parse the sample directory and check the addresses and names found."""
    frame = parse_maildir(sample_email_dir)._df

    assert len(frame) == 3

    # Materialise each column once, then check membership.
    found_emails = list(frame["email"])
    for address in ("test@something.org", "not_a_test@something.org"):
        assert address in found_emails

    found_names = list(frame["name"])
    for display_name in ("Test", "Not a Test", "Test2"):
        assert display_name in found_names