103 lines
3.2 KiB
Python
103 lines
3.2 KiB
Python
import pandas as pd
|
|
|
|
from fixtures import *
|
|
from maildirclean.maildir import make_email_metadata, MailDir, TopSender, parse_maildir
|
|
|
|
|
|
def test_email_parsing(test_email):
    """Check the header fields that ``make_email_metadata`` extracts.

    ``test_email`` is presumably a pytest fixture supplied by the
    ``fixtures`` module; the expected values below mirror the message
    that fixture is assumed to provide.
    """
    parsed = make_email_metadata(test_email)

    # Field name -> value the parsed metadata must carry for that field.
    expected = {
        "from": '"John Doe" <sender@example.com>',
        "date": "Wed, 16 Apr 2025 12:23:35 -0400",
        "path": str(test_email),
    }
    for field, value in expected.items():
        assert parsed[field] == value
|
|
|
|
|
|
def test_maildir_creation(test_email):
    """Check the row a ``MailDir`` stores when built from one email's metadata.

    Besides the raw ``from``/``path`` values, the row is expected to carry
    the sender's ``name`` and ``email`` as separate entries and the
    ``date`` as a value equal to ``pd.to_datetime`` of the header string.
    """
    records = [make_email_metadata(test_email)]
    first_row = MailDir(records)._df.iloc[0]

    assert first_row["from"] == '"John Doe" <sender@example.com>'
    assert first_row["name"] == "John Doe"
    assert first_row["email"] == "sender@example.com"

    expected_date = pd.to_datetime("Wed, 16 Apr 2025 12:23:35 -0400")
    assert first_row["date"] == expected_date

    assert first_row["path"] == str(test_email)
|
|
|
|
|
|
def test_get_top_n_senders(sample_email_metadata):
    """Check the top two senders and the display names collected for each."""
    ranked = MailDir(sample_email_metadata).get_top_n_senders(2)

    assert len(ranked) == 2
    leader, runner_up = ranked[0], ranked[1]

    # john.doe@example.com is expected to lead (4 emails in the sample data).
    assert leader.email == "john.doe@example.com"
    assert set(leader.names) == {"John Doe", "Johnny Doe", "J. Doe"}

    # jane.smith@example.com is expected to follow (2 emails).
    assert runner_up.email == "jane.smith@example.com"
    assert set(runner_up.names) == {"Jane Smith", "Jane S."}
|
|
|
|
|
|
def test_get_top_n_senders_with_empty_data():
    """A ``MailDir`` built from no emails should report no top senders."""
    result = MailDir([]).get_top_n_senders(5)

    # Nothing was loaded, so nothing can be ranked.
    assert len(result) == 0
|
|
|
|
|
|
def test_get_top_n_senders_with_n_greater_than_unique_senders(sample_email_metadata):
    """Requesting more senders than exist should return each unique sender once."""
    # The sample data is expected to contain exactly these four addresses.
    expected_emails = {
        "john.doe@example.com",
        "jane.smith@example.com",
        "alex.johnson@example.com",
        "sarah@example.com",
    }

    # Ask for 10 even though only 4 unique senders are expected.
    senders = MailDir(sample_email_metadata).get_top_n_senders(10)

    assert len(senders) == 4
    assert {entry.email for entry in senders} == expected_emails
|
|
|
|
|
|
def test_get_top_n_senders_ordering(sample_email_metadata):
    """Senders should come back ordered by descending email count.

    NOTE(review): jane.smith and alex.johnson are both annotated with
    2 emails, so their relative order depends on how ``get_top_n_senders``
    breaks ties -- confirm that this is deterministic.
    """
    senders = MailDir(sample_email_metadata).get_top_n_senders(4)
    observed = [entry.email for entry in senders]

    expected_order = [
        "john.doe@example.com",  # 4 emails
        "jane.smith@example.com",  # 2 emails
        "alex.johnson@example.com",  # 2 emails
        "sarah@example.com",  # 1 email
    ]
    assert observed == expected_order
|
|
|
|
|
|
def test_parse_maildir(sample_email_dir):
    """Check that ``parse_maildir`` loads every message in the sample directory.

    The sample directory is expected to yield three rows whose ``email``
    and ``name`` columns include the values checked below.
    """
    frame = parse_maildir(sample_email_dir)._df

    assert len(frame) == 3

    # Materialise each column once instead of rebuilding it per assertion.
    emails = list(frame["email"])
    for address in ("test@something.org", "not_a_test@something.org"):
        assert address in emails

    names = list(frame["name"])
    for display_name in ("Test", "Not a Test", "Test2"):
        assert display_name in names
|