"""Tests for ``maildirclean.maildir``.

Covers ``make_email_metadata``, ``MailDir`` construction,
``MailDir.get_top_n_senders`` and ``parse_maildir``.
"""

import pandas as pd

# The star import is kept on purpose: the test functions below request
# ``test_email``, ``sample_email_metadata`` and ``sample_email_dir`` by name,
# which presumably are pytest fixtures defined in ``fixtures`` (and may depend
# on further fixtures from the same module).
from fixtures import *  # noqa: F401,F403
from maildirclean.maildir import (  # noqa: F401
    make_email_metadata,
    MailDir,
    TopSender,
    parse_maildir,
)


def test_email_parsing(test_email):
    """``make_email_metadata`` exposes the raw ``from``/``date`` headers and the path."""
    metadata = make_email_metadata(test_email)

    # NOTE(review): the expected value ends in a bare space. Since
    # test_maildir_creation expects the address "sender@example.com" to be
    # derived from the same message, this literal may have lost a trailing
    # "<sender@example.com>" -- confirm against the ``test_email`` fixture.
    assert metadata["from"] == '"John Doe" '
    assert metadata["date"] == "Wed, 16 Apr 2025 12:23:35 -0400"
    assert metadata["path"] == str(test_email)


def test_maildir_creation(test_email):
    """``MailDir`` stores one row per message with name, email and parsed date."""
    maildir = MailDir([make_email_metadata(test_email)])
    metadata = maildir._df.iloc[0]

    # NOTE(review): same possibly-truncated literal as in test_email_parsing.
    assert metadata["from"] == '"John Doe" '
    assert metadata["name"] == "John Doe"
    assert metadata["email"] == "sender@example.com"
    # The date is compared against a pandas timestamp, not the raw header string.
    assert metadata["date"] == pd.to_datetime("Wed, 16 Apr 2025 12:23:35 -0400")
    assert metadata["path"] == str(test_email)


def test_get_top_n_senders(sample_email_metadata):
    """The top two senders are returned with every display name seen for them."""
    # Initialize MailDir with sample data
    maildir = MailDir(sample_email_metadata)

    # Test getting top 2 senders
    top_senders = maildir.get_top_n_senders(2)

    # Assertions
    assert len(top_senders) == 2

    # john.doe@example.com should be the top sender (4 emails)
    assert top_senders[0].email == "john.doe@example.com"
    assert set(top_senders[0].names) == {"John Doe", "Johnny Doe", "J. Doe"}

    # jane.smith@example.com should be the second (2 emails)
    assert top_senders[1].email == "jane.smith@example.com"
    assert set(top_senders[1].names) == {"Jane Smith", "Jane S."}


def test_get_top_n_senders_with_empty_data():
    """An empty ``MailDir`` yields no top senders."""
    # Initialize MailDir with empty data
    maildir = MailDir([])

    # Test getting top senders from empty data
    top_senders = maildir.get_top_n_senders(5)

    # Should return empty list
    assert len(top_senders) == 0


def test_get_top_n_senders_with_n_greater_than_unique_senders(sample_email_metadata):
    """Asking for more senders than exist returns every unique sender once."""
    # Initialize MailDir with sample data
    maildir = MailDir(sample_email_metadata)

    # Test getting more senders than exist
    top_senders = maildir.get_top_n_senders(10)

    # Should only return 4 senders (as there are only 4 unique emails)
    assert len(top_senders) == 4

    # Verify all expected emails are present
    emails = [sender.email for sender in top_senders]
    assert set(emails) == {
        "john.doe@example.com",
        "jane.smith@example.com",
        "alex.johnson@example.com",
        "sarah@example.com",
    }


def test_get_top_n_senders_ordering(sample_email_metadata):
    """Senders come back ordered by descending email count."""
    # Initialize MailDir with sample data
    maildir = MailDir(sample_email_metadata)

    # Test getting all senders
    top_senders = maildir.get_top_n_senders(4)

    # Verify ordering by count
    # NOTE(review): jane.smith and alex.johnson are both annotated with
    # 2 emails, so their relative order depends on how get_top_n_senders
    # breaks ties -- confirm that order is guaranteed.
    assert [sender.email for sender in top_senders] == [
        "john.doe@example.com",  # 4 emails
        "jane.smith@example.com",  # 2 emails
        "alex.johnson@example.com",  # 2 emails
        "sarah@example.com",  # 1 email
    ]


def test_parse_maildir(sample_email_dir):
    """``parse_maildir`` loads every message in the sample directory."""
    maildir = parse_maildir(sample_email_dir)

    assert len(maildir._df) == 3

    # Convert each column once; ``in`` on a Series would test the index,
    # not the values, so membership is checked against plain lists.
    emails = list(maildir._df["email"])
    names = list(maildir._df["name"])

    assert "test@something.org" in emails
    assert "not_a_test@something.org" in emails

    assert "Test" in names
    assert "Not a Test" in names
    assert "Test2" in names