# maildir_clean/tests/test_maildir.py

import pandas as pd

from fixtures import *
from maildirclean.maildir import make_email_metadata, MailDir, TopSender, parse_maildir


def test_email_parsing(test_email):
    """Check the ``from``, ``date`` and ``path`` entries of a parsed email.

    The ``test_email`` fixture is passed to ``make_email_metadata``; the
    result is indexed by key and compared against the expected raw header
    strings and the stringified fixture path.
    """
    parsed = make_email_metadata(test_email)

    expected_fields = {
        "from": '"John Doe" <sender@example.com>',
        "date": "Wed, 16 Apr 2025 12:23:35 -0400",
        "path": str(test_email),
    }
    for key, expected in expected_fields.items():
        assert parsed[key] == expected


def test_maildir_creation(test_email):
    """Build a ``MailDir`` from one parsed email and inspect its first row.

    Besides the raw ``from`` and ``path`` values, the row is expected to
    carry separate ``name`` and ``email`` entries and a ``date`` that
    compares equal to the ``pd.to_datetime`` parse of the header string.
    """
    records = [make_email_metadata(test_email)]
    first_row = MailDir(records)._df.iloc[0]

    expectations = [
        ("from", '"John Doe" <sender@example.com>'),
        ("name", "John Doe"),
        ("email", "sender@example.com"),
        ("date", pd.to_datetime("Wed, 16 Apr 2025 12:23:35 -0400")),
        ("path", str(test_email)),
    ]
    for column, expected in expectations:
        assert first_row[column] == expected


def test_get_top_n_senders(sample_email_metadata):
    """Request the top two senders and check their addresses and names."""
    ranked = MailDir(sample_email_metadata).get_top_n_senders(2)

    # Exactly as many entries as were requested.
    assert len(ranked) == 2

    leader = ranked[0]
    runner_up = ranked[1]

    # Top sender: john.doe@example.com (4 emails), seen under three names.
    assert leader.email == "john.doe@example.com"
    assert set(leader.names) == {"John Doe", "Johnny Doe", "J. Doe"}

    # Second sender: jane.smith@example.com (2 emails), seen under two names.
    assert runner_up.email == "jane.smith@example.com"
    assert set(runner_up.names) == {"Jane Smith", "Jane S."}


def test_get_top_n_senders_with_empty_data():
    """A ``MailDir`` built from an empty list reports no top senders."""
    # Ask for five senders even though no records were supplied.
    assert len(MailDir([]).get_top_n_senders(5)) == 0


def test_get_top_n_senders_with_n_greater_than_unique_senders(sample_email_metadata):
    """Asking for more senders than exist returns every unique sender once."""
    # The four distinct addresses expected from the sample data.
    expected_emails = {
        "john.doe@example.com",
        "jane.smith@example.com",
        "alex.johnson@example.com",
        "sarah@example.com",
    }

    # Request 10 senders, which is more than the number of unique addresses.
    ranked = MailDir(sample_email_metadata).get_top_n_senders(10)

    # The result is capped at the number of unique addresses (4).
    assert len(ranked) == len(expected_emails)

    # Every expected address appears, regardless of order.
    assert {entry.email for entry in ranked} == expected_emails


def test_get_top_n_senders_ordering(sample_email_metadata):
    """The top-four result lists senders from most to fewest emails."""
    # NOTE(review): two senders below are annotated with the same count, so
    # the expected order relies on how get_top_n_senders breaks ties --
    # confirm that tie-breaking is deterministic.
    expected_order = [
        "john.doe@example.com",  # 4 emails
        "jane.smith@example.com",  # 2 emails
        "alex.johnson@example.com",  # 2 emails
        "sarah@example.com",  # 1 email
    ]

    ranked = MailDir(sample_email_metadata).get_top_n_senders(4)
    actual_order = [entry.email for entry in ranked]

    assert actual_order == expected_order


def test_parse_maildir(sample_email_dir):
    """Parse the sample directory and check the row count, addresses and names."""
    frame = parse_maildir(sample_email_dir)._df

    assert len(frame) == 3

    # Convert to a plain list so ``in`` tests the values, not the index.
    addresses = list(frame["email"])
    for address in ("test@something.org", "not_a_test@something.org"):
        assert address in addresses

    display_names = list(frame["name"])
    for display_name in ("Test", "Not a Test", "Test2"):
        assert display_name in display_names