import os
import time

import pytest

# Star import kept as in the original; presumably this is what supplies the
# tracker_file, docs_dir and sample_docs fixtures used below -- confirm.
from .fixtures import *  # noqa: F401,F403
from code_rag.doc_tracker import DocMetaData, DocumentTracker, calculate_file_hash


def doc_infos_are_equal(left, right):
    """Return True when two doc_info mappings hold equal entries.

    Both mappings must have exactly the same keys, and the value stored
    under each key must compare equal (``==``) between the two.

    Args:
        left: Mapping of file path to metadata object.
        right: Mapping of file path to metadata object.

    Returns:
        True if the mappings match key-for-key and value-for-value,
        otherwise False.
    """
    # Compare the key sets first so that entries present only in ``right``
    # are detected too, not just entries missing from it.
    if left.keys() != right.keys():
        return False

    for path, meta in left.items():
        other = right[path]
        if meta != other:
            # Surface the differing pair in pytest's captured output.
            print(meta.to_dict(), other.to_dict())
            return False
    return True


@pytest.fixture
def document_tracker(tracker_file):
    """Create a DocumentTracker instance backed by ``tracker_file``."""
    return DocumentTracker(tracking_file=tracker_file)


# Tests for DocumentTracker


def test_init_new_tracker(tracker_file):
    """A new tracker starts with empty doc_info and no tracking file on disk."""
    tracker = DocumentTracker(tracking_file=tracker_file)

    assert tracker.doc_info == {}
    assert not os.path.exists(tracker_file)


def test_save_and_load_tracking_data(document_tracker, tracker_file):
    """Data saved by one tracker is loaded by a new tracker on the same file."""
    # Add some data
    update_time = time.time()
    document_tracker.doc_info = {
        "test.txt": DocMetaData(123456, "abcdef", update_time, ["1", "2"])
    }
    document_tracker._save_tracking_data()

    # Check file exists
    assert os.path.exists(tracker_file)

    # Create a new tracker that should load the data
    new_tracker = DocumentTracker(tracking_file=tracker_file)
    expected = {"test.txt": DocMetaData(123456, "abcdef", update_time, ["1", "2"])}
    assert doc_infos_are_equal(new_tracker.doc_info, expected)


def test_calculate_file_hash(document_tracker, sample_docs):
    """The hash is stable for unchanged content and differs after an append."""
    file_path = sample_docs[0]
    hash1 = calculate_file_hash(file_path)

    # Same content should yield same hash
    hash2 = calculate_file_hash(file_path)
    assert hash1 == hash2

    # Different content should yield different hash
    with open(file_path, "a", encoding="utf-8") as f:
        f.write("Additional content")

    hash3 = calculate_file_hash(file_path)
    assert hash1 != hash3


def test_get_changed_files_new(document_tracker, docs_dir, sample_docs):
    """On the first scan every sample document is reported as new."""
    changes = document_tracker.get_changed_files(docs_dir)

    assert set(changes["new"]) == set(sample_docs)
    assert changes["modified"] == []
    assert changes["deleted"] == []

    # Verify tracking was updated
    for file_path in sample_docs:
        assert file_path in document_tracker.doc_info
        assert document_tracker.doc_info[file_path].chunk_ids == []


def test_get_changed_files_modified(document_tracker, docs_dir, sample_docs):
    """A file appended to after the first scan is reported as modified."""
    # First scan to establish tracking
    document_tracker.get_changed_files(docs_dir)

    # Modify a file and wait to ensure timestamp difference
    time.sleep(0.1)
    with open(sample_docs[0], "a", encoding="utf-8") as f:
        f.write("Modified content")

    # Detect changes
    changes = document_tracker.get_changed_files(docs_dir)

    assert changes["new"] == []
    assert changes["modified"] == [sample_docs[0]]
    assert changes["deleted"] == []


def test_get_changed_files_deleted(document_tracker, docs_dir, sample_docs):
    """A file removed after the first scan is reported as deleted."""
    # First scan to establish tracking
    document_tracker.get_changed_files(docs_dir)

    # Delete a file
    os.remove(sample_docs[0])

    # Detect changes
    changes = document_tracker.get_changed_files(docs_dir)

    assert changes["new"] == []
    assert changes["modified"] == []
    assert changes["deleted"] == [sample_docs[0]]


def test_update_chunk_mappings(document_tracker, sample_docs):
    """update_chunk_mappings stores the given chunk ids on a tracked file."""
    file_path = sample_docs[0]
    chunk_ids = ["chunk1", "chunk2", "chunk3"]

    # First make sure the file is tracked
    # NOTE(review): the other tests pass a numeric timestamp as the third
    # argument; confirm that a date string is acceptable here.
    document_tracker.doc_info[file_path] = DocMetaData(
        123,
        "abc",
        "2023-01-01",
        [],
    )

    # Update chunk mappings
    document_tracker.update_chunk_mappings(file_path, chunk_ids)

    assert document_tracker.doc_info[file_path].chunk_ids == chunk_ids


def test_get_chunks_to_delete(document_tracker):
    """get_chunks_to_delete returns the chunks of deleted files and untracks them."""
    # Setup tracking data
    document_tracker.doc_info = {
        "file1.txt": DocMetaData(0, "abc", 0, ["chunk1", "chunk2"]),
        "file2.txt": DocMetaData(0, "abc", 0, ["chunk3", "chunk4"]),
        "file3.txt": DocMetaData(0, "abc", 0, ["chunk5"]),
    }

    # Test with one deleted file
    chunks = document_tracker.get_chunks_to_delete(["file1.txt"])
    assert set(chunks) == {"chunk1", "chunk2"}

    # Verify file was removed from tracking
    assert "file1.txt" not in document_tracker.doc_info

    # Test with multiple deleted files
    chunks = document_tracker.get_chunks_to_delete(["file2.txt", "file3.txt"])
    assert set(chunks) == {"chunk3", "chunk4", "chunk5"}

    # Verify files were removed from tracking
    assert "file2.txt" not in document_tracker.doc_info
    assert "file3.txt" not in document_tracker.doc_info