158 lines
4.9 KiB
Python
158 lines
4.9 KiB
Python
import os
|
|
import time
|
|
import pytest
|
|
|
|
from .fixtures import *
|
|
from code_rag.doc_tracker import DocMetaData, DocumentTracker, calculate_file_hash
|
|
|
|
|
|
def doc_infos_are_equal(left, right):
    """Check whether two doc_info mappings hold the same entries.

    Args:
        left: Mapping of file path to document metadata.
        right: Mapping to compare against ``left``.

    Returns:
        True when both mappings have exactly the same keys and no pair of
        values under the same key compares unequal; False otherwise.
    """
    # Compare the key sets first so an entry present on only one side counts
    # as a mismatch in either direction, not only when it is missing from
    # ``right``.
    if left.keys() != right.keys():
        return False
    return not any(value != right[key] for key, value in left.items())
|
|
|
|
|
|
@pytest.fixture
def document_tracker(tracker_file):
    """Provide a DocumentTracker built on the ``tracker_file`` fixture."""
    tracker = DocumentTracker(tracking_file=tracker_file)
    return tracker
|
|
|
|
|
|
# Tests for DocumentTracker
|
|
def test_init_new_tracker(tracker_file):
    """A freshly constructed tracker starts empty and creates no file."""
    fresh = DocumentTracker(tracking_file=tracker_file)
    assert fresh.doc_info == {}
    # Construction alone must not have written the tracking file.
    assert os.path.exists(tracker_file) is False
|
|
|
|
|
|
def test_save_and_load_tracking_data(document_tracker, tracker_file):
    """Data saved by one tracker is loaded by a new tracker on the same file."""
    stamp = time.time()

    def make_entries():
        # Build fresh objects on every call so the saved data and the
        # expected data never share instances.
        return {"test.txt": DocMetaData(123456, "abcdef", stamp, ["1", "2"])}

    # Populate the tracker and persist it
    document_tracker.doc_info = make_entries()
    document_tracker._save_tracking_data()

    # Saving must have produced the tracking file
    assert os.path.exists(tracker_file)

    # A second tracker pointed at the same file should hold the saved entries
    reloaded = DocumentTracker(tracking_file=tracker_file)
    assert doc_infos_are_equal(reloaded.doc_info, make_entries())
|
|
|
|
|
|
def test_calculate_file_hash(document_tracker, sample_docs):
    """The hash is stable for unchanged content and differs after an append."""
    target = sample_docs[0]

    # Two hashes of the untouched file must agree
    before = calculate_file_hash(target)
    repeat = calculate_file_hash(target)
    assert before == repeat

    # Appending text must yield a different hash
    with open(target, "a") as handle:
        handle.write("Additional content")
    after = calculate_file_hash(target)
    assert before != after
|
|
|
|
|
|
def test_get_changed_files_new(document_tracker, docs_dir, sample_docs):
    """The first scan reports every sample document as new."""
    result = document_tracker.get_changed_files(docs_dir)
    assert set(result["new"]) == set(sample_docs)
    assert result["modified"] == []
    assert result["deleted"] == []

    # After the scan each sample file is tracked with an empty chunk list
    for path in sample_docs:
        assert path in document_tracker.doc_info
        entry = document_tracker.doc_info[path]
        assert entry.chunk_ids == []
|
|
|
|
|
|
def test_get_changed_files_modified(document_tracker, docs_dir, sample_docs):
    """A file appended to after the first scan is reported as modified."""
    edited = sample_docs[0]

    # Initial scan to establish tracking
    document_tracker.get_changed_files(docs_dir)

    # Pause briefly so the edit gets a later timestamp, then append to the file
    time.sleep(0.1)
    with open(edited, "a") as handle:
        handle.write("Modified content")

    # Only the edited file should be reported, and only as modified
    result = document_tracker.get_changed_files(docs_dir)
    assert result["new"] == []
    assert result["modified"] == [edited]
    assert result["deleted"] == []
|
|
|
|
|
|
def test_get_changed_files_deleted(document_tracker, docs_dir, sample_docs):
    """A file removed after the first scan is reported as deleted."""
    removed = sample_docs[0]

    # Initial scan to establish tracking
    document_tracker.get_changed_files(docs_dir)

    # Remove one tracked file from disk
    os.remove(removed)

    # Only the removed file should be reported, and only as deleted
    result = document_tracker.get_changed_files(docs_dir)
    assert result["new"] == []
    assert result["modified"] == []
    assert result["deleted"] == [removed]
|
|
|
|
|
|
def test_update_chunk_mappings(document_tracker, sample_docs):
    """update_chunk_mappings records the given chunk ids on a tracked file."""
    target = sample_docs[0]
    new_ids = ["chunk1", "chunk2", "chunk3"]

    # Seed a tracking entry with no chunks so the file is already tracked
    seeded = DocMetaData(123, "abc", "2023-01-01", [])
    document_tracker.doc_info[target] = seeded

    # Apply the mapping and read it back from the tracker
    document_tracker.update_chunk_mappings(target, new_ids)
    assert document_tracker.doc_info[target].chunk_ids == new_ids
|
|
|
|
|
|
def test_get_chunks_to_delete(document_tracker):
    """Chunks of deleted files are returned and those files stop being tracked."""
    # Seed three tracked files with known chunk ids
    seed = {
        "file1.txt": ["chunk1", "chunk2"],
        "file2.txt": ["chunk3", "chunk4"],
        "file3.txt": ["chunk5"],
    }
    document_tracker.doc_info = {
        name: DocMetaData(0, "abc", 0, ids) for name, ids in seed.items()
    }

    # A single deleted file
    removed = document_tracker.get_chunks_to_delete(["file1.txt"])
    assert set(removed) == {"chunk1", "chunk2"}
    assert "file1.txt" not in document_tracker.doc_info

    # Several deleted files in one call
    removed = document_tracker.get_chunks_to_delete(["file2.txt", "file3.txt"])
    assert set(removed) == {"chunk3", "chunk4", "chunk5"}
    for gone in ("file2.txt", "file3.txt"):
        assert gone not in document_tracker.doc_info
|