Fix tests

This commit is contained in:
Alex Selimov 2025-03-21 12:48:51 -04:00
parent e0d0962fc5
commit 4da2e2ff81
10 changed files with 4 additions and 6 deletions

View File

@@ -160,9 +160,7 @@ class RAG:
# 3. Process new and modified documents
files_to_process = changed_files["new"] + changed_files["modified"]
if files_to_process:
chunks, _ = self.process_documents(
files_to_process, text_splitter, self.tracker
)
chunks, _ = self.process_documents(files_to_process, text_splitter)
print(f"Adding {len(chunks)} new chunks to the vector store")
vectorstore.add_documents(chunks)
else:

View File

@@ -23,6 +23,7 @@ def test_process_documents(tracker_file, docs_dir, db_dir, sample_docs, rag_pipe
files = [
os.path.join(rag_pipeline.docs_dir, "doc1.txt"),
os.path.join(rag_pipeline.docs_dir, "doc2.txt"),
os.path.join(rag_pipeline.docs_dir, "doc3.txt"),
]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@@ -30,13 +31,12 @@ def test_process_documents(tracker_file, docs_dir, db_dir, sample_docs, rag_pipe
chunks, file_chunk_map = rag_pipeline.process_documents(files, text_splitter)
# Verify chunks were created
assert len(chunks) >= 2 # At least one chunk per document
assert len(chunks) >= 3 # At least one chunk per document
tracker = rag_pipeline.tracker
# Verify chunk IDs were tracked
for file_path in files:
assert file_path in tracker.doc_info
assert "chunk_ids" in tracker.doc_info[file_path]
assert len(tracker.doc_info[file_path]["chunk_ids"]) > 0
assert len(tracker.doc_info[file_path].chunk_ids) > 0
# Verify metadata in chunks
for chunk in chunks: