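Fix tests: drop the tracker argument from the process_documents call and check chunk_ids via attribute access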

This commit is contained in:
Alex Selimov 2025-03-21 12:48:51 -04:00
parent e0d0962fc5
commit 4da2e2ff81
10 changed files with 4 additions and 6 deletions

View File

@ -160,9 +160,7 @@ class RAG:
# 3. Process new and modified documents # 3. Process new and modified documents
files_to_process = changed_files["new"] + changed_files["modified"] files_to_process = changed_files["new"] + changed_files["modified"]
if files_to_process: if files_to_process:
chunks, _ = self.process_documents( chunks, _ = self.process_documents(files_to_process, text_splitter)
files_to_process, text_splitter, self.tracker
)
print(f"Adding {len(chunks)} new chunks to the vector store") print(f"Adding {len(chunks)} new chunks to the vector store")
vectorstore.add_documents(chunks) vectorstore.add_documents(chunks)
else: else:

View File

@ -23,6 +23,7 @@ def test_process_documents(tracker_file, docs_dir, db_dir, sample_docs, rag_pipe
files = [ files = [
os.path.join(rag_pipeline.docs_dir, "doc1.txt"), os.path.join(rag_pipeline.docs_dir, "doc1.txt"),
os.path.join(rag_pipeline.docs_dir, "doc2.txt"), os.path.join(rag_pipeline.docs_dir, "doc2.txt"),
os.path.join(rag_pipeline.docs_dir, "doc3.txt"),
] ]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@ -30,13 +31,12 @@ def test_process_documents(tracker_file, docs_dir, db_dir, sample_docs, rag_pipe
chunks, file_chunk_map = rag_pipeline.process_documents(files, text_splitter) chunks, file_chunk_map = rag_pipeline.process_documents(files, text_splitter)
# Verify chunks were created # Verify chunks were created
assert len(chunks) >= 2 # At least one chunk per document assert len(chunks) >= 3 # At least one chunk per document
tracker = rag_pipeline.tracker tracker = rag_pipeline.tracker
# Verify chunk IDs were tracked # Verify chunk IDs were tracked
for file_path in files: for file_path in files:
assert file_path in tracker.doc_info assert file_path in tracker.doc_info
assert "chunk_ids" in tracker.doc_info[file_path] assert len(tracker.doc_info[file_path].chunk_ids) > 0
assert len(tracker.doc_info[file_path]["chunk_ids"]) > 0
# Verify metadata in chunks # Verify metadata in chunks
for chunk in chunks: for chunk in chunks: