Skip to content

Commit 196d3ef

Browse files
committed
tests: ingestion service
1 parent e4138d2 commit 196d3ef

File tree

1 file changed

+307
-0
lines changed

1 file changed

+307
-0
lines changed
Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,310 @@
11
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
5+
from pathlib import Path
6+
from unittest.mock import Mock, patch
7+
8+
import pytest
9+
10+
from autogen.agents.experimental.document_agent.agents.ingestion_service import DocumentIngestionService
11+
from autogen.agents.experimental.document_agent.core.base_interfaces import RAGQueryEngine
12+
from autogen.agents.experimental.document_agent.core.config import DocAgentConfig, ProcessingConfig
13+
from autogen.agents.experimental.document_agent.ingestion.document_processor import DoclingDocumentProcessor
14+
15+
16+
class TestDocumentIngestionService:
    """Unit tests for DocumentIngestionService.

    Every collaborator (query engine, config, document processor) is replaced
    with a ``Mock``, so these tests exercise only the service's own control
    flow and the values it returns.
    """

    # Dotted path of the module under test. ``patch`` must target the module
    # where a name is looked up, so every patch target is derived from this
    # single constant instead of repeating the long literal in each test.
    _SERVICE_MODULE = "autogen.agents.experimental.document_agent.agents.ingestion_service"
    _PROCESSOR_TARGET = f"{_SERVICE_MODULE}.DoclingDocumentProcessor"
    _CONFIG_TARGET = f"{_SERVICE_MODULE}.DocAgentConfig"
    _PATH_TARGET = f"{_SERVICE_MODULE}.Path"
    _LOGGER_TARGET = f"{_SERVICE_MODULE}.logger"

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def mock_query_engine(self) -> Mock:
        """Return a mock RAG query engine restricted to the RAGQueryEngine interface."""
        mock_engine = Mock(spec=RAGQueryEngine)
        # Explicitly mock the add_docs method to ensure it's a Mock object
        mock_engine.add_docs = Mock()
        return mock_engine

    @pytest.fixture
    def mock_config(self) -> Mock:
        """Return a mock DocAgentConfig with a populated ``processing`` section."""
        config = Mock(spec=DocAgentConfig)

        # Nested processing config carrying the values the tests compare against.
        processing_config = Mock(spec=ProcessingConfig)
        processing_config.output_dir = Path("/tmp/output")
        processing_config.chunk_size = 1000
        processing_config.supported_formats = ["txt", "pdf", "docx"]

        config.processing = processing_config
        return config

    @pytest.fixture
    def mock_document_processor(self) -> Mock:
        """Return a mock DoclingDocumentProcessor that tests can inject into the service."""
        return Mock(spec=DoclingDocumentProcessor)

    @pytest.fixture
    def service(self, mock_query_engine: Mock, mock_config: Mock) -> DocumentIngestionService:
        """Return a DocumentIngestionService built with a patched document processor.

        The processor class is only patched while the service is constructed;
        the returned instance keeps whatever the patched class produced.
        """
        with patch(self._PROCESSOR_TARGET) as mock_processor_class:
            mock_processor_class.return_value = Mock(spec=DoclingDocumentProcessor)
            return DocumentIngestionService(mock_query_engine, mock_config)

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_init_with_query_engine_and_config(self, mock_query_engine: Mock, mock_config: Mock) -> None:
        """Test initialization with both query engine and config."""
        with patch(self._PROCESSOR_TARGET) as mock_processor_class:
            mock_processor_class.return_value = Mock(spec=DoclingDocumentProcessor)

            service = DocumentIngestionService(mock_query_engine, mock_config)

            # The exact objects passed in must be the ones stored.
            assert service.query_engine is mock_query_engine
            assert service.config is mock_config
            mock_processor_class.assert_called_once_with(
                output_dir=mock_config.processing.output_dir,
                chunk_size=mock_config.processing.chunk_size,
            )

    def test_init_with_default_config(self, mock_query_engine: Mock) -> None:
        """Test initialization with default config."""
        with (
            patch(self._CONFIG_TARGET) as mock_config_class,
            patch(self._PROCESSOR_TARGET) as mock_processor_class,
        ):
            # Create a proper mock config with nested structure
            mock_config_instance = Mock(spec=DocAgentConfig)
            mock_processing_config = Mock(spec=ProcessingConfig)
            mock_processing_config.output_dir = Path("./parsed_docs")
            mock_processing_config.chunk_size = 512
            mock_config_instance.processing = mock_processing_config

            mock_config_class.return_value = mock_config_instance
            mock_processor_class.return_value = Mock(spec=DoclingDocumentProcessor)

            service = DocumentIngestionService(mock_query_engine)

            assert service.query_engine is mock_query_engine
            # With no config supplied, the service must build one itself.
            assert service.config is mock_config_instance
            mock_config_class.assert_called_once()

    # ------------------------------------------------------------------
    # ingest_document
    # ------------------------------------------------------------------

    def test_ingest_document_success(self, service: DocumentIngestionService, mock_document_processor: Mock) -> None:
        """Test successful document ingestion."""
        # Setup
        document_path = "/path/to/document.pdf"
        processed_files: list[Path] = [Path("/tmp/output/doc1.txt"), Path("/tmp/output/doc2.txt")]

        service.document_processor = mock_document_processor
        mock_document_processor.process_document.return_value = processed_files

        # Execute
        result = service.ingest_document(document_path)

        # Assert
        mock_document_processor.process_document.assert_called_once_with(
            document_path, service.config.processing.output_dir
        )

        assert "Successfully ingested 2 document(s)" in result
        assert "doc1.txt" in result
        assert "doc2.txt" in result

    def test_ingest_document_no_processed_files(
        self, service: DocumentIngestionService, mock_document_processor: Mock
    ) -> None:
        """Test document ingestion when no files are processed."""
        # Setup
        document_path = "/path/to/document.pdf"
        mock_document_processor.process_document.return_value = []
        service.document_processor = mock_document_processor

        # Execute
        result = service.ingest_document(document_path)

        # Assert
        assert result == "No documents were processed."

    def test_ingest_document_exception(self, service: DocumentIngestionService, mock_document_processor: Mock) -> None:
        """Test document ingestion when an exception occurs."""
        # Setup
        document_path = "/path/to/document.pdf"
        mock_document_processor.process_document.side_effect = Exception("Processing failed")
        service.document_processor = mock_document_processor

        # Execute
        result = service.ingest_document(document_path)

        # Assert: the failure is reported as a string rather than raised.
        assert result == "Error ingesting document: Processing failed"

    # ------------------------------------------------------------------
    # ingest_documents
    # ------------------------------------------------------------------

    def test_ingest_documents_multiple_success(self, service: DocumentIngestionService) -> None:
        """Test successful ingestion of multiple documents."""
        # Setup
        document_paths: list[str] = ["/path/to/doc1.pdf", "/path/to/doc2.txt"]

        with patch.object(service, "ingest_document") as mock_ingest:
            mock_ingest.side_effect = ["Success 1", "Success 2"]

            # Execute
            results = service.ingest_documents(document_paths)

            # Assert
            assert results == ["Success 1", "Success 2"]
            assert mock_ingest.call_count == 2
            mock_ingest.assert_any_call("/path/to/doc1.pdf")
            mock_ingest.assert_any_call("/path/to/doc2.txt")

    def test_ingest_documents_empty_sequence(self, service: DocumentIngestionService) -> None:
        """Test ingestion of empty document sequence."""
        # Setup
        document_paths: list[str] = []

        # Execute
        results = service.ingest_documents(document_paths)

        # Assert
        assert results == []

    # ------------------------------------------------------------------
    # ingest_directory
    # ------------------------------------------------------------------

    def test_ingest_directory_success(self, service: DocumentIngestionService) -> None:
        """Test successful directory ingestion."""
        # Setup
        directory_path = "/path/to/documents"
        mock_directory = Mock(spec=Path)
        mock_directory.exists.return_value = True
        mock_directory.is_dir.return_value = True

        # Mock finding files - need to properly mock the glob method for each extension
        # The method calls glob for each supported format (txt, pdf, docx)
        mock_directory.glob.side_effect = [
            [Path("doc1.txt")],  # First call for *.txt
            [Path("doc2.pdf")],  # Second call for *.pdf
            [Path("doc3.docx")],  # Third call for *.docx
            [],  # Fourth call for *.TXT (uppercase)
            [],  # Fifth call for *.PDF (uppercase)
            [],  # Sixth call for *.DOCX (uppercase)
        ]

        with (
            # Any Path(...) built inside the service module yields the mock directory.
            patch(self._PATH_TARGET, return_value=mock_directory),
            patch.object(service, "ingest_documents") as mock_ingest_docs,
        ):
            mock_ingest_docs.return_value = [
                "Successfully ingested 1 document(s): ['doc1.txt']",
                "Successfully ingested 1 document(s): ['doc2.pdf']",
                "Successfully ingested 1 document(s): ['doc3.docx']",
            ]

            # Execute
            result = service.ingest_directory(directory_path)

            # Assert
            assert "Ingestion complete: 3 successful, 0 failed" in result
            mock_ingest_docs.assert_called_once()

    def test_ingest_directory_not_found(self, service: DocumentIngestionService) -> None:
        """Test directory ingestion when directory doesn't exist."""
        # Setup
        directory_path = "/nonexistent/path"
        mock_directory = Mock(spec=Path)
        mock_directory.exists.return_value = False

        with patch(self._PATH_TARGET, return_value=mock_directory):
            # Execute
            result = service.ingest_directory(directory_path)

            # Assert
            assert result == f"Directory not found: {directory_path}"

    def test_ingest_directory_no_supported_files(self, service: DocumentIngestionService) -> None:
        """Test directory ingestion when no supported files are found."""
        # Setup
        directory_path = "/path/to/documents"
        mock_directory = Mock(spec=Path)
        mock_directory.exists.return_value = True
        mock_directory.is_dir.return_value = True

        # Mock finding no files
        mock_directory.glob.return_value = []

        with patch(self._PATH_TARGET, return_value=mock_directory):
            # Execute
            result = service.ingest_directory(directory_path)

            # Assert
            assert result == f"No supported documents found in directory: {directory_path}"

    def test_ingest_directory_exception(self, service: DocumentIngestionService) -> None:
        """Test directory ingestion when an exception occurs."""
        # Setup
        directory_path = "/path/to/documents"

        # Make constructing a Path inside the service module raise.
        with patch(self._PATH_TARGET, side_effect=Exception("Path error")):
            # Execute
            result = service.ingest_directory(directory_path)

            # Assert: the failure is reported as a string rather than raised.
            assert result == "Error ingesting directory: Path error"

    # ------------------------------------------------------------------
    # Status and query-engine management
    # ------------------------------------------------------------------

    def test_get_ingestion_status(self, service: DocumentIngestionService) -> None:
        """Test getting ingestion service status."""
        # Execute
        status = service.get_ingestion_status()

        # Assert
        assert status["query_engine_configured"] is True
        assert status["output_directory"] == str(service.config.processing.output_dir)
        assert status["chunk_size"] == service.config.processing.chunk_size
        assert status["supported_formats"] == service.config.processing.supported_formats

    def test_get_ingestion_status_no_query_engine(self, mock_config: Mock) -> None:
        """Test getting status when no query engine is configured."""
        # Setup - need to patch the DoclingDocumentProcessor import to avoid dependency issues
        with patch(self._PROCESSOR_TARGET) as mock_processor_class:
            mock_processor_class.return_value = Mock(spec=DoclingDocumentProcessor)

            service = DocumentIngestionService(None, mock_config)  # type: ignore[arg-type]

            # Execute
            status = service.get_ingestion_status()

            # Assert
            assert status["query_engine_configured"] is False

    def test_set_query_engine(self, service: DocumentIngestionService) -> None:
        """Test setting a new query engine."""
        # Setup
        new_query_engine = Mock(spec=RAGQueryEngine)

        # Execute
        service.set_query_engine(new_query_engine)

        # Assert: the service must now hold the replacement object itself.
        assert service.query_engine is new_query_engine

    def test_set_query_engine_logs_info(self, service: DocumentIngestionService) -> None:
        """Test that setting query engine logs an info message."""
        # Setup
        new_query_engine = Mock(spec=RAGQueryEngine)

        with patch(self._LOGGER_TARGET) as mock_logger:
            # Execute
            service.set_query_engine(new_query_engine)

            # Assert
            mock_logger.info.assert_called_once_with("Query engine updated for ingestion service")

0 commit comments

Comments
 (0)