@@ -53,6 +53,74 @@ def test_generate_embedding_with_prompt_template(
5353 "SELECT llm_embed_generate(?) AS embedding" , (expected_content ,)
5454 )
5555
def test_extract_document_title(self):
    """A leading ``# `` heading line is returned as the title, marker removed."""
    markdown = """# This is the Title
This is the content of the document.
It has multiple lines.
"""

    # Connection and chunker are passed as None; only title extraction is
    # exercised on this engine instance.
    engine = Engine(None, Settings(), None)  # type: ignore

    assert engine.extract_document_title(markdown) == "This is the Title"
66+
@pytest.mark.parametrize(
    "fallback, expected_title",
    [
        (True, "This is the first line of the document without a title."),
        (False, None),
    ],
)
def test_extract_document_title_from_first_line(self, fallback, expected_title):
    """Check title extraction on text that has no heading line.

    With ``fallback`` enabled the first non-empty line is expected back;
    with it disabled the expected result is ``None``.
    """
    headingless = """
This is the first line of the document without a title.
It has multiple lines.
"""

    # Connection and chunker are passed as None; only title extraction is
    # exercised on this engine instance.
    engine = Engine(None, Settings(), None)  # type: ignore

    assert engine.extract_document_title(headingless, fallback) == expected_title
84+
@pytest.mark.parametrize(
    "max_chunks_per_document, expected_chunk_count",
    # NOTE(review): the chunker stub below yields three chunks, yet the
    # limits 0 and 4 expect two. These values were never actually checked
    # before (the assertion loop used to run zero times), so confirm them
    # against Engine.process.
    [(0, 2), (1, 1), (4, 2)],
)
def test_process_with_max_chunks_per_document(
    self, mocker, max_chunks_per_document, expected_chunk_count
):
    """Check how many chunks ``process`` hands to ``generate_embedding``.

    The chunker is stubbed to return three chunks and the engine is built
    with the parametrized ``max_chunks_per_document`` setting. Every call
    recorded on the patched ``generate_embedding`` must receive
    ``expected_chunk_count`` chunks as its first positional argument.
    """
    # Arrange
    chunks = [
        Chunk(content="Chunk 1"),
        Chunk(content="Chunk 2"),
        Chunk(content="Chunk 3"),
    ]

    mock_conn = mocker.Mock()
    settings = Settings(max_chunks_per_document=max_chunks_per_document)
    mock_chunker = mocker.Mock()
    mock_chunker.chunk.return_value = chunks

    engine = Engine(mock_conn, settings, mock_chunker)

    # Keep the mock installed on the engine: it is the object `process`
    # calls, so it is the one that records the call arguments. Spying on an
    # attribute of this mock would observe a child mock nobody invokes.
    mock_generate_embedding = mocker.patch.object(engine, "generate_embedding")
    mock_generate_embedding.return_value = chunks

    document = Document(content="Test document content")

    # Act
    engine.process(document)

    # Assert
    # Without this guard an empty call list would make the loop below a
    # no-op and the test would pass without checking anything.
    assert mock_generate_embedding.call_args_list, (
        "generate_embedding was never called by process"
    )
    for call_args in mock_generate_embedding.call_args_list:
        embedded_chunks = call_args[0][0]  # First positional argument
        assert len(embedded_chunks) == expected_chunk_count
121+
122+
123+ class TestEngineSearch :
56124 def test_search_with_empty_database (self , engine ):
57125 results = engine .search ("nonexistent query" , top_k = 5 )
58126
@@ -230,32 +298,3 @@ def test_search_exact_match(self, db_conn):
230298 assert len (results ) > 0
231299 assert doc1_id == results [0 ].document .id
232300 assert 0.0 == results [0 ].vec_distance
233-
def test_extract_document_title(self):
    """A leading ``# `` heading line is returned as the title, marker removed."""
    markdown = """# This is the Title
This is the content of the document.
It has multiple lines.
"""

    # Connection and chunker are passed as None; only title extraction is
    # exercised on this engine instance.
    engine = Engine(None, Settings(), None)  # type: ignore

    assert engine.extract_document_title(markdown) == "This is the Title"
244-
@pytest.mark.parametrize(
    "fallback, expected_title",
    [
        (True, "This is the first line of the document without a title."),
        (False, None),
    ],
)
def test_extract_document_title_from_first_line(self, fallback, expected_title):
    """Check title extraction on text that has no heading line.

    With ``fallback`` enabled the first non-empty line is expected back;
    with it disabled the expected result is ``None``.
    """
    headingless = """
This is the first line of the document without a title.
It has multiple lines.
"""

    # Connection and chunker are passed as None; only title extraction is
    # exercised on this engine instance.
    engine = Engine(None, Settings(), None)  # type: ignore

    assert engine.extract_document_title(headingless, fallback) == expected_title
0 commit comments