@@ -149,7 +149,7 @@ def test_kmeans_cluster_documents_empty(self):
149149
150150class TestExtractRepresentativeChunksSmart :
151151 """Test smart chunk selection"""
152-
152+
153153 def test_extract_representative_chunks_smart_basic (self ):
154154 """Test basic smart chunk selection"""
155155 chunks = [
@@ -158,13 +158,13 @@ def test_extract_representative_chunks_smart_basic(self):
158158 {'content' : 'Third chunk content' },
159159 {'content' : 'Fourth chunk content' }
160160 ]
161-
161+
162162 result = extract_representative_chunks_smart (chunks , max_chunks = 3 )
163-
163+
164164 assert len (result ) <= 3
165165 assert result [0 ] == chunks [0 ] # First chunk always included
166166 assert result [- 1 ] == chunks [- 1 ] # Last chunk included
167-
167+
168168 def test_extract_representative_chunks_smart_import_error (self ):
169169 """Test fallback when calculate_term_weights import fails"""
170170 chunks = [
@@ -173,11 +173,11 @@ def test_extract_representative_chunks_smart_import_error(self):
173173 {'content' : 'Third chunk content' },
174174 {'content' : 'Fourth chunk content' }
175175 ]
176-
176+
177177 # Mock the import to fail
178178 with patch .dict ('sys.modules' , {'nexent.core.nlp.tokenizer' : None }):
179179 result = extract_representative_chunks_smart (chunks , max_chunks = 3 )
180-
180+
181181 # The fallback logic actually returns 3 chunks (first, middle, last)
182182 assert len (result ) == 3
183183 assert result [0 ] == chunks [0 ] # First chunk
@@ -186,7 +186,7 @@ def test_extract_representative_chunks_smart_import_error(self):
186186
187187class TestSummarizeDocument :
188188 """Test document summarization"""
189-
189+
190190 def test_summarize_document_no_model (self ):
191191 """Test document summarization without model"""
192192 result = summarize_document (
@@ -197,7 +197,7 @@ def test_summarize_document_no_model(self):
197197 )
198198 assert isinstance (result , str )
199199 assert "test.pdf" in result
200-
200+
201201 def test_summarize_document_with_model_placeholder (self ):
202202 """Test document summarization with model ID but no actual LLM call"""
203203 result = summarize_document (
@@ -212,7 +212,7 @@ def test_summarize_document_with_model_placeholder(self):
212212
213213class TestSummarizeCluster :
214214 """Test cluster summarization"""
215-
215+
216216 def test_summarize_cluster_no_model (self ):
217217 """Test cluster summarization without model"""
218218 result = summarize_cluster (
@@ -222,7 +222,7 @@ def test_summarize_cluster_no_model(self):
222222 )
223223 assert isinstance (result , str )
224224 assert "Summary" in result
225-
225+
226226 def test_summarize_cluster_with_model_placeholder (self ):
227227 """Test cluster summarization with model ID but no actual LLM call"""
228228 result = summarize_cluster (
@@ -236,7 +236,7 @@ def test_summarize_cluster_with_model_placeholder(self):
236236
237237class TestSummarizeClustersMapReduce :
238238 """Test map-reduce cluster summarization"""
239-
239+
240240 def test_summarize_clusters_map_reduce_basic (self ):
241241 """Test basic map-reduce summarization"""
242242 document_samples = {
@@ -252,24 +252,24 @@ def test_summarize_clusters_map_reduce_basic(self):
252252 }
253253 }
254254 clusters = {0 : ['doc1' , 'doc2' ]}
255-
255+
256256 with patch ('backend.utils.document_vector_utils.summarize_document' ) as mock_summarize_doc , \
257257 patch ('backend.utils.document_vector_utils.summarize_cluster' ) as mock_summarize_cluster :
258-
258+
259259 mock_summarize_doc .return_value = "Document summary"
260260 mock_summarize_cluster .return_value = "Cluster summary"
261-
261+
262262 result = summarize_clusters_map_reduce (
263263 document_samples = document_samples ,
264264 clusters = clusters ,
265265 model_id = 1 ,
266266 tenant_id = "test_tenant"
267267 )
268-
268+
269269 assert isinstance (result , dict )
270270 assert 0 in result
271271 assert result [0 ] == "Cluster summary"
272-
272+
273273 def test_summarize_clusters_map_reduce_no_valid_documents (self ):
274274 """Test map-reduce when no valid documents in cluster"""
275275 document_samples = {
@@ -279,38 +279,38 @@ def test_summarize_clusters_map_reduce_no_valid_documents(self):
279279 }
280280 }
281281 clusters = {0 : ['doc1' ]}
282-
282+
283283 with patch ('backend.utils.document_vector_utils.summarize_document' ) as mock_summarize_doc , \
284284 patch ('backend.utils.document_vector_utils.summarize_cluster' ) as mock_summarize_cluster :
285-
285+
286286 mock_summarize_doc .return_value = ""
287287 mock_summarize_cluster .return_value = "Mock cluster summary"
288-
288+
289289 result = summarize_clusters_map_reduce (
290290 document_samples = document_samples ,
291291 clusters = clusters ,
292292 model_id = 1 ,
293293 tenant_id = "test_tenant"
294294 )
295-
295+
296296 assert isinstance (result , dict )
297297 assert 0 in result
298298 assert result [0 ] == "Mock cluster summary"
299299
300300
301301class TestMergeClusterSummaries :
302302 """Test cluster summary merging"""
303-
303+
304304 def test_merge_cluster_summaries (self ):
305305 """Test merging multiple cluster summaries"""
306306 cluster_summaries = {
307307 0 : "First cluster summary" ,
308308 1 : "Second cluster summary" ,
309309 2 : "Third cluster summary"
310310 }
311-
311+
312312 result = merge_cluster_summaries (cluster_summaries )
313-
313+
314314 assert isinstance (result , str )
315315 assert "First cluster summary" in result
316316 assert "Second cluster summary" in result
@@ -320,7 +320,7 @@ def test_merge_cluster_summaries(self):
320320
321321class TestGetDocumentsFromEs :
322322 """Test ES document retrieval"""
323-
323+
324324 def test_get_documents_from_es_mock (self ):
325325 """Test ES document retrieval with mocked client"""
326326 mock_es_core = MagicMock ()
@@ -348,9 +348,9 @@ def test_get_documents_from_es_mock(self):
348348 }
349349 }
350350 }
351-
351+
352352 result = get_documents_from_es ('test_index' , mock_es_core , sample_doc_count = 10 )
353-
353+
354354 assert isinstance (result , dict )
355355 # The function returns a dict with document IDs as keys, not 'documents' key
356356 assert len (result ) > 0
@@ -361,7 +361,7 @@ def test_get_documents_from_es_mock(self):
361361
362362class TestProcessDocumentsForClustering :
363363 """Test document processing for clustering"""
364-
364+
365365 def test_process_documents_for_clustering_mock (self ):
366366 """Test document processing with mocked functions"""
367367 mock_es_core = MagicMock ()
@@ -389,22 +389,22 @@ def test_process_documents_for_clustering_mock(self):
389389 }
390390 }
391391 }
392-
392+
393393 with patch ('backend.utils.document_vector_utils.calculate_document_embedding' ) as mock_calc_embedding :
394394 mock_calc_embedding .return_value = np .array ([1.0 , 2.0 , 3.0 ])
395-
395+
396396 documents , embeddings = process_documents_for_clustering (
397397 'test_index' , mock_es_core , sample_doc_count = 10
398398 )
399-
399+
400400 assert isinstance (documents , dict )
401401 assert isinstance (embeddings , dict )
402402 assert len (documents ) == len (embeddings )
403403
404404
405405class TestExtractClusterContent :
406406 """Test cluster content extraction"""
407-
407+
408408 def test_extract_cluster_content (self ):
409409 """Test extracting content from cluster documents"""
410410 document_samples = {
@@ -418,9 +418,9 @@ def test_extract_cluster_content(self):
418418 }
419419 }
420420 doc_ids = ['doc1' , 'doc2' ]
421-
421+
422422 result = extract_cluster_content (document_samples , doc_ids )
423-
423+
424424 assert isinstance (result , str ) # The function returns a formatted string
425425 assert 'Content 1' in result
426426 assert 'Content 2' in result
@@ -430,7 +430,7 @@ def test_extract_cluster_content(self):
430430
431431class TestAnalyzeClusterCoherence :
432432 """Test cluster coherence analysis"""
433-
433+
434434 def test_analyze_cluster_coherence (self ):
435435 """Test cluster coherence analysis"""
436436 document_samples = {
@@ -444,9 +444,9 @@ def test_analyze_cluster_coherence(self):
444444 }
445445 }
446446 doc_ids = ['doc1' , 'doc2' ]
447-
447+
448448 result = analyze_cluster_coherence (doc_ids , document_samples )
449-
449+
450450 assert isinstance (result , dict )
451451 assert 'doc_count' in result
452452 assert result ['doc_count' ] == 2
0 commit comments