@@ -1176,6 +1176,277 @@ async def run_test():
11761176 # Basic functionality test - just verify the response is correct type
11771177 # The detailed function calls are tested in their own unit tests
11781178
1179+ def test_summary_index_name_no_tenant_id (self ):
1180+ """
1181+ Test summary_index_name raises exception when tenant_id is missing.
1182+
1183+ This test verifies that:
1184+ 1. An exception is raised when tenant_id is None
1185+ 2. The exception message contains "Tenant ID is required"
1186+ """
1187+ # Execute and Assert
1188+ async def run_test ():
1189+ with self .assertRaises (Exception ) as context :
1190+ await self .es_service .summary_index_name (
1191+ index_name = "test_index" ,
1192+ batch_size = 1000 ,
1193+ vdb_core = self .mock_vdb_core ,
1194+ language = 'en' ,
1195+ model_id = 1 ,
1196+ tenant_id = None # Missing tenant_id
1197+ )
1198+ self .assertIn ("Tenant ID is required" , str (context .exception ))
1199+
1200+ asyncio .run (run_test ())
1201+
1202+ def test_summary_index_name_no_documents (self ):
1203+ """
1204+ Test summary_index_name when no documents are found in index.
1205+
1206+ This test verifies that:
1207+ 1. An exception is raised when document_samples is empty
1208+ 2. The exception message contains "No documents found in index"
1209+ """
1210+ # Mock the new Map-Reduce functions
1211+ with patch ('utils.document_vector_utils.process_documents_for_clustering' ) as mock_process_docs , \
1212+ patch ('utils.document_vector_utils.kmeans_cluster_documents' ) as mock_cluster , \
1213+ patch ('utils.document_vector_utils.summarize_clusters_map_reduce' ) as mock_summarize , \
1214+ patch ('utils.document_vector_utils.merge_cluster_summaries' ) as mock_merge :
1215+
1216+ # Mock return empty document_samples
1217+ mock_process_docs .return_value = (
1218+ {}, # Empty document_samples
1219+ {} # Empty doc_embeddings
1220+ )
1221+
1222+ # Execute
1223+ async def run_test ():
1224+ with self .assertRaises (Exception ) as context :
1225+ result = await self .es_service .summary_index_name (
1226+ index_name = "test_index" ,
1227+ batch_size = 1000 ,
1228+ vdb_core = self .mock_vdb_core ,
1229+ language = 'en' ,
1230+ model_id = 1 ,
1231+ tenant_id = "test_tenant"
1232+ )
1233+ # Consume the stream to trigger execution
1234+ generator = result .body_iterator
1235+ async for item in generator :
1236+ break
1237+
1238+ self .assertIn ("No documents found in index" , str (context .exception ))
1239+
1240+ asyncio .run (run_test ())
1241+
1242+ def test_summary_index_name_runtime_error_fallback (self ):
1243+ """
1244+ Test summary_index_name fallback when get_running_loop raises RuntimeError.
1245+
1246+ This test verifies that:
1247+ 1. When get_running_loop() raises RuntimeError, get_event_loop() is used as fallback
1248+ 2. The summary generation still works correctly
1249+ """
1250+ # Mock the new Map-Reduce functions
1251+ with patch ('utils.document_vector_utils.process_documents_for_clustering' ) as mock_process_docs , \
1252+ patch ('utils.document_vector_utils.kmeans_cluster_documents' ) as mock_cluster , \
1253+ patch ('utils.document_vector_utils.summarize_clusters_map_reduce' ) as mock_summarize , \
1254+ patch ('utils.document_vector_utils.merge_cluster_summaries' ) as mock_merge :
1255+
1256+ # Mock return values
1257+ mock_process_docs .return_value = (
1258+ {"doc1" : {"chunks" : [{"content" : "test content" }]}}, # document_samples
1259+ {"doc1" : np .array ([0.1 , 0.2 , 0.3 ])} # doc_embeddings
1260+ )
1261+ mock_cluster .return_value = {"doc1" : 0 } # clusters
1262+ mock_summarize .return_value = {0 : "Test cluster summary" } # cluster_summaries
1263+ mock_merge .return_value = "Final merged summary" # final_summary
1264+
1265+ # Create a mock loop with run_in_executor that returns a coroutine
1266+ mock_loop = MagicMock ()
1267+ async def mock_run_in_executor (executor , func , * args ):
1268+ # Execute the function synchronously and return its result
1269+ return func ()
1270+ mock_loop .run_in_executor = mock_run_in_executor
1271+
1272+ # Patch asyncio functions to trigger RuntimeError fallback
1273+ with patch ('backend.services.vectordatabase_service.asyncio.get_running_loop' , side_effect = RuntimeError ("No running event loop" )), \
1274+ patch ('backend.services.vectordatabase_service.asyncio.get_event_loop' , return_value = mock_loop ) as mock_get_event_loop :
1275+
1276+ # Execute
1277+ async def run_test ():
1278+ result = await self .es_service .summary_index_name (
1279+ index_name = "test_index" ,
1280+ batch_size = 1000 ,
1281+ vdb_core = self .mock_vdb_core ,
1282+ language = 'en' ,
1283+ model_id = 1 ,
1284+ tenant_id = "test_tenant"
1285+ )
1286+
1287+ # Consume part of the stream to trigger execution
1288+ generator = result .body_iterator
1289+ try :
1290+ async for item in generator :
1291+ break
1292+ except StopAsyncIteration :
1293+ pass
1294+
1295+ return result
1296+
1297+ result = asyncio .run (run_test ())
1298+
1299+ # Assert
1300+ self .assertIsInstance (result , StreamingResponse )
1301+ # Verify fallback was used
1302+ mock_get_event_loop .assert_called ()
1303+
1304+ def test_summary_index_name_generator_exception (self ):
1305+ """
1306+ Test summary_index_name handles exceptions in the generator function.
1307+
1308+ This test verifies that:
1309+ 1. Exceptions in the generator are caught and streamed as error messages
1310+ 2. The error status is properly formatted
1311+ """
1312+ # Mock the new Map-Reduce functions
1313+ with patch ('utils.document_vector_utils.process_documents_for_clustering' ) as mock_process_docs , \
1314+ patch ('utils.document_vector_utils.kmeans_cluster_documents' ) as mock_cluster , \
1315+ patch ('utils.document_vector_utils.summarize_clusters_map_reduce' ) as mock_summarize , \
1316+ patch ('utils.document_vector_utils.merge_cluster_summaries' ) as mock_merge :
1317+
1318+ # Mock return values
1319+ mock_process_docs .return_value = (
1320+ {"doc1" : {"chunks" : [{"content" : "test content" }]}}, # document_samples
1321+ {"doc1" : np .array ([0.1 , 0.2 , 0.3 ])} # doc_embeddings
1322+ )
1323+ mock_cluster .return_value = {"doc1" : 0 } # clusters
1324+ mock_summarize .return_value = {0 : "Test cluster summary" } # cluster_summaries
1325+ mock_merge .return_value = "Final merged summary" # final_summary
1326+
1327+ # Execute
1328+ async def run_test ():
1329+ result = await self .es_service .summary_index_name (
1330+ index_name = "test_index" ,
1331+ batch_size = 1000 ,
1332+ vdb_core = self .mock_vdb_core ,
1333+ language = 'en' ,
1334+ model_id = 1 ,
1335+ tenant_id = "test_tenant"
1336+ )
1337+
1338+ # Consume the stream completely
1339+ generator = result .body_iterator
1340+ items = []
1341+ try :
1342+ async for item in generator :
1343+ items .append (item )
1344+ except Exception :
1345+ pass
1346+
1347+ return result , items
1348+
1349+ result , items = asyncio .run (run_test ())
1350+
1351+ # Assert
1352+ self .assertIsInstance (result , StreamingResponse )
1353+ # Verify that items were generated (at least the completed message)
1354+ self .assertGreater (len (items ), 0 )
1355+
1356+ def test_summary_index_name_sample_count_calculation (self ):
1357+ """
1358+ Test summary_index_name correctly calculates sample_count from batch_size.
1359+
1360+ This test verifies that:
1361+ 1. sample_count is calculated as min(batch_size // 5, 200)
1362+ 2. The sample_doc_count parameter is passed correctly to process_documents_for_clustering
1363+ """
1364+ # Test with batch_size=1000 -> sample_count should be min(200, 200) = 200
1365+ with patch ('utils.document_vector_utils.process_documents_for_clustering' ) as mock_process_docs , \
1366+ patch ('utils.document_vector_utils.kmeans_cluster_documents' ) as mock_cluster , \
1367+ patch ('utils.document_vector_utils.summarize_clusters_map_reduce' ) as mock_summarize , \
1368+ patch ('utils.document_vector_utils.merge_cluster_summaries' ) as mock_merge :
1369+
1370+ # Mock return values
1371+ mock_process_docs .return_value = (
1372+ {"doc1" : {"chunks" : [{"content" : "test content" }]}}, # document_samples
1373+ {"doc1" : np .array ([0.1 , 0.2 , 0.3 ])} # doc_embeddings
1374+ )
1375+ mock_cluster .return_value = {"doc1" : 0 } # clusters
1376+ mock_summarize .return_value = {0 : "Test cluster summary" } # cluster_summaries
1377+ mock_merge .return_value = "Final merged summary" # final_summary
1378+
1379+ # Execute with batch_size=1000
1380+ async def run_test ():
1381+ result = await self .es_service .summary_index_name (
1382+ index_name = "test_index" ,
1383+ batch_size = 1000 ,
1384+ vdb_core = self .mock_vdb_core ,
1385+ language = 'en' ,
1386+ model_id = 1 ,
1387+ tenant_id = "test_tenant"
1388+ )
1389+
1390+ # Consume part of the stream to trigger execution
1391+ generator = result .body_iterator
1392+ try :
1393+ async for item in generator :
1394+ break
1395+ except StopAsyncIteration :
1396+ pass
1397+
1398+ return result
1399+
1400+ asyncio .run (run_test ())
1401+
1402+ # Verify sample_doc_count was called with 200 (min(1000 // 5, 200) = 200)
1403+ self .assertTrue (mock_process_docs .called )
1404+ call_args = mock_process_docs .call_args
1405+ self .assertEqual (call_args .kwargs ['sample_doc_count' ], 200 )
1406+
1407+ # Test with batch_size=50 -> sample_count should be min(10, 200) = 10
1408+ with patch ('utils.document_vector_utils.process_documents_for_clustering' ) as mock_process_docs , \
1409+ patch ('utils.document_vector_utils.kmeans_cluster_documents' ) as mock_cluster , \
1410+ patch ('utils.document_vector_utils.summarize_clusters_map_reduce' ) as mock_summarize , \
1411+ patch ('utils.document_vector_utils.merge_cluster_summaries' ) as mock_merge :
1412+
1413+ # Mock return values
1414+ mock_process_docs .return_value = (
1415+ {"doc1" : {"chunks" : [{"content" : "test content" }]}},
1416+ {"doc1" : np .array ([0.1 , 0.2 , 0.3 ])}
1417+ )
1418+ mock_cluster .return_value = {"doc1" : 0 }
1419+ mock_summarize .return_value = {0 : "Test cluster summary" }
1420+ mock_merge .return_value = "Final merged summary"
1421+
1422+ # Execute with batch_size=50
1423+ async def run_test_small ():
1424+ result = await self .es_service .summary_index_name (
1425+ index_name = "test_index" ,
1426+ batch_size = 50 ,
1427+ vdb_core = self .mock_vdb_core ,
1428+ language = 'en' ,
1429+ model_id = 1 ,
1430+ tenant_id = "test_tenant"
1431+ )
1432+
1433+ # Consume part of the stream to trigger execution
1434+ generator = result .body_iterator
1435+ try :
1436+ async for item in generator :
1437+ break
1438+ except StopAsyncIteration :
1439+ pass
1440+
1441+ return result
1442+
1443+ asyncio .run (run_test_small ())
1444+
1445+ # Verify sample_doc_count was called with 10 (min(50 // 5, 200) = 10)
1446+ self .assertTrue (mock_process_docs .called )
1447+ call_args = mock_process_docs .call_args
1448+ self .assertEqual (call_args .kwargs ['sample_doc_count' ], 10 )
1449+
11791450 def test_get_random_documents (self ):
11801451 """
11811452 Test retrieving random documents from an index.
0 commit comments