🐛 When the knowledge base is automatically summarized, other interfaces will be pended.

YehongPan · YehongPan · commit 259c710dc7ec · 2025-11-21T13:42:24.000+08:00
[Specification Details] Add Test Case.
diff --git a/test/backend/services/test_vectordatabase_service.py b/test/backend/services/test_vectordatabase_service.py
@@ -1176,6 +1176,277 @@ async def run_test():
             # Basic functionality test - just verify the response is correct type
             # The detailed function calls are tested in their own unit tests
 
+    def test_summary_index_name_no_tenant_id(self):
+        """
+        Test summary_index_name raises exception when tenant_id is missing.
+        
+        This test verifies that:
+        1. An exception is raised when tenant_id is None
+        2. The exception message contains "Tenant ID is required"
+        """
+        # Execute and Assert
+        async def run_test():
+            with self.assertRaises(Exception) as context:
+                await self.es_service.summary_index_name(
+                    index_name="test_index",
+                    batch_size=1000,
+                    vdb_core=self.mock_vdb_core,
+                    language='en',
+                    model_id=1,
+                    tenant_id=None  # Missing tenant_id
+                )
+            self.assertIn("Tenant ID is required", str(context.exception))
+        
+        asyncio.run(run_test())
+
+    def test_summary_index_name_no_documents(self):
+        """
+        Test summary_index_name when no documents are found in index.
+        
+        This test verifies that:
+        1. An exception is raised when document_samples is empty
+        2. The exception message contains "No documents found in index"
+        """
+        # Mock the new Map-Reduce functions
+        with patch('utils.document_vector_utils.process_documents_for_clustering') as mock_process_docs, \
+             patch('utils.document_vector_utils.kmeans_cluster_documents') as mock_cluster, \
+             patch('utils.document_vector_utils.summarize_clusters_map_reduce') as mock_summarize, \
+             patch('utils.document_vector_utils.merge_cluster_summaries') as mock_merge:
+            
+            # Mock return empty document_samples
+            mock_process_docs.return_value = (
+                {},  # Empty document_samples
+                {}  # Empty doc_embeddings
+            )
+            
+            # Execute
+            async def run_test():
+                with self.assertRaises(Exception) as context:
+                    result = await self.es_service.summary_index_name(
+                        index_name="test_index",
+                        batch_size=1000,
+                        vdb_core=self.mock_vdb_core,
+                        language='en',
+                        model_id=1,
+                        tenant_id="test_tenant"
+                    )
+                    # Consume the stream to trigger execution
+                    generator = result.body_iterator
+                    async for item in generator:
+                        break
+                
+                self.assertIn("No documents found in index", str(context.exception))
+            
+            asyncio.run(run_test())
+
+    def test_summary_index_name_runtime_error_fallback(self):
+        """
+        Test summary_index_name fallback when get_running_loop raises RuntimeError.
+        
+        This test verifies that:
+        1. When get_running_loop() raises RuntimeError, get_event_loop() is used as fallback
+        2. The summary generation still works correctly
+        """
+        # Mock the new Map-Reduce functions
+        with patch('utils.document_vector_utils.process_documents_for_clustering') as mock_process_docs, \
+             patch('utils.document_vector_utils.kmeans_cluster_documents') as mock_cluster, \
+             patch('utils.document_vector_utils.summarize_clusters_map_reduce') as mock_summarize, \
+             patch('utils.document_vector_utils.merge_cluster_summaries') as mock_merge:
+            
+            # Mock return values
+            mock_process_docs.return_value = (
+                {"doc1": {"chunks": [{"content": "test content"}]}},  # document_samples
+                {"doc1": np.array([0.1, 0.2, 0.3])}  # doc_embeddings
+            )
+            mock_cluster.return_value = {"doc1": 0}  # clusters
+            mock_summarize.return_value = {0: "Test cluster summary"}  # cluster_summaries
+            mock_merge.return_value = "Final merged summary"  # final_summary
+            
+            # Create a mock loop with run_in_executor that returns a coroutine
+            mock_loop = MagicMock()
+            async def mock_run_in_executor(executor, func, *args):
+                # Execute the function synchronously and return its result
+                return func()
+            mock_loop.run_in_executor = mock_run_in_executor
+            
+            # Patch asyncio functions to trigger RuntimeError fallback
+            with patch('backend.services.vectordatabase_service.asyncio.get_running_loop', side_effect=RuntimeError("No running event loop")), \
+                 patch('backend.services.vectordatabase_service.asyncio.get_event_loop', return_value=mock_loop) as mock_get_event_loop:
+                
+                # Execute
+                async def run_test():
+                    result = await self.es_service.summary_index_name(
+                        index_name="test_index",
+                        batch_size=1000,
+                        vdb_core=self.mock_vdb_core,
+                        language='en',
+                        model_id=1,
+                        tenant_id="test_tenant"
+                    )
+                    
+                    # Consume part of the stream to trigger execution
+                    generator = result.body_iterator
+                    try:
+                        async for item in generator:
+                            break
+                    except StopAsyncIteration:
+                        pass
+                    
+                    return result
+                
+                result = asyncio.run(run_test())
+                
+                # Assert
+                self.assertIsInstance(result, StreamingResponse)
+                # Verify fallback was used
+                mock_get_event_loop.assert_called()
+
+    def test_summary_index_name_generator_exception(self):
+        """
+        Test summary_index_name handles exceptions in the generator function.
+        
+        This test verifies that:
+        1. Exceptions in the generator are caught and streamed as error messages
+        2. The error status is properly formatted
+        """
+        # Mock the new Map-Reduce functions
+        with patch('utils.document_vector_utils.process_documents_for_clustering') as mock_process_docs, \
+             patch('utils.document_vector_utils.kmeans_cluster_documents') as mock_cluster, \
+             patch('utils.document_vector_utils.summarize_clusters_map_reduce') as mock_summarize, \
+             patch('utils.document_vector_utils.merge_cluster_summaries') as mock_merge:
+            
+            # Mock return values
+            mock_process_docs.return_value = (
+                {"doc1": {"chunks": [{"content": "test content"}]}},  # document_samples
+                {"doc1": np.array([0.1, 0.2, 0.3])}  # doc_embeddings
+            )
+            mock_cluster.return_value = {"doc1": 0}  # clusters
+            mock_summarize.return_value = {0: "Test cluster summary"}  # cluster_summaries
+            mock_merge.return_value = "Final merged summary"  # final_summary
+            
+            # Execute
+            async def run_test():
+                result = await self.es_service.summary_index_name(
+                    index_name="test_index",
+                    batch_size=1000,
+                    vdb_core=self.mock_vdb_core,
+                    language='en',
+                    model_id=1,
+                    tenant_id="test_tenant"
+                )
+                
+                # Consume the stream completely
+                generator = result.body_iterator
+                items = []
+                try:
+                    async for item in generator:
+                        items.append(item)
+                except Exception:
+                    pass
+                
+                return result, items
+            
+            result, items = asyncio.run(run_test())
+            
+            # Assert
+            self.assertIsInstance(result, StreamingResponse)
+            # Verify that items were generated (at least the completed message)
+            self.assertGreater(len(items), 0)
+
+    def test_summary_index_name_sample_count_calculation(self):
+        """
+        Test summary_index_name correctly calculates sample_count from batch_size.
+        
+        This test verifies that:
+        1. sample_count is calculated as min(batch_size // 5, 200)
+        2. The sample_doc_count parameter is passed correctly to process_documents_for_clustering
+        """
+        # Test with batch_size=1000 -> sample_count should be min(200, 200) = 200
+        with patch('utils.document_vector_utils.process_documents_for_clustering') as mock_process_docs, \
+             patch('utils.document_vector_utils.kmeans_cluster_documents') as mock_cluster, \
+             patch('utils.document_vector_utils.summarize_clusters_map_reduce') as mock_summarize, \
+             patch('utils.document_vector_utils.merge_cluster_summaries') as mock_merge:
+            
+            # Mock return values
+            mock_process_docs.return_value = (
+                {"doc1": {"chunks": [{"content": "test content"}]}},  # document_samples
+                {"doc1": np.array([0.1, 0.2, 0.3])}  # doc_embeddings
+            )
+            mock_cluster.return_value = {"doc1": 0}  # clusters
+            mock_summarize.return_value = {0: "Test cluster summary"}  # cluster_summaries
+            mock_merge.return_value = "Final merged summary"  # final_summary
+            
+            # Execute with batch_size=1000
+            async def run_test():
+                result = await self.es_service.summary_index_name(
+                    index_name="test_index",
+                    batch_size=1000,
+                    vdb_core=self.mock_vdb_core,
+                    language='en',
+                    model_id=1,
+                    tenant_id="test_tenant"
+                )
+                
+                # Consume part of the stream to trigger execution
+                generator = result.body_iterator
+                try:
+                    async for item in generator:
+                        break
+                except StopAsyncIteration:
+                    pass
+                
+                return result
+            
+            asyncio.run(run_test())
+            
+            # Verify sample_doc_count was called with 200 (min(1000 // 5, 200) = 200)
+            self.assertTrue(mock_process_docs.called)
+            call_args = mock_process_docs.call_args
+            self.assertEqual(call_args.kwargs['sample_doc_count'], 200)
+        
+        # Test with batch_size=50 -> sample_count should be min(10, 200) = 10
+        with patch('utils.document_vector_utils.process_documents_for_clustering') as mock_process_docs, \
+             patch('utils.document_vector_utils.kmeans_cluster_documents') as mock_cluster, \
+             patch('utils.document_vector_utils.summarize_clusters_map_reduce') as mock_summarize, \
+             patch('utils.document_vector_utils.merge_cluster_summaries') as mock_merge:
+            
+            # Mock return values
+            mock_process_docs.return_value = (
+                {"doc1": {"chunks": [{"content": "test content"}]}},
+                {"doc1": np.array([0.1, 0.2, 0.3])}
+            )
+            mock_cluster.return_value = {"doc1": 0}
+            mock_summarize.return_value = {0: "Test cluster summary"}
+            mock_merge.return_value = "Final merged summary"
+            
+            # Execute with batch_size=50
+            async def run_test_small():
+                result = await self.es_service.summary_index_name(
+                    index_name="test_index",
+                    batch_size=50,
+                    vdb_core=self.mock_vdb_core,
+                    language='en',
+                    model_id=1,
+                    tenant_id="test_tenant"
+                )
+                
+                # Consume part of the stream to trigger execution
+                generator = result.body_iterator
+                try:
+                    async for item in generator:
+                        break
+                except StopAsyncIteration:
+                    pass
+                
+                return result
+            
+            asyncio.run(run_test_small())
+            
+            # Verify sample_doc_count was called with 10 (min(50 // 5, 200) = 10)
+            self.assertTrue(mock_process_docs.called)
+            call_args = mock_process_docs.call_args
+            self.assertEqual(call_args.kwargs['sample_doc_count'], 10)
+
     def test_get_random_documents(self):
         """
         Test retrieving random documents from an index.