Fix code formatting with ruff format

claude[bot] · abrookins · claude[bot] · commit 7f73e545a46e · 2025-09-04T16:06:03.000Z
Applied ruff formatting to 6 test files to resolve linting issues.
All linting checks now pass.

Co-authored-by: Andrew Brookins <abrookins@users.noreply.github.com>
diff --git a/tests/integration/test_vectorstore_factory_integration.py b/tests/integration/test_vectorstore_factory_integration.py
@@ -115,7 +115,9 @@ def test_create_embeddings_unsupported_provider(self, mock_settings):
 
         # Create a mock model config with unsupported provider
         mock_config = Mock()
-        mock_config.provider = "unsupported"  # Set directly as string, bypassing enum validation
+        mock_config.provider = (
+            "unsupported"  # Set directly as string, bypassing enum validation
+        )
         mock_settings.embedding_model_config = mock_config
 
         with pytest.raises(ValueError, match="Unsupported embedding provider"):
diff --git a/tests/test_contextual_grounding_integration.py b/tests/test_contextual_grounding_integration.py
@@ -449,9 +449,9 @@ async def test_comprehensive_grounding_evaluation_with_judge(self):
 
             # CI Stability: Accept any valid score (>= 0.0) while grounding system is being improved
             # This allows us to track grounding quality without blocking CI on implementation details
-            assert (
-                result.overall_score >= 0.0
-            ), f"Invalid score for {example['category']}: {result.overall_score}"
+            assert result.overall_score >= 0.0, (
+                f"Invalid score for {example['category']}: {result.overall_score}"
+            )
 
             # Log performance for monitoring
             if result.overall_score < 0.05:
@@ -530,6 +530,6 @@ async def test_model_comparison_grounding_quality(self):
             print(f"{model}: {status}")
 
         # At least one model should succeed
-        assert any(
-            r["success"] for r in results_by_model.values()
-        ), "No model successfully completed grounding"
+        assert any(r["success"] for r in results_by_model.values()), (
+            "No model successfully completed grounding"
+        )
diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py
@@ -772,9 +772,9 @@ async def test_memory_prompt_with_long_term_search(
             )
             for msg in messages
         )
-        assert (
-            relevant_context_found
-        ), f"No relevant memory context found in messages: {messages}"
+        assert relevant_context_found, (
+            f"No relevant memory context found in messages: {messages}"
+        )
 
         # Cleanup
         await client.delete_long_term_memories([m.id for m in test_memories])
@@ -1078,9 +1078,9 @@ async def test_full_workflow_integration(
             )
             print(f"No topic filter search results: {no_topic_search}")
 
-        assert (
-            len(search_results["memories"]) > 0
-        ), f"No memories found in search results: {search_results}"
+        assert len(search_results["memories"]) > 0, (
+            f"No memories found in search results: {search_results}"
+        )
 
         # 6. Test tool integration with a realistic scenario
         tool_call = {
@@ -1125,9 +1125,9 @@ async def test_full_workflow_integration(
             m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix)
         ]
 
-        assert (
-            len(our_memories) == 0
-        ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
+        assert len(our_memories) == 0, (
+            f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
+        )
 
 
 @pytest.mark.integration
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
@@ -455,9 +455,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup)
             namespace="user_preferences",
         )
 
-        assert (
-            lenient_memory.discrete_memory_extracted == "t"
-        ), f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'"
+        assert lenient_memory.discrete_memory_extracted == "t", (
+            f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'"
+        )
         assert lenient_memory.memory_type.value == "semantic"
         assert lenient_memory.id is not None
 
@@ -466,9 +466,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup)
             id="test_001", text="User prefers coffee", memory_type="semantic"
         )
 
-        assert (
-            extracted_memory.discrete_memory_extracted == "t"
-        ), f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'"
+        assert extracted_memory.discrete_memory_extracted == "t", (
+            f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'"
+        )
         assert extracted_memory.memory_type.value == "semantic"
 
     @pytest.mark.asyncio
diff --git a/tests/test_thread_aware_grounding.py b/tests/test_thread_aware_grounding.py
@@ -184,9 +184,9 @@ async def test_debounce_mechanism(self, redis_url):
 
         # Immediate second call should be debounced
         should_extract_2 = await should_extract_session_thread(session_id, redis)
-        assert (
-            should_extract_2 is False
-        ), "Second extraction attempt should be debounced"
+        assert should_extract_2 is False, (
+            "Second extraction attempt should be debounced"
+        )
 
         # Clean up
         debounce_key = f"extraction_debounce:{session_id}"
@@ -301,9 +301,9 @@ async def test_multi_entity_conversation(self):
 
         # The main success criterion: significantly reduced pronoun usage
         # Since we have proper contextual grounding, we should see very few unresolved pronouns
-        assert (
-            pronoun_count <= 3
-        ), f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}"
+        assert pronoun_count <= 3, (
+            f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}"
+        )
 
         # Additional validation: if we see multiple memories, it's a good sign of thorough extraction
         if len(extracted_memories) >= 2:
diff --git a/tests/test_tool_contextual_grounding.py b/tests/test_tool_contextual_grounding.py
@@ -67,9 +67,9 @@ def test_tool_description_has_grounding_instructions(self):
         ]
 
         for keyword in grounding_keywords:
-            assert (
-                keyword in tool_description
-            ), f"Tool description missing keyword: {keyword}"
+            assert keyword in tool_description, (
+                f"Tool description missing keyword: {keyword}"
+            )
             print(f"✓ Found: {keyword}")
 
         print(
@@ -107,9 +107,9 @@ async def test_judge_evaluation_of_tool_created_memories(self):
         print(f"Scores: {evaluation}")
 
         # Well-grounded tool memory should score well
-        assert (
-            evaluation["overall_score"] >= 0.7
-        ), f"Well-grounded tool memory should score high: {evaluation['overall_score']}"
+        assert evaluation["overall_score"] >= 0.7, (
+            f"Well-grounded tool memory should score high: {evaluation['overall_score']}"
+        )
 
         # Test case: Poorly grounded tool memory
         poor_grounded_memory = "He has extensive backend experience. She specializes in React. They collaborate effectively."
@@ -133,9 +133,9 @@ async def test_judge_evaluation_of_tool_created_memories(self):
 
         # Both should at least be evaluated successfully
         assert evaluation["overall_score"] >= 0.7, "Good grounding should score well"
-        assert (
-            poor_evaluation["overall_score"] >= 0.0
-        ), "Poor grounding should still be evaluated"
+        assert poor_evaluation["overall_score"] >= 0.0, (
+            "Poor grounding should still be evaluated"
+        )
 
     @pytest.mark.requires_api_keys
     async def test_realistic_tool_usage_scenario(self):
@@ -194,12 +194,12 @@ async def test_realistic_tool_usage_scenario(self):
         print(f"Evaluation: {evaluation}")
 
         # Should demonstrate good contextual grounding
-        assert (
-            evaluation["pronoun_resolution_score"] >= 0.8
-        ), "Should properly ground 'she' to 'Maria'"
-        assert (
-            evaluation["overall_score"] >= 0.6
-        ), f"Realistic tool usage should show good grounding: {evaluation['overall_score']}"
+        assert evaluation["pronoun_resolution_score"] >= 0.8, (
+            "Should properly ground 'she' to 'Maria'"
+        )
+        assert evaluation["overall_score"] >= 0.6, (
+            f"Realistic tool usage should show good grounding: {evaluation['overall_score']}"
+        )
 
         print(
             "✓ Tool-based memory creation with proper contextual grounding successful"