Skip to content

Commit 469ed2e

Browse files
authored
Use rate limit handler on v2 invoke methods (#495)
1 parent 1402bc5 commit 469ed2e

File tree

5 files changed

+93
-1
lines changed

5 files changed

+93
-1
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22

33
## Next
44

5+
### Fixed
6+
7+
- Fixed a bug where the rate limit handler was not being called on the `VertexAILLM` and `MistralAILLM` `__invoke_v2` and `__ainvoke_v2` methods.
8+
59
### Added
610

711
- `NodeType` and `RelationshipType` now reject labels and types that start or end with double underscores (`__`), e.g. `__Person__`. This convention is reserved for internal Neo4j GraphRAG labels. A `ValidationError` is raised on construction.

src/neo4j_graphrag/llm/mistralai_llm.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,7 @@ def __invoke_v1(
200200
except SDKError as e:
201201
raise LLMGenerationError(e)
202202

203+
@rate_limit_handler_decorator
203204
def __invoke_v2(
204205
self,
205206
input: List[LLMMessage],
@@ -277,6 +278,7 @@ async def __ainvoke_v1(
277278
except SDKError as e:
278279
raise LLMGenerationError(e)
279280

281+
@async_rate_limit_handler_decorator
280282
async def __ainvoke_v2(
281283
self,
282284
input: List[LLMMessage],

src/neo4j_graphrag/llm/vertexai_llm.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -286,6 +286,7 @@ def __invoke_v1(
286286
except ResponseValidationError as e:
287287
raise LLMGenerationError("Error calling VertexAILLM") from e
288288

289+
@rate_limit_handler_decorator
289290
def __invoke_v2(
290291
self,
291292
input: List[LLMMessage],
@@ -348,6 +349,7 @@ async def __ainvoke_v1(
348349
except ResponseValidationError as e:
349350
raise LLMGenerationError("Error calling VertexAILLM") from e
350351

352+
@async_rate_limit_handler_decorator
351353
async def __ainvoke_v2(
352354
self,
353355
input: list[LLMMessage],

tests/unit/llm/test_mistralai_llm.py

Lines changed: 45 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,14 +13,15 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
from typing import Any, Optional
16-
from unittest.mock import MagicMock, Mock, patch
16+
from unittest.mock import AsyncMock, MagicMock, Mock, patch
1717
from typing import List
1818

1919
import httpx
2020
import pytest
2121
from neo4j_graphrag.exceptions import LLMGenerationError
2222
from neo4j_graphrag.llm import LLMResponse, MistralAILLM
2323
from neo4j_graphrag.types import LLMMessage
24+
from neo4j_graphrag.utils.rate_limit import NoOpRateLimitHandler
2425
from pydantic import BaseModel, ConfigDict
2526

2627

@@ -439,3 +440,46 @@ class TestModel(BaseModel):
439440
assert "MistralAILLM does not currently support structured output" in str(
440441
exc_info.value
441442
)
443+
444+
445+
@patch("neo4j_graphrag.llm.mistralai_llm.SDKError", MockSDKError)
446+
@patch("neo4j_graphrag.llm.mistralai_llm.Mistral")
447+
def test_mistralai_invoke_v2_rate_limit_handler_called(
448+
mock_mistral: Mock,
449+
) -> None:
450+
"""Test that the rate limit handler is invoked on the V2 (List[LLMMessage]) path."""
451+
messages: List[LLMMessage] = [{"role": "user", "content": "Hello"}]
452+
mock_mistral_instance = mock_mistral.return_value
453+
chat_response_mock = MagicMock()
454+
chat_response_mock.choices = [MagicMock(message=MagicMock(content="Hi there!"))]
455+
mock_mistral_instance.chat.complete.return_value = chat_response_mock
456+
457+
spy_handler = MagicMock(wraps=NoOpRateLimitHandler())
458+
llm = MistralAILLM(model_name="mistral-model", rate_limit_handler=spy_handler)
459+
response = llm.invoke(messages)
460+
461+
assert response.content == "Hi there!"
462+
spy_handler.handle_sync.assert_called_once()
463+
464+
465+
@pytest.mark.asyncio
466+
@patch("neo4j_graphrag.llm.mistralai_llm.SDKError", MockSDKError)
467+
@patch("neo4j_graphrag.llm.mistralai_llm.Mistral")
468+
async def test_mistralai_ainvoke_v2_rate_limit_handler_called(
469+
mock_mistral: Mock,
470+
) -> None:
471+
"""Test that the rate limit handler is invoked on the async V2 (List[LLMMessage]) path."""
472+
messages: List[LLMMessage] = [{"role": "user", "content": "Hello"}]
473+
mock_mistral_instance = mock_mistral.return_value
474+
chat_response_mock = MagicMock()
475+
chat_response_mock.choices = [MagicMock(message=MagicMock(content="Hi there!"))]
476+
mock_mistral_instance.chat.complete_async = AsyncMock(
477+
return_value=chat_response_mock
478+
)
479+
480+
spy_handler = MagicMock(wraps=NoOpRateLimitHandler())
481+
llm = MistralAILLM(model_name="mistral-model", rate_limit_handler=spy_handler)
482+
response = await llm.ainvoke(messages)
483+
484+
assert response.content == "Hi there!"
485+
spy_handler.handle_async.assert_called_once()

tests/unit/llm/test_vertexai_llm.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
from neo4j_graphrag.llm.vertexai_llm import VertexAILLM
3030
from neo4j_graphrag.tool import Tool
3131
from neo4j_graphrag.types import LLMMessage
32+
from neo4j_graphrag.utils.rate_limit import NoOpRateLimitHandler
3233

3334
from pydantic import BaseModel, ConfigDict
3435

@@ -598,3 +599,42 @@ async def test_vertexai_ainvoke_v2_with_json_schema_response_format(
598599
# Verify generation_config has response_schema
599600
call_args = mock_model.generate_content_async.call_args.kwargs
600601
assert "generation_config" in call_args
602+
603+
604+
@patch("neo4j_graphrag.llm.vertexai_llm.GenerativeModel")
605+
def test_vertexai_invoke_v2_rate_limit_handler_called(
606+
GenerativeModelMock: MagicMock,
607+
) -> None:
608+
"""Test that the rate limit handler is invoked on the V2 (List[LLMMessage]) path."""
609+
messages: List[LLMMessage] = [{"role": "user", "content": "Hello"}]
610+
mock_response = Mock()
611+
mock_response.text = "Hi there!"
612+
mock_model = GenerativeModelMock.return_value
613+
mock_model.generate_content.return_value = mock_response
614+
615+
spy_handler = MagicMock(wraps=NoOpRateLimitHandler())
616+
llm = VertexAILLM(model_name="gemini-1.5-flash-001", rate_limit_handler=spy_handler)
617+
response = llm.invoke(messages)
618+
619+
assert response.content == "Hi there!"
620+
spy_handler.handle_sync.assert_called_once()
621+
622+
623+
@pytest.mark.asyncio
624+
@patch("neo4j_graphrag.llm.vertexai_llm.GenerativeModel")
625+
async def test_vertexai_ainvoke_v2_rate_limit_handler_called(
626+
GenerativeModelMock: MagicMock,
627+
) -> None:
628+
"""Test that the rate limit handler is invoked on the async V2 (List[LLMMessage]) path."""
629+
messages: List[LLMMessage] = [{"role": "user", "content": "Hello"}]
630+
mock_response = AsyncMock()
631+
mock_response.text = "Hi there!"
632+
mock_model = GenerativeModelMock.return_value
633+
mock_model.generate_content_async = AsyncMock(return_value=mock_response)
634+
635+
spy_handler = MagicMock(wraps=NoOpRateLimitHandler())
636+
llm = VertexAILLM(model_name="gemini-1.5-flash-001", rate_limit_handler=spy_handler)
637+
response = await llm.ainvoke(messages)
638+
639+
assert response.content == "Hi there!"
640+
spy_handler.handle_async.assert_called_once()

0 commit comments

Comments (0)