fix(vertex): honor max_retries=0 and disable GAPIC retries (#1185)

jitokim · mdrxy · web-flow · commit e221d199ee15 · 2025-09-17T14:17:25.000-04:00
## PR Description Fixes an issue where `max_retries` was not honored by `VertexAI` and `ChatVertexAI`. With this change, `max_retries=0` makes exactly one attempt with no retries, and the GAPIC client's own retries are disabled as well unless the caller passes `retry` explicitly. ## Relevant issues Fixes #1093 ## Type 🐛 Bug Fix ## Changes (optional) ## Testing (optional) ## Note (optional) Signed-off-by: jitokim <pigberger70@gmail.com> Co-authored-by: Mason Daugherty <mason@langchain.dev>
diff --git a/libs/vertexai/langchain_google_vertexai/_retry.py b/libs/vertexai/langchain_google_vertexai/_retry.py
@@ -113,9 +113,12 @@ def get_google_api_call_error_retry_instance():
         else:
             retry_instance = (retry_instance) | (retry_if_exception_type(error))
 
+    # Treat max_retries of None, 0, or a negative value as "no retries": one attempt.
+    attempts = 1 if max_retries is None or max_retries <= 0 else max_retries
+
     return retry(
         reraise=True,
-        stop=stop_after_attempt(max_retries),
+        stop=stop_after_attempt(attempts),
         wait=wait_exponential(**wait_params),
         retry=retry_instance,
         before_sleep=_before_sleep,
diff --git a/libs/vertexai/langchain_google_vertexai/chat_models.py b/libs/vertexai/langchain_google_vertexai/chat_models.py
@@ -170,7 +170,14 @@
 _allowed_beta_params = [
     "media_resolution",
 ]
-_allowed_params_prediction_service = ["request", "timeout", "metadata", "labels"]
+_allowed_params_prediction_service = [
+    "request",
+    "timeout",
+    "metadata",
+    "labels",
+    # Allow controlling GAPIC client retries from callers.
+    "retry",
+]
 
 
 _FUNCTION_CALL_THOUGHT_SIGNATURES_MAP_KEY = (
@@ -775,6 +782,10 @@ def _completion_with_retry(
     def _completion_with_retry_inner(generation_method: Callable, **kwargs: Any) -> Any:
         return generation_method(**kwargs)
 
+    # If user requested 0 retries, disable GAPIC retries too unless explicitly set.
+    if max_retries <= 0 and "retry" not in kwargs:
+        kwargs["retry"] = None
+
     params = {
         k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service
     }
@@ -805,6 +816,10 @@ async def _completion_with_retry_inner(
     ) -> Any:
         return await generation_method(**kwargs)
 
+    # If user requested 0 retries, disable GAPIC retries too unless explicitly set.
+    if max_retries <= 0 and "retry" not in kwargs:
+        kwargs["retry"] = None
+
     params = {
         k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service
     }
diff --git a/libs/vertexai/tests/unit_tests/test_retry.py b/libs/vertexai/tests/unit_tests/test_retry.py
@@ -0,0 +1,66 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from langchain_google_vertexai._retry import create_base_retry_decorator
+from langchain_google_vertexai.chat_models import _completion_with_retry
+
+
+class _DummyTimeoutError(Exception):
+    pass
+
+
+def test_create_base_retry_decorator_zero_retries_means_single_attempt() -> None:
+    """Ensure max_retries=0 performs exactly one attempt and no retries."""
+    calls = {"count": 0}
+
+    def will_timeout():
+        calls["count"] += 1
+        raise _DummyTimeoutError("timeout")
+
+    decorator = create_base_retry_decorator(
+        error_types=[_DummyTimeoutError], max_retries=0
+    )
+    wrapped = decorator(will_timeout)
+
+    with pytest.raises(_DummyTimeoutError):
+        wrapped()
+
+    assert calls["count"] == 1
+
+
+def test_completion_with_retry_injects_retry_none_when_zero() -> None:
+    """_completion_with_retry should pass retry=None to GAPIC when max_retries=0."""
+    gen_method = MagicMock(return_value="ok")
+
+    result = _completion_with_retry(
+        gen_method,
+        max_retries=0,
+        request={"dummy": True},
+        timeout=120,
+        metadata=(),
+    )
+
+    assert result == "ok"
+    # Ensure called exactly once and with retry=None passed through
+    assert gen_method.call_count == 1
+    kwargs = gen_method.call_args.kwargs
+    assert "retry" in kwargs and kwargs["retry"] is None
+
+
+def test_completion_with_retry_does_not_inject_retry_when_positive() -> None:
+    """When max_retries>0, do not auto-inject retry=None."""
+    gen_method = MagicMock(return_value="ok")
+
+    result = _completion_with_retry(
+        gen_method,
+        max_retries=2,
+        request={"dummy": True},
+        timeout=30,
+        metadata=(),
+    )
+
+    assert result == "ok"
+    assert gen_method.call_count == 1
+    kwargs = gen_method.call_args.kwargs
+    assert "retry" not in kwargs