langchain-ai · Phạm Gia Linh (phamgialinhlx) · Feb 24, 2026
diff --git a/libs/genai/langchain_google_genai/chat_models.py b/libs/genai/langchain_google_genai/chat_models.py
@@ -2677,27 +2677,22 @@ def _add_response_parameters(
     ) -> dict[str, Any]:
         """Add response-specific parameters to generation config.
 
-        Includes `response_mime_type`, `response_schema`, and `response_json_schema`.
+        Handles `response_mime_type` and `response_json_schema`.
+
+        Note: `response_schema` is handled separately in `_prepare_request`
+        because it must bypass GenerationConfig (whose strict Schema type
+        rejects $defs/$ref) and go directly to GenerateContentConfig.
         """
         # Handle response mime type
         response_mime_type = kwargs.get("response_mime_type", self.response_mime_type)
         if response_mime_type is not None:
             gen_config["response_mime_type"] = response_mime_type
 
-        response_schema = kwargs.get("response_schema", self.response_schema)
-        response_json_schema = kwargs.get("response_json_schema")  # If passed as kwarg
-
-        # Handle both response_schema and response_json_schema
-        # (Regardless, we use `response_json_schema` in the request)
-        schema_to_use = (
-            response_json_schema
-            if response_json_schema is not None
-            else response_schema
-        )
-        if schema_to_use:
+        response_json_schema = kwargs.get("response_json_schema")
+        if response_json_schema is not None:
             self._validate_and_add_response_schema(
                 gen_config=gen_config,
-                response_schema=schema_to_use,
+                response_schema=response_json_schema,
                 response_mime_type=response_mime_type,
             )
 
@@ -2789,6 +2784,29 @@ def _prepare_request(
                 if schema and "response_json_schema" not in kwargs:
                     kwargs["response_json_schema"] = schema
 
+        # Extract response_schema before _prepare_params. It must bypass
+        # GenerationConfig (whose strict Schema type rejects $defs/$ref)
+        # and go directly to GenerateContentConfig where the SDK's
+        # process_schema() pipeline inlines $defs/$ref.
+        response_schema = kwargs.pop("response_schema", self.response_schema)
+        if kwargs.get("response_json_schema") is not None:
+            response_schema = None  # response_json_schema takes precedence
+        if response_schema is not None:
+            response_mime_type = kwargs.get(
+                "response_mime_type", self.response_mime_type
+            )
+            if response_mime_type != "application/json":
+                error_message = (
+                    "JSON schema structured output is only supported when "
+                    "response_mime_type is set to 'application/json'"
+                )
+                if response_mime_type == "text/x.enum":
+                    error_message += (
+                        ". Instead of 'text/x.enum', define enums using "
+                        "your JSON schema."
+                    )
+                raise ValueError(error_message)
+
         # Get generation parameters
         # (consumes thinking kwargs into params.thinking_config)
         params: GenerationConfig = self._prepare_params(
@@ -2829,6 +2847,7 @@ def _prepare_request(
             max_retries=max_retries,
             image_config=image_config,
             labels=labels,
+            response_schema=response_schema,
             **remaining_kwargs,
         )
 
@@ -2977,6 +2996,7 @@ def _build_request_config(
         max_retries: int | None = None,
         image_config: dict[str, Any] | None = None,
         labels: dict[str, str] | None = None,
+        response_schema: dict[str, Any] | None = None,
         **kwargs: Any,
     ) -> GenerateContentConfig:
         """Build the final request configuration."""
@@ -2999,6 +3019,13 @@ def _build_request_config(
         if image_config_dict is not None:
             image_config_obj = ImageConfig(**image_config_dict)
 
+        # response_schema is passed directly to GenerateContentConfig
+        # (bypassing GenerationConfig) so the SDK's process_schema()
+        # pipeline can inline $defs/$ref for Vertex AI compatibility.
+        config_kwargs: dict[str, Any] = {}
+        if response_schema is not None:
+            config_kwargs["response_schema"] = response_schema
+
         return GenerateContentConfig(
             tools=list(formatted_tools) if formatted_tools else None,
             tool_config=formatted_tool_config,
@@ -3009,6 +3036,7 @@ def _build_request_config(
             image_config=image_config_obj,
             labels=labels,
             **params.model_dump(exclude_unset=True),
+            **config_kwargs,
             **kwargs,
         )
 
@@ -3408,12 +3436,12 @@ class Recipe(BaseModel):
                 msg = f"Unsupported schema type {type(schema)}"
                 raise ValueError(msg)
 
-            # Note: The Google GenAI SDK automatically handles schema transformation
-            # (inlining $defs, resolving $ref) via its process_schema() function.
-            # This ensures Union types and nested schemas work correctly.
+            # Use response_schema (not response_json_schema) so the SDK's
+            # process_schema() pipeline inlines $defs/$ref. This is required for
+            # Vertex AI, which silently returns empty arrays for schemas with $ref.
             llm = self.bind(
                 response_mime_type="application/json",
-                response_json_schema=schema_json,
+                response_schema=schema_json,
                 ls_structured_output_format={
                     "kwargs": {"method": method},
                     "schema": ls_schema,

diff --git a/libs/genai/tests/unit_tests/test_chat_models.py b/libs/genai/tests/unit_tests/test_chat_models.py
@@ -3359,8 +3359,12 @@ class TestModel(BaseModel):
 
 
 def test_response_json_schema_param_mapping() -> None:
-    """Test both `response_schema` and `response_json_schema` map correctly to
-    `response_json_schema` in `GenerationConfig`."""
+    """Test `response_schema` and `response_json_schema` map correctly.
+
+    `response_schema` bypasses GenerationConfig and goes directly to
+    GenerateContentConfig (via _prepare_request). `response_json_schema`
+    goes through GenerationConfig as before.
+    """
     llm = ChatGoogleGenerativeAI(
         model=MODEL_NAME, google_api_key=SecretStr(FAKE_API_KEY)
     )
@@ -3371,37 +3375,46 @@ def test_response_json_schema_param_mapping() -> None:
         "required": ["name"],
     }
 
-    # Test response_schema parameter maps to response_json_schema in gen_config
+    # response_schema bypasses GenerationConfig — _prepare_params ignores it
     gen_config_1 = llm._prepare_params(
         stop=None, response_mime_type="application/json", response_schema=schema_dict
     )
-    assert gen_config_1.response_json_schema == schema_dict
+    assert gen_config_1.response_json_schema is None
 
-    # Test response_json_schema parameter maps directly to response_json_schema in
-    # gen_config
+    # response_schema is routed through _prepare_request to GenerateContentConfig
+    messages: list[BaseMessage] = [HumanMessage(content="test")]
+    request_1 = llm._prepare_request(
+        messages,
+        response_mime_type="application/json",
+        response_schema=schema_dict,
+    )
+    assert request_1["config"].response_schema is not None
+
+    # response_json_schema maps to response_json_schema in gen_config
     gen_config_2 = llm._prepare_params(
         stop=None,
         response_mime_type="application/json",
         response_json_schema=schema_dict,
     )
     assert gen_config_2.response_json_schema == schema_dict
 
-    # Test that response_json_schema takes precedence over response_schema
+    # response_json_schema takes precedence over response_schema
     different_schema = {
         "type": "object",
         "properties": {"age": {"type": "integer"}},
         "required": ["age"],
     }
 
-    gen_config_3 = llm._prepare_params(
-        stop=None,
+    request_3 = llm._prepare_request(
+        messages,
         response_mime_type="application/json",
         response_schema=schema_dict,
         response_json_schema=different_schema,
     )
     assert (
-        gen_config_3.response_json_schema == different_schema
+        request_3["config"].response_json_schema == different_schema
     )  # response_json_schema takes precedence
+    assert request_3["config"].response_schema is None
 
 
 def test_with_struct_out() -> None:
@@ -3454,7 +3467,7 @@ class RecursiveModel(BaseModel):
     structured = llm.with_structured_output(RecursiveModel, method="json_schema")
     llm = cast("Any", structured).first
 
-    schema = llm.kwargs["response_json_schema"]
+    schema = llm.kwargs["response_schema"]
 
     assert "$defs" in schema, "json_schema should preserve $defs definitions"
     assert schema == raw_schema, "json_schema should preserve raw schema exactly"
@@ -3569,7 +3582,7 @@ class ModerationResult(BaseModel):
 
     llm = cast("Any", structured).first
 
-    assert "response_json_schema" in llm.kwargs
+    assert "response_schema" in llm.kwargs
 
 
 def test_response_schema_mime_type_validation() -> None:
@@ -3579,13 +3592,14 @@ def test_response_schema_mime_type_validation() -> None:
     )
 
     schema = {"type": "object", "properties": {"field": {"type": "string"}}}
+    messages: list[BaseMessage] = [HumanMessage(content="test")]
 
-    # Test response_schema validation - error happens during _prepare_params
+    # Test response_schema validation - error happens during _prepare_request
     with pytest.raises(
         ValueError, match=r"JSON schema structured output is only supported when"
     ):
-        llm._prepare_params(
-            stop=None, response_schema=schema, response_mime_type="text/plain"
+        llm._prepare_request(
+            messages, response_schema=schema, response_mime_type="text/plain"
         )
 
     # Test that binding succeeds (validation happens later during generation)