diff --git a/CHANGELOG.md b/CHANGELOG.md index bbab44cbe..fef952ffd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ - MarkdownLoader (experimental): added a Markdown loader to support `.md` and `.markdown` files. +### Fixed + +- `VertexAILLM`: passing a Pydantic model with `extra="forbid"` as `response_format` no longer raises a `ParseError`. Unsupported JSON Schema fields (e.g. `additionalProperties`) are now stripped before the schema is forwarded to VertexAI's protobuf `Schema` type. + ### Changed - SimpleKG pipeline (experimental): the `from_pdf` parameter is deprecated in favor of `from_file` (PDF and Markdown inputs). `from_pdf` still works but emits a deprecation warning and will be removed in a future version. diff --git a/src/neo4j_graphrag/llm/vertexai_llm.py b/src/neo4j_graphrag/llm/vertexai_llm.py index b49f3634e..23fd5e156 100644 --- a/src/neo4j_graphrag/llm/vertexai_llm.py +++ b/src/neo4j_graphrag/llm/vertexai_llm.py @@ -69,6 +69,30 @@ _GENERATION_CONFIG_SCHEMA_PARAMS = {"response_schema", "response_mime_type"} +def _strip_unsupported_schema_fields(schema: dict[str, Any]) -> dict[str, Any]: + """Recursively remove JSON Schema fields not supported by VertexAI's Schema proto. + + For example, Pydantic adds ``additionalProperties: false`` when a model uses + ``extra="forbid"``, but the VertexAI protobuf Schema type does not have that + field and raises a ``ParseError`` when it encounters it. + """ + _UNSUPPORTED = {"additionalProperties", "$defs", "$schema"} + result = {k: v for k, v in schema.items() if k not in _UNSUPPORTED} + if "properties" in result and isinstance(result["properties"], dict): + result["properties"] = { + k: _strip_unsupported_schema_fields(v) + for k, v in result["properties"].items() + } + if "items" in result and isinstance(result["items"], dict): + result["items"] = _strip_unsupported_schema_fields(result["items"]) + if "anyOf" in result and isinstance(result["anyOf"], list): + result["anyOf"] = [ + _strip_unsupported_schema_fields(s) if isinstance(s, dict) else s + for s in result["anyOf"] + ] + return result + + def _extract_generation_config_params( config: Any, exclude_schema: bool = True ) -> dict[str, Any]: @@ -574,7 +598,9 @@ def _get_call_params_v2( response_format, BaseModel ): # if we migrate to new google-genai-sdk, Pydantic models can be passed directly - schema = response_format.model_json_schema() + schema = _strip_unsupported_schema_fields( + response_format.model_json_schema() + ) else: schema = response_format params["response_mime_type"] = "application/json"