diff --git a/CHANGELOG.md b/CHANGELOG.md index fef952ffd..bbab44cbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,6 @@ - MarkdownLoader (experimental): added a Markdown loader to support `.md` and `.markdown` files. -### Fixed - -- `VertexAILLM`: passing a Pydantic model with `extra="forbid"` as `response_format` no longer raises a `ParseError`. Unsupported JSON Schema fields (e.g. `additionalProperties`) are now stripped before the schema is forwarded to VertexAI's protobuf `Schema` type. - ### Changed - SimpleKG pipeline (experimental): the `from_pdf` parameter is deprecated in favor of `from_file` (PDF and Markdown inputs). `from_pdf` still works but emits a deprecation warning and will be removed in a future version. diff --git a/src/neo4j_graphrag/llm/vertexai_llm.py b/src/neo4j_graphrag/llm/vertexai_llm.py index 23fd5e156..b49f3634e 100644 --- a/src/neo4j_graphrag/llm/vertexai_llm.py +++ b/src/neo4j_graphrag/llm/vertexai_llm.py @@ -69,30 +69,6 @@ _GENERATION_CONFIG_SCHEMA_PARAMS = {"response_schema", "response_mime_type"} -def _strip_unsupported_schema_fields(schema: dict[str, Any]) -> dict[str, Any]: - """Recursively remove JSON Schema fields not supported by VertexAI's Schema proto. - - For example, Pydantic adds ``additionalProperties: false`` when a model uses - ``extra="forbid"``, but the VertexAI protobuf Schema type does not have that - field and raises a ``ParseError`` when it encounters it. - """ - _UNSUPPORTED = {"additionalProperties", "$defs", "$schema"} - result = {k: v for k, v in schema.items() if k not in _UNSUPPORTED} - if "properties" in result and isinstance(result["properties"], dict): - result["properties"] = { - k: _strip_unsupported_schema_fields(v) - for k, v in result["properties"].items() - } - if "items" in result and isinstance(result["items"], dict): - result["items"] = _strip_unsupported_schema_fields(result["items"]) - if "anyOf" in result and isinstance(result["anyOf"], list): - result["anyOf"] = [ - _strip_unsupported_schema_fields(s) if isinstance(s, dict) else s - for s in result["anyOf"] - ] - return result - - def _extract_generation_config_params( config: Any, exclude_schema: bool = True ) -> dict[str, Any]: @@ -598,9 +574,7 @@ def _get_call_params_v2( response_format, BaseModel ): # if we migrate to new google-genai-sdk, Pydantic models can be passed directly - schema = _strip_unsupported_schema_fields( - response_format.model_json_schema() - ) + schema = response_format.model_json_schema() else: schema = response_format params["response_mime_type"] = "application/json"