neo4j · HemantSudarshan · Mar 8, 2026
@@ -6,6 +6,10 @@
 
 - Updated examples, default values, and documentation to use `gpt-4.1` / `gpt-4.1-mini` instead of deprecated GPT-4* models (e.g. `gpt-4o`, `gpt-4`).
 
+### Added
+
+- Exposed the `use_structured_output` parameter in the `SimpleKGPipeline` constructor to enable structured output for entity extraction and automatic schema extraction with supported LLMs (`OpenAILLM`, `VertexAILLM`).
+
 ## 1.13.1
 
 - Fixed invalid lexical graph relationships causing "Relationship references unknown start node" errors during parquet import when nodes are pruned.

@@ -162,6 +162,26 @@ They are also accessible via the `SimpleKGPipeline` interface.
         # ...
     )
 
+Structured Output
+-----------------
+
+When the LLM supports structured output (for example ``OpenAILLM`` or ``VertexAILLM``),
+set ``use_structured_output=True`` to improve the reliability of entity extraction
+and automatic schema extraction:
+
+.. code:: python
+
+    kg_builder = SimpleKGPipeline(
+        # ...
+        use_structured_output=True,
+        # ...
+    )
+
+.. note::
+
+    Structured output is only supported by LLMs that have ``supports_structured_output=True``
+    (currently ``OpenAILLM`` and ``VertexAILLM``). Enabling it with an unsupported LLM raises a ``ValueError``.
+
 Skip Entity Resolution
 ----------------------
 
@@ -479,7 +499,8 @@ within the configuration file.
         },
         "lexical_graph_config": {
             "chunk_node_label": "TextPart"
-        }
+        },
+        "use_structured_output": false
     }
 
 
@@ -520,6 +541,7 @@ or in YAML:
         - ["House", "RULES", "Planet"]
     lexical_graph_config:
         chunk_node_label: TextPart
+    use_structured_output: false
 
 
 It is also possible to further customize components, with a syntax similar to the one

@@ -94,6 +94,7 @@ class SimpleKGPipelineConfig(TemplatePipelineConfig):
     perform_entity_resolution: bool = True
     lexical_graph_config: Optional[LexicalGraphConfig] = None
     neo4j_database: Optional[str] = None
+    use_structured_output: bool = False
 
     pdf_loader: Optional[ComponentType] = None
     kg_writer: Optional[ComponentType] = None
@@ -186,7 +187,10 @@ def _get_schema(self) -> BaseSchemaBuilder:
         Return SchemaFromTextExtractor for automatic extraction or SchemaBuilder for manual schema.
         """
         if not self.has_user_provided_schema():
-            return SchemaFromTextExtractor(llm=self.get_default_llm())
+            return SchemaFromTextExtractor(
+                llm=self.get_default_llm(),
+                use_structured_output=self.use_structured_output,
+            )
         return SchemaBuilder()
 
     def _process_schema_with_precedence(self) -> dict[str, Any]:
@@ -222,6 +226,7 @@ def _get_extractor(self) -> EntityRelationExtractor:
             llm=self.get_default_llm(),
             prompt_template=self.prompt_template,
             on_error=self.on_error,
+            use_structured_output=self.use_structured_output,
         )
 
     def _get_pruner(self) -> GraphPruning:

@@ -89,6 +89,7 @@ class SimpleKGPipeline:
         perform_entity_resolution (bool): Merge entities with same label and name. Default: True
         prompt_template (str): A custom prompt template to use for extraction.
         lexical_graph_config (Optional[LexicalGraphConfig], optional): Lexical graph configuration to customize node labels and relationship types in the lexical graph.
+        use_structured_output (bool): Whether to use structured output (LLMInterfaceV2) for entity extraction and automatic schema extraction. Only supported for OpenAILLM and VertexAILLM. Defaults to False.
     """
 
     def __init__(
@@ -115,6 +116,7 @@ def __init__(
         perform_entity_resolution: bool = True,
         lexical_graph_config: Optional[LexicalGraphConfig] = None,
         neo4j_database: Optional[str] = None,
+        use_structured_output: bool = False,
     ):
         try:
             config = SimpleKGPipelineConfig.model_validate(
@@ -137,6 +139,7 @@ def __init__(
                     perform_entity_resolution=perform_entity_resolution,
                     lexical_graph_config=lexical_graph_config,
                     neo4j_database=neo4j_database,
+                    use_structured_output=use_structured_output,
                 )
             )
         except (ValidationError, ValueError) as e:

@@ -178,6 +178,39 @@ def test_simple_kg_pipeline_config_extractor(mock_llm: Mock, llm: LLMInterface)
     assert extractor.prompt_template.template == "my template {text}"
 
 
+@patch(
+    "neo4j_graphrag.experimental.pipeline.config.template_pipeline.simple_kg_builder.SimpleKGPipelineConfig.get_default_llm"
+)
+def test_simple_kg_pipeline_config_extractor_with_structured_output(
+    mock_llm: Mock, llm: LLMInterface
+) -> None:  # use_structured_output=True on the config must reach the extractor
+    llm.supports_structured_output = True  # mark the injected LLM as structured-output capable
+    mock_llm.return_value = llm  # the patched get_default_llm() returns this LLM
+    config = SimpleKGPipelineConfig(
+        on_error="IGNORE",  # type: ignore
+        use_structured_output=True,
+    )
+    extractor = config._get_extractor()
+    assert isinstance(extractor, LLMEntityRelationExtractor)
+    assert extractor.use_structured_output is True  # flag was passed through by _get_extractor()
+
+
+@patch(
+    "neo4j_graphrag.experimental.pipeline.config.template_pipeline.simple_kg_builder.SimpleKGPipelineConfig.get_default_llm"
+)
+def test_simple_kg_pipeline_config_schema_with_structured_output(
+    mock_llm: Mock, llm: LLMInterface
+) -> None:  # use_structured_output=True on the config must reach the schema extractor
+    llm.supports_structured_output = True  # mark the injected LLM as structured-output capable
+    mock_llm.return_value = llm  # the patched get_default_llm() returns this LLM
+    config = SimpleKGPipelineConfig(
+        use_structured_output=True,
+    )
+    schema = config._get_schema()
+    assert isinstance(schema, SchemaFromTextExtractor)
+    assert schema.use_structured_output is True  # flag was passed through by _get_schema()
+
+
 @patch(
     "neo4j_graphrag.experimental.components.kg_writer.get_version",
     return_value=((5, 23, 0), False, False),

@@ -193,3 +193,39 @@ async def test_knowledge_graph_builder_with_lexical_graph_config(_: Mock) -> Non
         assert pipe_inputs["extractor"]["lexical_graph_config"] == lexical_graph_config
         assert pipe_inputs["extractor"]["document_info"] is not None
         assert pipe_inputs["extractor"]["document_info"]["path"] == "document.txt"
+
+
+@mock.patch(
+    "neo4j_graphrag.experimental.components.kg_writer.get_version",
+    return_value=((5, 23, 0), False, False),
+)  # get_version is stubbed with a fixed return value for this test
+def test_simple_kg_pipeline_accepts_use_structured_output(_: Mock) -> None:  # constructor accepts the new flag
+    llm = MagicMock(spec=LLMInterface)
+    llm.supports_structured_output = True  # mock LLM reports structured-output support
+    driver = MagicMock(spec=neo4j.Driver)
+    embedder = MagicMock(spec=Embedder)
+
+    # Should not raise
+    kg_builder = SimpleKGPipeline(
+        llm=llm,
+        driver=driver,
+        embedder=embedder,
+        from_pdf=False,
+        use_structured_output=True,
+    )
+    assert kg_builder is not None  # reaching this line means construction did not raise
+
+
+def test_simple_kg_pipeline_use_structured_output_unsupported_llm() -> None:  # flag + unsupported LLM must fail
+    llm = MagicMock(spec=LLMInterface)
+    llm.supports_structured_output = False  # mock LLM reports no structured-output support
+    driver = MagicMock(spec=neo4j.Driver)
+    embedder = MagicMock(spec=Embedder)
+
+    with pytest.raises(ValueError):  # the constructor call itself is expected to raise
+        SimpleKGPipeline(
+            llm=llm,
+            driver=driver,
+            embedder=embedder,
+            use_structured_output=True,
+        )