Skip to content

Commit 5713205

Browse files
AlonsoGuevara, snehitgajjar (Snehit Gajjar), and Copilot
authored
Feat/additional context (#2021)
* Users/snehitgajjar/add optional api param for pipeline state (#2019) * Add support for additional context for PipelineState * Clean up * Clean up * Clean up * Nit --------- Co-authored-by: Snehit Gajjar <[email protected]> * Semver * Update graphrag/api/index.py Co-authored-by: Copilot <[email protected]> * Remove additional_context from serialization --------- Co-authored-by: Snehit Gajjar <[email protected]> Co-authored-by: Snehit Gajjar <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent 1da1380 commit 5713205

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+ {
2+     "type": "minor",
3+     "description": "Add additional context variable to build index signature for custom parameter bag"
4+ }

graphrag/api/index.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"""
1010

1111
import logging
12+
from typing import Any
1213

1314
from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks
1415
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
@@ -30,6 +31,7 @@ async def build_index(
3031
is_update_run: bool = False,
3132
memory_profile: bool = False,
3233
callbacks: list[WorkflowCallbacks] | None = None,
34+
additional_context: dict[str, Any] | None = None,
3335
) -> list[PipelineRunResult]:
3436
"""Run the pipeline with the given configuration.
3537
@@ -43,6 +45,8 @@ async def build_index(
4345
Whether to enable memory profiling.
4446
callbacks : list[WorkflowCallbacks] | None default=None
4547
A list of callbacks to register.
48+
additional_context : dict[str, Any] | None default=None
49+
Additional context to pass to the pipeline run. This can be accessed in the pipeline state under the 'additional_context' key.
4650
4751
Returns
4852
-------
@@ -73,6 +77,7 @@ async def build_index(
7377
config,
7478
callbacks=workflow_callbacks,
7579
is_update_run=is_update_run,
80+
additional_context=additional_context,
7681
):
7782
outputs.append(output)
7883
if output.errors and len(output.errors) > 0:

graphrag/index/run/run_pipeline.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import time
1010
from collections.abc import AsyncIterable
1111
from dataclasses import asdict
12+
from typing import Any
1213

1314
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
1415
from graphrag.config.models.graph_rag_config import GraphRagConfig
@@ -28,6 +29,7 @@ async def run_pipeline(
2829
config: GraphRagConfig,
2930
callbacks: WorkflowCallbacks,
3031
is_update_run: bool = False,
32+
additional_context: dict[str, Any] | None = None,
3133
) -> AsyncIterable[PipelineRunResult]:
3234
"""Run all workflows using a simplified pipeline."""
3335
root_dir = config.root_dir
@@ -40,6 +42,9 @@ async def run_pipeline(
4042
state_json = await output_storage.get("context.json")
4143
state = json.loads(state_json) if state_json else {}
4244

45+
if additional_context:
46+
state.setdefault("additional_context", {}).update(additional_context)
47+
4348
if is_update_run:
4449
logger.info("Running incremental indexing.")
4550

@@ -126,9 +131,17 @@ async def _dump_json(context: PipelineRunContext) -> None:
126131
await context.output_storage.set(
127132
"stats.json", json.dumps(asdict(context.stats), indent=4, ensure_ascii=False)
128133
)
129-
await context.output_storage.set(
130-
"context.json", json.dumps(context.state, indent=4, ensure_ascii=False)
131-
)
134+
# Dump context state, excluding additional_context
135+
temp_context = context.state.pop(
136+
"additional_context", None
137+
) # Remove reference only, as object size is uncertain
138+
try:
139+
state_blob = json.dumps(context.state, indent=4, ensure_ascii=False)
140+
finally:
141+
if temp_context:
142+
context.state["additional_context"] = temp_context
143+
144+
await context.output_storage.set("context.json", state_blob)
132145

133146

134147
async def _copy_previous_output(

0 commit comments

Comments (0)