
Commit 7235c6f

Add Incremental Indexing v1 (#1318)
* Create entrypoint for cli and api (#1067)
* Add cli and api entrypoints for update index
* Semver
* Update docs
* Run tests on feature branch main
* Better /main handling in tests
* Incremental indexing/file delta (#1123)
* Calculate new inputs and deleted inputs on update
* Semver
* Clear ruff checks
* Fix pyright
* Fix PyRight
* Ruff again
* Update relationships after inc index (#1236)
* Collapse create final community reports (#1227)
* Remove extraneous param
* Add community report mocking assertions
* Collapse primary report generation
* Collapse embeddings
* Format
* Semver
* Remove extraneous check
* Move option set
* Collapse create base entity graph (#1233)
* Collapse create_base_entity_graph
* Format/typing
* Semver
* Fix smoke tests
* Simplify assignment
* Collapse create summarized entities (#1237)
* Collapse entity summarize
* Semver
* Collapse create base extracted entities (#1235)
* Set up base assertions
* Replace entity_extract
* Finish collapsing workflow
* Semver
* Update smoke tests
* Incremental indexing/update final text units (#1241)
* Update final text units
* Format
* Address comments
* Add v1 community merge using time period (#1257)
* Add naive community merge using time period
* formatting
* Query fixes
* Add descriptions from merged_entities
* Add summarization and embeddings
* Use iso format
* Ruff
* Pyright and smoke tests
* Pyright
* Pyright
* Update parquet for verb tests
* Fix smoke tests
* Remove sorting
* Update smoke tests
* Smoke tests
* Smoke tests
* Updated verb test to account for latest changes on covariates
* Add config for incremental index + Bug fixes (#1317)
* Add config for incremental index + Bug fixes
* Ruff
* Fix smoke tests
* Semversioner
* Small refactor
* Remove unused file
* Ruff
* Update verb tests inputs
* Update verb tests inputs

---------

Co-authored-by: Nathan Evans <github@talkswithnumbers.com>
1 parent 0cc79b9 commit 7235c6f
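
The heart of the change is an input delta: on an update run, the pipeline compares the previously indexed documents against the current input set, indexes only the new documents, and accounts for the deleted ones when merging into the existing index. The snippet below is a hypothetical Python sketch of that delta step only; the names compute_input_delta and InputDelta do not appear in this commit and are used purely for illustration.

# Hypothetical sketch of "calculate new inputs and deleted inputs on update".
# These names are illustrative only; this is not the commit's actual code.
from dataclasses import dataclass


@dataclass
class InputDelta:
    """Inputs added since the last run and inputs that have disappeared."""

    new_inputs: set[str]
    deleted_inputs: set[str]


def compute_input_delta(previous: set[str], current: set[str]) -> InputDelta:
    """Diff the previously indexed inputs against the current input set."""
    return InputDelta(
        new_inputs=current - previous,
        deleted_inputs=previous - current,
    )


# Example: only "c.txt" needs fresh indexing; artifacts derived from "a.txt"
# have to be merged out of the existing index.
delta = compute_input_delta({"a.txt", "b.txt"}, {"b.txt", "c.txt"})
print(delta.new_inputs, delta.deleted_inputs)  # {'c.txt'} {'a.txt'}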


47 files changed: +940, -247 lines
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+{
+    "type": "minor",
+    "description": "Add Incremental Indexing"
+}
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Add relationship merge"
+}
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Add text units update"
+}
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Add naive community merge using time period"
+}
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Add config for incremental updates"
+}

graphrag/api/index.py

Lines changed: 2 additions & 1 deletion
@@ -24,7 +24,6 @@ async def build_index(
     config: GraphRagConfig,
     run_id: str = "",
     is_resume_run: bool = False,
-    is_update_run: bool = False,
     memory_profile: bool = False,
     progress_reporter: ProgressReporter | None = None,
     emit: list[TableEmitterType] = [TableEmitterType.Parquet], # noqa: B006
@@ -54,6 +53,8 @@ async def build_index(
     list[PipelineRunResult]
         The list of pipeline run results
     """
+    is_update_run = bool(config.update_index_storage)
+
     if is_resume_run and is_update_run:
         msg = "Cannot resume and update a run at the same time."
         raise ValueError(msg)
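
Callers no longer pass is_update_run; build_index now infers update mode from whether update_index_storage is configured. Below is a minimal sketch of driving an update run through the API under that assumption. The import paths follow the files touched in this commit, but the settings values (including the placeholder API key) are illustrative, not an official recipe.

import asyncio

from graphrag.api.index import build_index
from graphrag.config.create_graphrag_config import create_graphrag_config

# Illustrative settings: the presence of "update_index_storage" is what flips
# build_index into update mode (is_update_run = bool(config.update_index_storage)).
settings = {
    "llm": {"api_key": "<your-api-key>"},  # placeholder
    "storage": {"type": "file", "base_dir": "output"},
    "update_index_storage": {"type": "file", "base_dir": "update_output"},
}
config = create_graphrag_config(values=settings, root_dir=".")

# No is_update_run argument anymore; the configured update storage is enough.
results = asyncio.run(build_index(config=config))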

graphrag/cli/index.py

Lines changed: 1 addition & 3 deletions
@@ -69,7 +69,6 @@ def index_cli(
     root_dir: Path,
     verbose: bool,
     resume: str | None,
-    update_index_id: str | None,
     memprofile: bool,
     cache: bool,
     reporter: ReporterType,
@@ -82,7 +81,7 @@ def index_cli(
     """Run the pipeline with the given config."""
     progress_reporter = create_progress_reporter(reporter)
     info, error, success = _logger(progress_reporter)
-    run_id = resume or update_index_id or time.strftime("%Y%m%d-%H%M%S")
+    run_id = resume or time.strftime("%Y%m%d-%H%M%S")

     config = load_config(root_dir, config_filepath)
     config.storage.base_dir = str(output_dir) if output_dir else config.storage.base_dir
@@ -123,7 +122,6 @@ def index_cli(
            config=config,
            run_id=run_id,
            is_resume_run=bool(resume),
-           is_update_run=bool(update_index_id),
            memory_profile=memprofile,
            progress_reporter=progress_reporter,
            emit=emit,

graphrag/cli/main.py

Lines changed: 0 additions & 11 deletions
@@ -102,12 +102,6 @@ def _index_cli(
             help="Skip any preflight validation. Useful when running no LLM steps."
         ),
     ] = False,
-    update_index: Annotated[
-        str | None,
-        typer.Option(
-            help="Update an index run id, leveraging previous outputs and applying new indexes."
-        ),
-    ] = None,
     output: Annotated[
         Path | None,
         typer.Option(
@@ -119,15 +113,10 @@ def _index_cli(
     ] = None,
 ):
     """Build a knowledge graph index."""
-    if resume and update_index:
-        msg = "Cannot resume and update a run at the same time"
-        raise ValueError(msg)
-
     index_cli(
         root_dir=root,
         verbose=verbose,
         resume=resume,
-        update_index_id=update_index,
         memprofile=memprofile,
         cache=cache,
         reporter=ReporterType(reporter),

graphrag/config/create_graphrag_config.py

Lines changed: 21 additions & 0 deletions
@@ -375,6 +375,25 @@ def hydrate_parallelization_params(
             container_name=reader.str(Fragment.container_name),
             base_dir=reader.str(Fragment.base_dir) or defs.STORAGE_BASE_DIR,
         )
+
+    with (
+        reader.envvar_prefix(Section.update_index_storage),
+        reader.use(values.get("update_index_storage")),
+    ):
+        s_type = reader.str(Fragment.type)
+        if s_type:
+            update_index_storage_model = StorageConfig(
+                type=StorageType(s_type) if s_type else defs.STORAGE_TYPE,
+                connection_string=reader.str(Fragment.conn_string),
+                storage_account_blob_url=reader.str(
+                    Fragment.storage_account_blob_url
+                ),
+                container_name=reader.str(Fragment.container_name),
+                base_dir=reader.str(Fragment.base_dir)
+                or defs.UPDATE_STORAGE_BASE_DIR,
+            )
+        else:
+            update_index_storage_model = None
     with reader.envvar_prefix(Section.chunk), reader.use(values.get("chunks")):
         group_by_columns = reader.list("group_by_columns", "BY_COLUMNS")
         if group_by_columns is None:
@@ -547,6 +566,7 @@ def hydrate_parallelization_params(
         embed_graph=embed_graph_model,
         reporting=reporting_model,
         storage=storage_model,
+        update_index_storage=update_index_storage_model,
         cache=cache_model,
         input=input_model,
         chunks=chunks_model,
@@ -624,6 +644,7 @@ class Section(str, Enum):
     storage = "STORAGE"
     summarize_descriptions = "SUMMARIZE_DESCRIPTIONS"
     umap = "UMAP"
+    update_index_storage = "UPDATE_INDEX_STORAGE"
     local_search = "LOCAL_SEARCH"
     global_search = "GLOBAL_SEARCH"
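
The new block mirrors the existing storage hydration: the section is only materialized when a type is provided, and base_dir falls back to defs.UPDATE_STORAGE_BASE_DIR ("update_output", added in defaults.py below). A sketch of the environment-variable route follows; the GRAPHRAG_UPDATE_INDEX_STORAGE_* names are inferred from Section.update_index_storage by analogy with the existing GRAPHRAG_STORAGE_* variables, so treat them as an assumption.

import os

from graphrag.config.create_graphrag_config import create_graphrag_config

os.environ["GRAPHRAG_API_KEY"] = "<your-api-key>"  # placeholder
# Inferred variable name; providing a type is what enables update storage.
os.environ["GRAPHRAG_UPDATE_INDEX_STORAGE_TYPE"] = "file"
# BASE_DIR is intentionally left unset to exercise the default fallback.

config = create_graphrag_config(root_dir=".")

# With a type present the model is hydrated; with no base dir it falls back
# to defs.UPDATE_STORAGE_BASE_DIR ("update_output").
assert config.update_index_storage is not None
print(config.update_index_storage.base_dir)  # "update_output"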

graphrag/config/defaults.py

Lines changed: 1 addition & 0 deletions
@@ -86,6 +86,7 @@
 STORAGE_TYPE = StorageType.file
 SUMMARIZE_DESCRIPTIONS_MAX_LENGTH = 500
 UMAP_ENABLED = False
+UPDATE_STORAGE_BASE_DIR = "update_output"

 VECTOR_STORE = f"""
 type: {VectorStoreType.LanceDB.value}
