microsoft
diff --git a/.semversioner/next-release/patch-20250206203219915745.json b/.semversioner/next-release/patch-20250206203219915745.json
Lines changed: 4 additions & 0 deletions
diff --git a/.semversioner/next-release/patch-20250211204342373101.json b/.semversioner/next-release/patch-20250211204342373101.json
Lines changed: 4 additions & 0 deletions
diff --git a/.semversioner/next-release/patch-20250212004406773499.json b/.semversioner/next-release/patch-20250212004406773499.json
Lines changed: 4 additions & 0 deletions
diff --git a/graphrag/api/query.py b/graphrag/api/query.py
Lines changed: 23 additions & 7 deletions
diff --git a/graphrag/cli/initialize.py b/graphrag/cli/initialize.py
Lines changed: 5 additions & 1 deletion
diff --git a/graphrag/cli/query.py b/graphrag/cli/query.py
Lines changed: 16 additions & 0 deletions
diff --git a/graphrag/config/defaults.py b/graphrag/config/defaults.py
Lines changed: 2 additions & 0 deletions
diff --git a/graphrag/config/init_content.py b/graphrag/config/init_content.py
Lines changed: 2 additions & 1 deletion
diff --git a/graphrag/config/models/chunking_config.py b/graphrag/config/models/chunking_config.py
Lines changed: 8 additions & 0 deletions
diff --git a/graphrag/config/models/community_reports_config.py b/graphrag/config/models/community_reports_config.py
Lines changed: 14 additions & 4 deletions
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "add option to prepend metadata into chunks"
+}
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "update multi-index query to support new workflows"
+}
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "Export NLP community reports prompt."
+}
@@ -237,12 +237,12 @@ async def multi_index_global_search(
         raise NotImplementedError(message)
 
     links = {
-        "community": {},
+        "communities": {},
         "community_reports": {},
         "entities": {},
     }
     max_vals = {
-        "community": -1,
+        "communities": -1,
         "community_reports": -1,
         "entities": -1,
     }
@@ -272,16 +272,20 @@ async def multi_index_global_search(
         communities_df["community"] = communities_df["community"].astype(int)
         communities_df["parent"] = communities_df["parent"].astype(int)
         for i in communities_df["community"]:
-            links["community"][i + max_vals["community"] + 1] = {
+            links["communities"][i + max_vals["communities"] + 1] = {
                 "index_name": index_name,
                 "id": str(i),
             }
-        communities_df["community"] += max_vals["community"] + 1
+        communities_df["community"] += max_vals["communities"] + 1
         communities_df["parent"] = communities_df["parent"].apply(
-            lambda x: x if x == -1 else x + max_vals["community"] + 1
+            lambda x: x if x == -1 else x + max_vals["communities"] + 1
+        )
+        communities_df["human_readable_id"] += max_vals["communities"] + 1
+        # concat the index name to the entity_ids, since this is used for joining later
+        communities_df["entity_ids"] = communities_df["entity_ids"].apply(
+            lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
         )
-        communities_df["human_readable_id"] += max_vals["community"] + 1
-        max_vals["community"] = int(communities_df["community"].max())
+        max_vals["communities"] = int(communities_df["community"].max())
         communities_dfs.append(communities_df)
 
         # Prepare each index's entities dataframe for merging
@@ -514,13 +518,15 @@ async def multi_index_local_search(
 
     links = {
         "community_reports": {},
+        "communities": {},
         "entities": {},
         "text_units": {},
         "relationships": {},
         "covariates": {},
     }
     max_vals = {
         "community_reports": -1,
+        "communities": -1,
         "entities": -1,
         "text_units": 0,
         "relationships": -1,
@@ -544,6 +550,10 @@ async def multi_index_local_search(
             }
         communities_df["community"] += max_vals["communities"] + 1
         communities_df["human_readable_id"] += max_vals["communities"] + 1
+        # concat the index name to the entity_ids, since this is used for joining later
+        communities_df["entity_ids"] = communities_df["entity_ids"].apply(
+            lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
+        )
         max_vals["communities"] = int(communities_df["community"].max())
         communities_dfs.append(communities_df)
 
@@ -873,12 +883,14 @@ async def multi_index_drift_search(
 
     links = {
         "community_reports": {},
+        "communities": {},
         "entities": {},
         "text_units": {},
         "relationships": {},
     }
     max_vals = {
         "community_reports": -1,
+        "communities": -1,
         "entities": -1,
         "text_units": 0,
         "relationships": -1,
@@ -901,6 +913,10 @@ async def multi_index_drift_search(
             }
         communities_df["community"] += max_vals["communities"] + 1
         communities_df["human_readable_id"] += max_vals["communities"] + 1
+        # concat the index name to the entity_ids, since this is used for joining later
+        communities_df["entity_ids"] = communities_df["entity_ids"].apply(
+            lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
+        )
         max_vals["communities"] = int(communities_df["community"].max())
         communities_dfs.append(communities_df)
 
 
@@ -10,6 +10,9 @@
 from graphrag.prompts.index.community_report import (
     COMMUNITY_REPORT_PROMPT,
 )
+from graphrag.prompts.index.community_report_text_units import (
+    COMMUNITY_REPORT_TEXT_PROMPT,
+)
 from graphrag.prompts.index.extract_claims import EXTRACT_CLAIMS_PROMPT
 from graphrag.prompts.index.extract_graph import GRAPH_EXTRACTION_PROMPT
 from graphrag.prompts.index.summarize_descriptions import SUMMARIZE_PROMPT
@@ -72,7 +75,8 @@ def initialize_project_at(path: Path, force: bool) -> None:
         "extract_graph": GRAPH_EXTRACTION_PROMPT,
         "summarize_descriptions": SUMMARIZE_PROMPT,
         "extract_claims": EXTRACT_CLAIMS_PROMPT,
-        "community_report": COMMUNITY_REPORT_PROMPT,
+        "community_report_graph": COMMUNITY_REPORT_PROMPT,
+        "community_report_text": COMMUNITY_REPORT_TEXT_PROMPT,
         "drift_search_system_prompt": DRIFT_LOCAL_SYSTEM_PROMPT,
         "drift_reduce_prompt": DRIFT_REDUCE_PROMPT,
         "global_search_map_system_prompt": MAP_SYSTEM_PROMPT,
 
@@ -58,6 +58,10 @@ def run_global_search(
         final_community_reports_list = dataframe_dict["community_reports"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Global Search: {dataframe_dict['index_names']}"
+        )
+
         response, context_data = asyncio.run(
             api.multi_index_global_search(
                 config=config,
@@ -169,6 +173,10 @@ def run_local_search(
         final_relationships_list = dataframe_dict["relationships"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Local Search: {dataframe_dict['index_names']}"
+        )
+
         # If any covariates tables are missing from any index, set the covariates list to None
         if len(dataframe_dict["covariates"]) != dataframe_dict["num_indexes"]:
             final_covariates_list = None
@@ -293,6 +301,10 @@ def run_drift_search(
         final_relationships_list = dataframe_dict["relationships"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Drift Search: {dataframe_dict['index_names']}"
+        )
+
         response, context_data = asyncio.run(
             api.multi_index_drift_search(
                 config=config,
@@ -399,6 +411,10 @@ def run_basic_search(
         final_text_units_list = dataframe_dict["text_units"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Basic Search: {dataframe_dict['index_names']}"
+        )
+
         response, context_data = asyncio.run(
             api.multi_index_basic_search(
                 config=config,
 
@@ -67,6 +67,8 @@
 CHUNK_OVERLAP = 100
 CHUNK_GROUP_BY_COLUMNS = ["id"]
 CHUNK_STRATEGY = ChunkStrategyType.tokens
+CHUNK_PREPEND_METADATA = False
+CHUNK_SIZE_INCLUDES_METADATA = False
 
 # Claim extraction
 DESCRIPTION = "Any claims or facts that could be relevant to information discovery."
 
@@ -114,7 +114,8 @@
 
 community_reports:
   model_id: {defs.COMMUNITY_REPORT_MODEL_ID}
-  prompt: "prompts/community_report.txt"
+  graph_prompt: "prompts/community_report_graph.txt"
+  text_prompt: "prompts/community_report_text.txt"
   max_length: {defs.COMMUNITY_REPORT_MAX_LENGTH}
   max_input_length: {defs.COMMUNITY_REPORT_MAX_INPUT_LENGTH}
 
 
@@ -26,3 +26,11 @@ class ChunkingConfig(BaseModel):
     encoding_model: str = Field(
         description="The encoding model to use.", default=defs.ENCODING_MODEL
     )
+    prepend_metadata: bool = Field(
+        description="Prepend metadata into each chunk.",
+        default=defs.CHUNK_PREPEND_METADATA,
+    )
+    chunk_size_includes_metadata: bool = Field(
+        description="Count metadata in max tokens.",
+        default=defs.CHUNK_SIZE_INCLUDES_METADATA,
+    )
@@ -14,8 +14,13 @@
 class CommunityReportsConfig(BaseModel):
     """Configuration section for community reports."""
 
-    prompt: str | None = Field(
-        description="The community report extraction prompt to use.", default=None
+    graph_prompt: str | None = Field(
+        description="The community report extraction prompt to use for graph-based summarization.",
+        default=None,
+    )
+    text_prompt: str | None = Field(
+        description="The community report extraction prompt to use for text-based summarization.",
+        default=None,
     )
     max_length: int = Field(
         description="The community report maximum length in tokens.",
@@ -46,10 +51,15 @@ def resolved_strategy(
             "llm": model_config.model_dump(),
             "stagger": model_config.parallelization_stagger,
             "num_threads": model_config.parallelization_num_threads,
-            "extraction_prompt": (Path(root_dir) / self.prompt).read_text(
+            "graph_prompt": (Path(root_dir) / self.graph_prompt).read_text(
+                encoding="utf-8"
+            )
+            if self.graph_prompt
+            else None,
+            "text_prompt": (Path(root_dir) / self.text_prompt).read_text(
                 encoding="utf-8"
             )
-            if self.prompt
+            if self.text_prompt
             else None,
             "max_report_length": self.max_length,
             "max_input_length": self.max_input_length,
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "add option to prepend metadata into chunks"
+}