Fix/streamline workflow miq bugs (#1694)

KennyZhang1 · dworthen · natoverse · web-flow · commit b9dc7b90d580 · 2025-02-11T16:13:28.000-05:00
* Add vector store id reference to embeddings config.

* Add communities to links and max_vals

* Consistent naming

* Update entity_ids to include index_name

* Add consistent logging messages to the multi-index query (MIQ) CLI

* semversioner

---------

Co-authored-by: Derek Worthen <worthend.derek@gmail.com>
Co-authored-by: Nathan Evans <github@talkswithnumbers.com>
diff --git a/.semversioner/next-release/patch-20250211204342373101.json b/.semversioner/next-release/patch-20250211204342373101.json
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "update multi-index query to support new workflows"
+}
diff --git a/graphrag/api/query.py b/graphrag/api/query.py
@@ -253,12 +253,12 @@ async def multi_index_global_search(
         raise NotImplementedError(message)
 
     links = {
-        "community": {},
+        "communities": {},
         "community_reports": {},
         "entities": {},
     }
     max_vals = {
-        "community": -1,
+        "communities": -1,
         "community_reports": -1,
         "entities": -1,
     }
@@ -288,16 +288,20 @@ async def multi_index_global_search(
         communities_df["community"] = communities_df["community"].astype(int)
         communities_df["parent"] = communities_df["parent"].astype(int)
         for i in communities_df["community"]:
-            links["community"][i + max_vals["community"] + 1] = {
+            links["communities"][i + max_vals["communities"] + 1] = {
                 "index_name": index_name,
                 "id": str(i),
             }
-        communities_df["community"] += max_vals["community"] + 1
+        communities_df["community"] += max_vals["communities"] + 1
         communities_df["parent"] = communities_df["parent"].apply(
-            lambda x: x if x == -1 else x + max_vals["community"] + 1
+            lambda x: x if x == -1 else x + max_vals["communities"] + 1
+        )
+        communities_df["human_readable_id"] += max_vals["communities"] + 1
+        # concat the index name to the entity_ids, since this is used for joining later
+        communities_df["entity_ids"] = communities_df["entity_ids"].apply(
+            lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
         )
-        communities_df["human_readable_id"] += max_vals["community"] + 1
-        max_vals["community"] = int(communities_df["community"].max())
+        max_vals["communities"] = int(communities_df["community"].max())
         communities_dfs.append(communities_df)
 
         # Prepare each index's entities dataframe for merging
@@ -535,13 +539,15 @@ async def multi_index_local_search(
 
     links = {
         "community_reports": {},
+        "communities": {},
         "entities": {},
         "text_units": {},
         "relationships": {},
         "covariates": {},
     }
     max_vals = {
         "community_reports": -1,
+        "communities": -1,
         "entities": -1,
         "text_units": 0,
         "relationships": -1,
@@ -565,6 +571,10 @@ async def multi_index_local_search(
             }
         communities_df["community"] += max_vals["communities"] + 1
         communities_df["human_readable_id"] += max_vals["communities"] + 1
+        # concat the index name to the entity_ids, since this is used for joining later
+        communities_df["entity_ids"] = communities_df["entity_ids"].apply(
+            lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
+        )
         max_vals["communities"] = int(communities_df["community"].max())
         communities_dfs.append(communities_df)
 
@@ -913,12 +923,14 @@ async def multi_index_drift_search(
 
     links = {
         "community_reports": {},
+        "communities": {},
         "entities": {},
         "text_units": {},
         "relationships": {},
     }
     max_vals = {
         "community_reports": -1,
+        "communities": -1,
         "entities": -1,
         "text_units": 0,
         "relationships": -1,
@@ -941,6 +953,10 @@ async def multi_index_drift_search(
             }
         communities_df["community"] += max_vals["communities"] + 1
         communities_df["human_readable_id"] += max_vals["communities"] + 1
+        # concat the index name to the entity_ids, since this is used for joining later
+        communities_df["entity_ids"] = communities_df["entity_ids"].apply(
+            lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
+        )
         max_vals["communities"] = int(communities_df["community"].max())
         communities_dfs.append(communities_df)
 
diff --git a/graphrag/cli/query.py b/graphrag/cli/query.py
@@ -58,6 +58,10 @@ def run_global_search(
         final_community_reports_list = dataframe_dict["community_reports"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Global Search: {dataframe_dict['index_names']}"
+        )
+
         response, context_data = asyncio.run(
             api.multi_index_global_search(
                 config=config,
@@ -169,6 +173,10 @@ def run_local_search(
         final_relationships_list = dataframe_dict["relationships"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Local Search: {dataframe_dict['index_names']}"
+        )
+
         # If any covariates tables are missing from any index, set the covariates list to None
         if len(dataframe_dict["covariates"]) != dataframe_dict["num_indexes"]:
             final_covariates_list = None
@@ -293,6 +301,10 @@ def run_drift_search(
         final_relationships_list = dataframe_dict["relationships"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Drift Search: {dataframe_dict['index_names']}"
+        )
+
         response, context_data = asyncio.run(
             api.multi_index_drift_search(
                 config=config,
@@ -399,6 +411,10 @@ def run_basic_search(
         final_text_units_list = dataframe_dict["text_units"]
         index_names = dataframe_dict["index_names"]
 
+        logger.success(
+            f"Running Multi-index Basic Search: {dataframe_dict['index_names']}"
+        )
+
         response, context_data = asyncio.run(
             api.multi_index_basic_search(
                 config=config,

-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
 +{
 +  "type": "patch",
 +  "description": "update multi-index query to support new workflows"
 +}