Skip to content

Commit b9dc7b9

Browse files
KennyZhang1, dworthen, natoverse
authored
Fix/streamline workflow miq bugs (#1694)
* Add vector store id reference to embeddings config.
* Add communities to links and max_vals.
* Use consistent naming.
* Update entity_ids to include index_name.
* Add consistent logging messages to the multi-index query (MIQ) CLI.
* Add semversioner change file.

Co-authored-by: Derek Worthen <[email protected]>
Co-authored-by: Nathan Evans <[email protected]>
1 parent a6a78d5 commit b9dc7b9

File tree

3 files changed

+43
-7
lines changed

3 files changed

+43
-7
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "update multi-index query to support new workflows"
4+
}

graphrag/api/query.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -253,12 +253,12 @@ async def multi_index_global_search(
253253
raise NotImplementedError(message)
254254

255255
links = {
256-
"community": {},
256+
"communities": {},
257257
"community_reports": {},
258258
"entities": {},
259259
}
260260
max_vals = {
261-
"community": -1,
261+
"communities": -1,
262262
"community_reports": -1,
263263
"entities": -1,
264264
}
@@ -288,16 +288,20 @@ async def multi_index_global_search(
288288
communities_df["community"] = communities_df["community"].astype(int)
289289
communities_df["parent"] = communities_df["parent"].astype(int)
290290
for i in communities_df["community"]:
291-
links["community"][i + max_vals["community"] + 1] = {
291+
links["communities"][i + max_vals["communities"] + 1] = {
292292
"index_name": index_name,
293293
"id": str(i),
294294
}
295-
communities_df["community"] += max_vals["community"] + 1
295+
communities_df["community"] += max_vals["communities"] + 1
296296
communities_df["parent"] = communities_df["parent"].apply(
297-
lambda x: x if x == -1 else x + max_vals["community"] + 1
297+
lambda x: x if x == -1 else x + max_vals["communities"] + 1
298+
)
299+
communities_df["human_readable_id"] += max_vals["communities"] + 1
300+
# concat the index name to the entity_ids, since this is used for joining later
301+
communities_df["entity_ids"] = communities_df["entity_ids"].apply(
302+
lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
298303
)
299-
communities_df["human_readable_id"] += max_vals["community"] + 1
300-
max_vals["community"] = int(communities_df["community"].max())
304+
max_vals["communities"] = int(communities_df["community"].max())
301305
communities_dfs.append(communities_df)
302306

303307
# Prepare each index's entities dataframe for merging
@@ -535,13 +539,15 @@ async def multi_index_local_search(
535539

536540
links = {
537541
"community_reports": {},
542+
"communities": {},
538543
"entities": {},
539544
"text_units": {},
540545
"relationships": {},
541546
"covariates": {},
542547
}
543548
max_vals = {
544549
"community_reports": -1,
550+
"communities": -1,
545551
"entities": -1,
546552
"text_units": 0,
547553
"relationships": -1,
@@ -565,6 +571,10 @@ async def multi_index_local_search(
565571
}
566572
communities_df["community"] += max_vals["communities"] + 1
567573
communities_df["human_readable_id"] += max_vals["communities"] + 1
574+
# concat the index name to the entity_ids, since this is used for joining later
575+
communities_df["entity_ids"] = communities_df["entity_ids"].apply(
576+
lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
577+
)
568578
max_vals["communities"] = int(communities_df["community"].max())
569579
communities_dfs.append(communities_df)
570580

@@ -913,12 +923,14 @@ async def multi_index_drift_search(
913923

914924
links = {
915925
"community_reports": {},
926+
"communities": {},
916927
"entities": {},
917928
"text_units": {},
918929
"relationships": {},
919930
}
920931
max_vals = {
921932
"community_reports": -1,
933+
"communities": -1,
922934
"entities": -1,
923935
"text_units": 0,
924936
"relationships": -1,
@@ -941,6 +953,10 @@ async def multi_index_drift_search(
941953
}
942954
communities_df["community"] += max_vals["communities"] + 1
943955
communities_df["human_readable_id"] += max_vals["communities"] + 1
956+
# concat the index name to the entity_ids, since this is used for joining later
957+
communities_df["entity_ids"] = communities_df["entity_ids"].apply(
958+
lambda x, index_name=index_name: [i + f"-{index_name}" for i in x]
959+
)
944960
max_vals["communities"] = int(communities_df["community"].max())
945961
communities_dfs.append(communities_df)
946962

graphrag/cli/query.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ def run_global_search(
5858
final_community_reports_list = dataframe_dict["community_reports"]
5959
index_names = dataframe_dict["index_names"]
6060

61+
logger.success(
62+
f"Running Multi-index Global Search: {dataframe_dict['index_names']}"
63+
)
64+
6165
response, context_data = asyncio.run(
6266
api.multi_index_global_search(
6367
config=config,
@@ -169,6 +173,10 @@ def run_local_search(
169173
final_relationships_list = dataframe_dict["relationships"]
170174
index_names = dataframe_dict["index_names"]
171175

176+
logger.success(
177+
f"Running Multi-index Local Search: {dataframe_dict['index_names']}"
178+
)
179+
172180
# If any covariates tables are missing from any index, set the covariates list to None
173181
if len(dataframe_dict["covariates"]) != dataframe_dict["num_indexes"]:
174182
final_covariates_list = None
@@ -293,6 +301,10 @@ def run_drift_search(
293301
final_relationships_list = dataframe_dict["relationships"]
294302
index_names = dataframe_dict["index_names"]
295303

304+
logger.success(
305+
f"Running Multi-index Drift Search: {dataframe_dict['index_names']}"
306+
)
307+
296308
response, context_data = asyncio.run(
297309
api.multi_index_drift_search(
298310
config=config,
@@ -399,6 +411,10 @@ def run_basic_search(
399411
final_text_units_list = dataframe_dict["text_units"]
400412
index_names = dataframe_dict["index_names"]
401413

414+
logger.success(
415+
f"Running Multi-index Basic Search: {dataframe_dict['index_names']}"
416+
)
417+
402418
response, context_data = asyncio.run(
403419
api.multi_index_basic_search(
404420
config=config,

0 commit comments

Comments
 (0)