Skip to content

Commit c649d9f

Browse files
gaudyb and Gaudy Blanco authored
Issue #2004 fix (#2159)
* fix issue #2004 using KeenhoChu idea in his PR
* add unit test for dynamic community selection
* add unit test for dynamic community selection implementing #2158 logic

---------

Co-authored-by: Gaudy Blanco <gaudy-microsoft@MacBook-Pro-m4-Gaudy-For-Work.local>
1 parent c296f1a commit c649d9f

File tree

6 files changed

+222
-11
lines changed

6 files changed

+222
-11
lines changed

docs/examples_notebooks/api_overview.ipynb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@
2828
"from pathlib import Path\n",
2929
"from pprint import pprint\n",
3030
"\n",
31-
"import graphrag.api as api\n",
3231
"import pandas as pd\n",
3332
"from graphrag.config.load_config import load_config\n",
34-
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
33+
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
34+
"\n",
35+
"import graphrag.api as api"
3536
]
3637
},
3738
{

docs/examples_notebooks/input_documents.ipynb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,11 @@
3030
"from pathlib import Path\n",
3131
"from pprint import pprint\n",
3232
"\n",
33-
"import graphrag.api as api\n",
3433
"import pandas as pd\n",
3534
"from graphrag.config.load_config import load_config\n",
36-
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
35+
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
36+
"\n",
37+
"import graphrag.api as api"
3738
]
3839
},
3940
{

packages/graphrag/graphrag/query/context_builder/dynamic_community_selection.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,10 @@ async def select(self, query: str) -> tuple[list[CommunityReport], dict[str, Any
123123
# TODO check why some sub_communities are NOT in report_df
124124
if community in self.communities:
125125
for child in self.communities[community].children:
126-
if child in self.reports:
127-
communities_to_rate.append(child)
126+
# Convert child to string to match self.reports key type
127+
child_str = str(child)
128+
if child_str in self.reports:
129+
communities_to_rate.append(child_str)
128130
else:
129131
logger.debug(
130132
"dynamic community selection: cannot find community %s in reports",
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
# Copyright (c) 2024 Microsoft Corporation.
2+
# Licensed under the MIT License
3+
4+
"""Tests for dynamic community selection with type handling."""
5+
6+
from unittest.mock import MagicMock
7+
8+
from graphrag.data_model.community import Community
9+
from graphrag.data_model.community_report import CommunityReport
10+
from graphrag.query.context_builder.dynamic_community_selection import (
11+
DynamicCommunitySelection,
12+
)
13+
14+
15+
def create_mock_tokenizer() -> MagicMock:
    """Create a mock tokenizer.

    Returns:
        A ``MagicMock`` whose ``encode`` method returns the fixed token list
        ``[1, 2, 3]`` regardless of the text it is given.
    """
    tokenizer = MagicMock()
    # Canned result: every encode() call yields the same three token IDs.
    tokenizer.encode.return_value = [1, 2, 3]
    return tokenizer
20+
21+
22+
def create_mock_model() -> MagicMock:
    """Create a mock chat model.

    Returns:
        A fresh, unconfigured ``MagicMock`` instance.
    """
    return MagicMock()
25+
26+
27+
def test_dynamic_community_selection_handles_int_children():
    """Test that DynamicCommunitySelection correctly handles children IDs as integers.

    This tests the fix for issue #2004 where children IDs could be integers
    while self.reports keys are strings, causing child communities to be skipped.
    """
    # NOTE(review): this test never calls selector.select(); the str()
    # conversion below is performed by the test itself, so the assertions do
    # not exercise the patched loop inside select(). A follow-up test that
    # awaits select() with a stubbed rating call would cover the actual fix.

    # Create communities with integer children (simulating the bug scenario)
    # Note: Even though the type annotation says list[str], actual data may have ints
    communities = [
        Community(
            id="comm-0",
            short_id="0",
            title="Root Community",
            level="0",
            parent="",
            children=[1, 2],  # type: ignore[list-item]  # Integer children - testing bug fix
        ),
        Community(
            id="comm-1",
            short_id="1",
            title="Child Community 1",
            level="1",
            parent="0",
            children=[],
        ),
        Community(
            id="comm-2",
            short_id="2",
            title="Child Community 2",
            level="1",
            parent="0",
            children=[],
        ),
    ]

    # Create community reports with string community_id
    reports = [
        CommunityReport(
            id="report-0",
            short_id="0",
            title="Report 0",
            community_id="0",
            summary="Root community summary",
            full_content="Root community full content",
            rank=1.0,
        ),
        CommunityReport(
            id="report-1",
            short_id="1",
            title="Report 1",
            community_id="1",
            summary="Child 1 summary",
            full_content="Child 1 full content",
            rank=1.0,
        ),
        CommunityReport(
            id="report-2",
            short_id="2",
            title="Report 2",
            community_id="2",
            summary="Child 2 summary",
            full_content="Child 2 full content",
            rank=1.0,
        ),
    ]

    model = create_mock_model()
    tokenizer = create_mock_tokenizer()

    selector = DynamicCommunitySelection(
        community_reports=reports,
        communities=communities,
        model=model,
        tokenizer=tokenizer,
        threshold=1,
        keep_parent=False,
        max_level=2,
    )

    # Verify that reports are keyed by string
    assert "0" in selector.reports
    assert "1" in selector.reports
    assert "2" in selector.reports

    # Verify that communities are keyed by string short_id
    assert "0" in selector.communities
    assert "1" in selector.communities
    assert "2" in selector.communities

    # Verify that the children are properly accessible
    root_community = selector.communities["0"]
    for child in root_community.children:
        # Precondition of the bug: the raw integer ID is not a key of
        # selector.reports, so a lookup without conversion misses the child.
        assert child not in selector.reports
        child_id = str(child)
        # The string form is what matches the report keys.
        assert child_id in selector.reports, (
            f"Child {child} (as '{child_id}') should be found in reports"
        )
125+
126+
127+
def test_dynamic_community_selection_handles_str_children():
    """Test that DynamicCommunitySelection works correctly with string children IDs."""
    # NOTE(review): this test never calls selector.select(); it only checks
    # that the constructed selector exposes reports whose keys match the
    # children IDs, so the selection logic itself is not exercised here.
    communities = [
        Community(
            id="comm-0",
            short_id="0",
            title="Root Community",
            level="0",
            parent="",
            children=["1", "2"],  # String children - expected type
        ),
        Community(
            id="comm-1",
            short_id="1",
            title="Child Community 1",
            level="1",
            parent="0",
            children=[],
        ),
        Community(
            id="comm-2",
            short_id="2",
            title="Child Community 2",
            level="1",
            parent="0",
            children=[],
        ),
    ]

    reports = [
        CommunityReport(
            id="report-0",
            short_id="0",
            title="Report 0",
            community_id="0",
            summary="Root community summary",
            full_content="Root community full content",
            rank=1.0,
        ),
        CommunityReport(
            id="report-1",
            short_id="1",
            title="Report 1",
            community_id="1",
            summary="Child 1 summary",
            full_content="Child 1 full content",
            rank=1.0,
        ),
        CommunityReport(
            id="report-2",
            short_id="2",
            title="Report 2",
            community_id="2",
            summary="Child 2 summary",
            full_content="Child 2 full content",
            rank=1.0,
        ),
    ]

    model = create_mock_model()
    tokenizer = create_mock_tokenizer()

    selector = DynamicCommunitySelection(
        community_reports=reports,
        communities=communities,
        model=model,
        tokenizer=tokenizer,
        threshold=1,
        keep_parent=False,
        max_level=2,
    )

    # Verify that children can be found in reports
    root_community = selector.communities["0"]
    for child in root_community.children:
        child_id = str(child)
        assert child_id in selector.reports, (
            f"Child {child} (as '{child_id}') should be found in reports"
        )

tests/verbs/test_create_community_reports.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44

55
from graphrag.config.models.graph_rag_config import GraphRagConfig
66
from graphrag.data_model.schemas import COMMUNITY_REPORTS_FINAL_COLUMNS
7-
from graphrag.index.operations.summarize_communities.community_reports_extractor import (
8-
CommunityReportResponse,
9-
FindingModel,
10-
)
117
from graphrag.index.workflows.create_community_reports import (
128
run_workflow,
139
)
1410
from graphrag.utils.storage import load_table_from_storage
1511

12+
from graphrag.index.operations.summarize_communities.community_reports_extractor import (
13+
CommunityReportResponse,
14+
FindingModel,
15+
)
16+
1617
from .util import (
1718
DEFAULT_MODEL_CONFIG,
1819
compare_outputs,

unified-search-app/app/app_logic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import logging
88
from typing import TYPE_CHECKING
99

10-
import graphrag.api as api
1110
import streamlit as st
1211
from knowledge_loader.data_sources.loader import (
1312
create_datasource,
@@ -18,6 +17,8 @@
1817
from state.session_variables import SessionVariables
1918
from ui.search import display_search_result
2019

20+
import graphrag.api as api
21+
2122
if TYPE_CHECKING:
2223
import pandas as pd
2324

0 commit comments

Comments
 (0)