
Commit a735808

Gaudy Blanco authored and committed
Merge remote-tracking branch 'origin/main' into custom_vector_store_schema
2 parents: 2ab692d + 6c66b7c

File tree

10 files changed: +1698 -1679 lines changed
Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+{
+    "type": "minor",
+    "description": "Add config for NLP async mode."
+}
Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Add gpt-5 support by updating fnllm dependency."
+}

docs/prompt_tuning/auto_prompt_tuning.md

Lines changed: 2 additions & 2 deletions

@@ -58,13 +58,13 @@ graphrag prompt-tune [--root ROOT] [--config CONFIG] [--domain DOMAIN] [--selec
 ```bash
 python -m graphrag prompt-tune --root /path/to/project --config /path/to/settings.yaml --domain "environmental news" \
 --selection-method random --limit 10 --language English --max-tokens 2048 --chunk-size 256 --min-examples-required 3 \
---no-entity-types --output /path/to/output
+--no-discover-entity-types --output /path/to/output
 ```

 or, with minimal configuration (suggested):

 ```bash
-python -m graphrag prompt-tune --root /path/to/project --config /path/to/settings.yaml --no-entity-types
+python -m graphrag prompt-tune --root /path/to/project --config /path/to/settings.yaml --no-discover-entity-types
 ```

 ## Document Selection Methods
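To confirm the renamed flag locally, the CLI help text is the quickest check (a sketch assuming the standard --help output of the prompt-tune subcommand):

```bash
python -m graphrag prompt-tune --help | grep discover-entity-types
```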

graphrag/config/defaults.py

Lines changed: 1 addition & 0 deletions

@@ -214,6 +214,7 @@ class ExtractGraphNLPDefaults:
     normalize_edge_weights: bool = True
     text_analyzer: TextAnalyzerDefaults = field(default_factory=TextAnalyzerDefaults)
     concurrent_requests: int = 25
+    async_mode: AsyncType = AsyncType.Threaded


 @dataclass
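For context, a minimal sketch of the enum behind this new default, assuming only the two values implied elsewhere in this commit (the init template below offers `asyncio` as the alternative to the `threaded` default); the real definition lives in `graphrag.config.enums` and may differ in detail:

```python
from enum import Enum


class AsyncType(str, Enum):
    """Assumed shape of the async-mode enum referenced by the new default."""

    AsyncIO = "asyncio"    # schedule extraction coroutines directly on the event loop
    Threaded = "threaded"  # run extraction on a thread pool (the new default)
```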

graphrag/config/init_content.py

Lines changed: 1 addition & 0 deletions

@@ -112,6 +112,7 @@
 extract_graph_nlp:
   text_analyzer:
     extractor_type: {graphrag_config_defaults.extract_graph_nlp.text_analyzer.extractor_type.value} # [regex_english, syntactic_parser, cfg]
+  async_mode: {graphrag_config_defaults.extract_graph_nlp.async_mode.value} # or asyncio

 cluster_graph:
   max_cluster_size: {graphrag_config_defaults.cluster_graph.max_cluster_size}
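With the shipped defaults, the generated settings.yaml section should render roughly as below (a sketch; the exact extractor_type and async_mode strings are substituted from graphrag_config_defaults at init time):

```yaml
extract_graph_nlp:
  text_analyzer:
    extractor_type: regex_english # [regex_english, syntactic_parser, cfg]
  async_mode: threaded # or asyncio
```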

graphrag/config/models/extract_graph_nlp_config.py

Lines changed: 5 additions & 1 deletion

@@ -6,7 +6,7 @@
 from pydantic import BaseModel, Field

 from graphrag.config.defaults import graphrag_config_defaults
-from graphrag.config.enums import NounPhraseExtractorType
+from graphrag.config.enums import AsyncType, NounPhraseExtractorType


 class TextAnalyzerConfig(BaseModel):
@@ -68,3 +68,7 @@ class ExtractGraphNLPConfig(BaseModel):
         description="The number of threads to use for the extraction process.",
         default=graphrag_config_defaults.extract_graph_nlp.concurrent_requests,
     )
+    async_mode: AsyncType = Field(
+        description="The async mode to use.",
+        default=graphrag_config_defaults.extract_graph_nlp.async_mode,
+    )
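A minimal sketch of overriding the new field programmatically, assuming the import paths shown in this diff and an `AsyncIO` member on the enum (in normal use the value comes from settings.yaml instead):

```python
from graphrag.config.enums import AsyncType
from graphrag.config.models.extract_graph_nlp_config import ExtractGraphNLPConfig

# The default is AsyncType.Threaded via graphrag_config_defaults; switch to asyncio here.
nlp_config = ExtractGraphNLPConfig(async_mode=AsyncType.AsyncIO)
print(nlp_config.async_mode.value)  # "asyncio" (assuming the member names above)
```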

graphrag/index/operations/build_noun_graph/build_noun_graph.py

Lines changed: 8 additions & 2 deletions

@@ -24,12 +24,17 @@ async def build_noun_graph(
     text_analyzer: BaseNounPhraseExtractor,
     normalize_edge_weights: bool,
     num_threads: int = 4,
+    async_mode: AsyncType = AsyncType.Threaded,
     cache: PipelineCache | None = None,
 ) -> tuple[pd.DataFrame, pd.DataFrame]:
     """Build a noun graph from text units."""
     text_units = text_unit_df.loc[:, ["id", "text"]]
     nodes_df = await _extract_nodes(
-        text_units, text_analyzer, num_threads=num_threads, cache=cache
+        text_units,
+        text_analyzer,
+        num_threads=num_threads,
+        async_mode=async_mode,
+        cache=cache,
     )
     edges_df = _extract_edges(nodes_df, normalize_edge_weights=normalize_edge_weights)
     return (nodes_df, edges_df)
@@ -39,6 +44,7 @@ async def _extract_nodes(
     text_unit_df: pd.DataFrame,
     text_analyzer: BaseNounPhraseExtractor,
     num_threads: int = 4,
+    async_mode: AsyncType = AsyncType.Threaded,
     cache: PipelineCache | None = None,
 ) -> pd.DataFrame:
     """
@@ -64,7 +70,7 @@ async def extract(row):
         text_unit_df,
         extract,
         num_threads=num_threads,
-        async_type=AsyncType.Threaded,
+        async_type=async_mode,
         progress_msg="extract noun phrases progress: ",
     )
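The row-processing helper that receives `async_type` is not shown in this hunk. Purely as an illustration of what such a switch typically selects between, here is a hypothetical `run_rows` dispatcher (not the repository's implementation), reusing the AsyncType import path from the diff above:

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

from graphrag.config.enums import AsyncType  # import path taken from the diff above


async def run_rows(rows, fn, num_threads, async_type):
    """Hypothetical dispatcher: run an async `fn` over rows under the chosen mode."""
    if async_type == AsyncType.AsyncIO:
        # Pure asyncio: one task per row, concurrency bounded by a semaphore.
        sem = asyncio.Semaphore(num_threads)

        async def guarded(row):
            async with sem:
                return await fn(row)

        return await asyncio.gather(*(guarded(r) for r in rows))

    # Threaded (the default): run each row's coroutine to completion on a worker
    # thread with its own event loop, keeping CPU-bound NLP work off the main loop.
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=num_threads) as pool:

        def run_one(row):
            return asyncio.run(fn(row))

        futures = [loop.run_in_executor(pool, run_one, r) for r in rows]
        return await asyncio.gather(*futures)
```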

graphrag/index/workflows/extract_graph_nlp.py

Lines changed: 1 addition & 0 deletions

@@ -61,6 +61,7 @@ async def extract_graph_nlp(
         text_analyzer=text_analyzer,
         normalize_edge_weights=extraction_config.normalize_edge_weights,
         num_threads=extraction_config.concurrent_requests,
+        async_mode=extraction_config.async_mode,
         cache=cache,
     )

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@ dependencies = [
     # Async IO
     "aiofiles>=24.1.0",
     # LLM
-    "fnllm[azure,openai]>=0.3.0",
+    "fnllm[azure,openai]>=0.4.1",
     "json-repair>=0.30.3",
     "openai>=1.68.0",
     "nltk==3.9.1",

uv.lock

Lines changed: 1671 additions & 1673 deletions
Some generated files are not rendered by default.

0 commit comments
