Skip to content

Commit 437f102

Browse files
committed
Align docs with config
1 parent ddc6541 commit 437f102

File tree

9 files changed

+265
-240
lines changed

9 files changed

+265
-240
lines changed

docs/config/yaml.md

Lines changed: 118 additions & 96 deletions
Large diffs are not rendered by default.

graphrag/config/init_content.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,6 @@
5555
tokens_per_minute: 0 # set to 0 to disable rate limiting
5656
requests_per_minute: 0 # set to 0 to disable rate limiting
5757
58-
vector_store:
59-
{defs.DEFAULT_VECTOR_STORE_ID}:
60-
type: {vector_store_defaults.type}
61-
db_uri: {vector_store_defaults.db_uri}
62-
container_name: {vector_store_defaults.container_name}
63-
overwrite: {vector_store_defaults.overwrite}
64-
65-
embed_text:
66-
model_id: {graphrag_config_defaults.embed_text.model_id}
67-
vector_store_id: {graphrag_config_defaults.embed_text.vector_store_id}
68-
6958
### Input settings ###
7059
7160
input:
@@ -78,10 +67,14 @@
7867
overlap: {graphrag_config_defaults.chunks.overlap}
7968
group_by_columns: [{",".join(graphrag_config_defaults.chunks.group_by_columns)}]
8069
81-
### Output settings ###
70+
### Output/storage settings ###
8271
## If blob storage is specified in the following four sections,
8372
## connection_string and container_name must be provided
8473
74+
output:
75+
type: {graphrag_config_defaults.output.type.value} # [file, blob, cosmosdb]
76+
base_dir: "{graphrag_config_defaults.output.base_dir}"
77+
8578
cache:
8679
type: {graphrag_config_defaults.cache.type.value} # [file, blob, cosmosdb]
8780
base_dir: "{graphrag_config_defaults.cache.base_dir}"
@@ -90,12 +83,19 @@
9083
type: {graphrag_config_defaults.reporting.type.value} # [file, blob, cosmosdb]
9184
base_dir: "{graphrag_config_defaults.reporting.base_dir}"
9285
93-
output:
94-
type: {graphrag_config_defaults.output.type.value} # [file, blob, cosmosdb]
95-
base_dir: "{graphrag_config_defaults.output.base_dir}"
86+
vector_store:
87+
{defs.DEFAULT_VECTOR_STORE_ID}:
88+
type: {vector_store_defaults.type}
89+
db_uri: {vector_store_defaults.db_uri}
90+
container_name: {vector_store_defaults.container_name}
91+
overwrite: {vector_store_defaults.overwrite}
9692
9793
### Workflow settings ###
9894
95+
embed_text:
96+
model_id: {graphrag_config_defaults.embed_text.model_id}
97+
vector_store_id: {graphrag_config_defaults.embed_text.vector_store_id}
98+
9999
extract_graph:
100100
model_id: {graphrag_config_defaults.extract_graph.model_id}
101101
prompt: "prompts/extract_graph.txt"
@@ -111,6 +111,9 @@
111111
text_analyzer:
112112
extractor_type: {graphrag_config_defaults.extract_graph_nlp.text_analyzer.extractor_type.value} # [regex_english, syntactic_parser, cfg]
113113
114+
cluster_graph:
115+
max_cluster_size: {graphrag_config_defaults.cluster_graph.max_cluster_size}
116+
114117
extract_claims:
115118
enabled: false
116119
model_id: {graphrag_config_defaults.extract_claims.model_id}
@@ -125,9 +128,6 @@
125128
max_length: {graphrag_config_defaults.community_reports.max_length}
126129
max_input_length: {graphrag_config_defaults.community_reports.max_input_length}
127130
128-
cluster_graph:
129-
max_cluster_size: {graphrag_config_defaults.cluster_graph.max_cluster_size}
130-
131131
embed_graph:
132132
enabled: false # if true, will generate node2vec embeddings for nodes
133133

graphrag/config/models/community_reports_config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313

1414
class CommunityReportsConfig(BaseModel):
1515
"""Configuration section for community reports."""
16-
16+
17+
model_id: str = Field(
18+
description="The model ID to use for community reports.",
19+
default=graphrag_config_defaults.community_reports.model_id,
20+
)
1721
graph_prompt: str | None = Field(
1822
description="The community report extraction prompt to use for graph-based summarization.",
1923
default=graphrag_config_defaults.community_reports.graph_prompt,
@@ -34,10 +38,6 @@ class CommunityReportsConfig(BaseModel):
3438
description="The override strategy to use.",
3539
default=graphrag_config_defaults.community_reports.strategy,
3640
)
37-
model_id: str = Field(
38-
description="The model ID to use for community reports.",
39-
default=graphrag_config_defaults.community_reports.model_id,
40-
)
4141

4242
def resolved_strategy(
4343
self, root_dir: str, model_config: LanguageModelConfig

graphrag/config/models/extract_claims_config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ class ClaimExtractionConfig(BaseModel):
1818
description="Whether claim extraction is enabled.",
1919
default=graphrag_config_defaults.extract_claims.enabled,
2020
)
21+
model_id: str = Field(
22+
description="The model ID to use for claim extraction.",
23+
default=graphrag_config_defaults.extract_claims.model_id,
24+
)
2125
prompt: str | None = Field(
2226
description="The claim extraction prompt to use.",
2327
default=graphrag_config_defaults.extract_claims.prompt,
@@ -38,10 +42,6 @@ class ClaimExtractionConfig(BaseModel):
3842
default=graphrag_config_defaults.extract_claims.encoding_model,
3943
description="The encoding model to use.",
4044
)
41-
model_id: str = Field(
42-
description="The model ID to use for claim extraction.",
43-
default=graphrag_config_defaults.extract_claims.model_id,
44-
)
4545

4646
def resolved_strategy(
4747
self, root_dir: str, model_config: LanguageModelConfig

graphrag/config/models/extract_graph_config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313

1414
class ExtractGraphConfig(BaseModel):
1515
"""Configuration section for entity extraction."""
16-
16+
17+
model_id: str = Field(
18+
description="The model ID to use for entity extraction.",
19+
default=graphrag_config_defaults.extract_graph.model_id,
20+
)
1721
prompt: str | None = Field(
1822
description="The entity extraction prompt to use.",
1923
default=graphrag_config_defaults.extract_graph.prompt,
@@ -34,10 +38,6 @@ class ExtractGraphConfig(BaseModel):
3438
default=graphrag_config_defaults.extract_graph.encoding_model,
3539
description="The encoding model to use.",
3640
)
37-
model_id: str = Field(
38-
description="The model ID to use for text embeddings.",
39-
default=graphrag_config_defaults.extract_graph.model_id,
40-
)
4141

4242
def resolved_strategy(
4343
self, root_dir: str, model_config: LanguageModelConfig

graphrag/config/models/graph_rag_config.py

Lines changed: 72 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -89,20 +89,25 @@ def _validate_models(self) -> None:
8989
if defs.DEFAULT_EMBEDDING_MODEL_ID not in self.models:
9090
raise LanguageModelConfigMissingError(defs.DEFAULT_EMBEDDING_MODEL_ID)
9191

92-
reporting: ReportingConfig = Field(
93-
description="The reporting configuration.", default=ReportingConfig()
92+
93+
input: InputConfig = Field(
94+
description="The input configuration.", default=InputConfig()
9495
)
95-
"""The reporting configuration."""
96+
"""The input configuration."""
9697

97-
def _validate_reporting_base_dir(self) -> None:
98-
"""Validate the reporting base directory."""
99-
if self.reporting.type == defs.ReportingType.file:
100-
if self.reporting.base_dir.strip() == "":
101-
msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
102-
raise ValueError(msg)
103-
self.reporting.base_dir = str(
104-
(Path(self.root_dir) / self.reporting.base_dir).resolve()
105-
)
98+
def _validate_input_pattern(self) -> None:
99+
"""Validate the input file pattern based on the specified type."""
100+
if len(self.input.file_pattern) == 0:
101+
if self.input.file_type == defs.InputFileType.text:
102+
self.input.file_pattern = ".*\\.txt$"
103+
else:
104+
self.input.file_pattern = f".*\\.{self.input.file_type.value}$"
105+
106+
chunks: ChunkingConfig = Field(
107+
description="The chunking configuration to use.",
108+
default=ChunkingConfig(),
109+
)
110+
"""The chunking configuration to use."""
106111

107112
output: OutputConfig = Field(
108113
description="The output configuration.",
@@ -120,6 +125,7 @@ def _validate_output_base_dir(self) -> None:
120125
(Path(self.root_dir) / self.output.base_dir).resolve()
121126
)
122127

128+
123129
outputs: dict[str, OutputConfig] | None = Field(
124130
description="A list of output configurations used for multi-index query.",
125131
default=graphrag_config_defaults.outputs,
@@ -161,66 +167,72 @@ def _validate_update_index_output_base_dir(self) -> None:
161167
)
162168
"""The cache configuration."""
163169

164-
input: InputConfig = Field(
165-
description="The input configuration.", default=InputConfig()
170+
reporting: ReportingConfig = Field(
171+
description="The reporting configuration.", default=ReportingConfig()
166172
)
167-
"""The input configuration."""
173+
"""The reporting configuration."""
168174

169-
def _validate_input_pattern(self) -> None:
170-
"""Validate the input file pattern based on the specified type."""
171-
if len(self.input.file_pattern) == 0:
172-
if self.input.file_type == defs.InputFileType.text:
173-
self.input.file_pattern = ".*\\.txt$"
174-
else:
175-
self.input.file_pattern = f".*\\.{self.input.file_type.value}$"
175+
def _validate_reporting_base_dir(self) -> None:
176+
"""Validate the reporting base directory."""
177+
if self.reporting.type == defs.ReportingType.file:
178+
if self.reporting.base_dir.strip() == "":
179+
msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
180+
raise ValueError(msg)
181+
self.reporting.base_dir = str(
182+
(Path(self.root_dir) / self.reporting.base_dir).resolve()
183+
)
176184

177-
embed_graph: EmbedGraphConfig = Field(
178-
description="Graph embedding configuration.",
179-
default=EmbedGraphConfig(),
185+
vector_store: dict[str, VectorStoreConfig] = Field(
186+
description="The vector store configuration.",
187+
default_factory=lambda: {
188+
k: VectorStoreConfig(**asdict(v))
189+
for k, v in graphrag_config_defaults.vector_store.items()
190+
},
180191
)
181-
"""Graph Embedding configuration."""
192+
"""The vector store configuration."""
193+
194+
workflows: list[str] | None = Field(
195+
description="List of workflows to run, in execution order. This always overrides any built-in workflow methods.",
196+
default=graphrag_config_defaults.workflows,
197+
)
198+
"""List of workflows to run, in execution order."""
182199

183200
embed_text: TextEmbeddingConfig = Field(
184201
description="Text embedding configuration.",
185202
default=TextEmbeddingConfig(),
186203
)
187204
"""Text embedding configuration."""
188205

189-
chunks: ChunkingConfig = Field(
190-
description="The chunking configuration to use.",
191-
default=ChunkingConfig(),
192-
)
193-
"""The chunking configuration to use."""
194-
195-
snapshots: SnapshotsConfig = Field(
196-
description="The snapshots configuration to use.",
197-
default=SnapshotsConfig(),
198-
)
199-
"""The snapshots configuration to use."""
200-
201206
extract_graph: ExtractGraphConfig = Field(
202207
description="The entity extraction configuration to use.",
203208
default=ExtractGraphConfig(),
204209
)
205210
"""The entity extraction configuration to use."""
206211

212+
213+
summarize_descriptions: SummarizeDescriptionsConfig = Field(
214+
description="The description summarization configuration to use.",
215+
default=SummarizeDescriptionsConfig(),
216+
)
217+
"""The description summarization configuration to use."""
218+
207219
extract_graph_nlp: ExtractGraphNLPConfig = Field(
208220
description="The NLP-based graph extraction configuration to use.",
209221
default=ExtractGraphNLPConfig(),
210222
)
211223
"""The NLP-based graph extraction configuration to use."""
212224

213-
summarize_descriptions: SummarizeDescriptionsConfig = Field(
214-
description="The description summarization configuration to use.",
215-
default=SummarizeDescriptionsConfig(),
225+
prune_graph: PruneGraphConfig = Field(
226+
description="The graph pruning configuration to use.",
227+
default=PruneGraphConfig(),
216228
)
217-
"""The description summarization configuration to use."""
229+
"""The graph pruning configuration to use."""
218230

219-
community_reports: CommunityReportsConfig = Field(
220-
description="The community reports configuration to use.",
221-
default=CommunityReportsConfig(),
231+
cluster_graph: ClusterGraphConfig = Field(
232+
description="The cluster graph configuration to use.",
233+
default=ClusterGraphConfig(),
222234
)
223-
"""The community reports configuration to use."""
235+
"""The cluster graph configuration to use."""
224236

225237
extract_claims: ClaimExtractionConfig = Field(
226238
description="The claim extraction configuration to use.",
@@ -230,23 +242,29 @@ def _validate_input_pattern(self) -> None:
230242
)
231243
"""The claim extraction configuration to use."""
232244

233-
prune_graph: PruneGraphConfig = Field(
234-
description="The graph pruning configuration to use.",
235-
default=PruneGraphConfig(),
245+
community_reports: CommunityReportsConfig = Field(
246+
description="The community reports configuration to use.",
247+
default=CommunityReportsConfig(),
236248
)
237-
"""The graph pruning configuration to use."""
249+
"""The community reports configuration to use."""
238250

239-
cluster_graph: ClusterGraphConfig = Field(
240-
description="The cluster graph configuration to use.",
241-
default=ClusterGraphConfig(),
251+
embed_graph: EmbedGraphConfig = Field(
252+
description="Graph embedding configuration.",
253+
default=EmbedGraphConfig(),
242254
)
243-
"""The cluster graph configuration to use."""
255+
"""Graph embedding configuration."""
244256

245257
umap: UmapConfig = Field(
246258
description="The UMAP configuration to use.", default=UmapConfig()
247259
)
248260
"""The UMAP configuration to use."""
249261

262+
snapshots: SnapshotsConfig = Field(
263+
description="The snapshots configuration to use.",
264+
default=SnapshotsConfig(),
265+
)
266+
"""The snapshots configuration to use."""
267+
250268
local_search: LocalSearchConfig = Field(
251269
description="The local search configuration.", default=LocalSearchConfig()
252270
)
@@ -267,21 +285,6 @@ def _validate_input_pattern(self) -> None:
267285
)
268286
"""The basic search configuration."""
269287

270-
vector_store: dict[str, VectorStoreConfig] = Field(
271-
description="The vector store configuration.",
272-
default_factory=lambda: {
273-
k: VectorStoreConfig(**asdict(v))
274-
for k, v in graphrag_config_defaults.vector_store.items()
275-
},
276-
)
277-
"""The vector store configuration."""
278-
279-
workflows: list[str] | None = Field(
280-
description="List of workflows to run, in execution order. This always overrides any built-in workflow methods.",
281-
default=graphrag_config_defaults.workflows,
282-
)
283-
"""List of workflows to run, in execution order."""
284-
285288
def _validate_vector_store_db_uri(self) -> None:
286289
"""Validate the vector store configuration."""
287290
for store in self.vector_store.values():

0 commit comments

Comments
 (0)