Skip to content

Commit 3b1e70c

Browse files
authored
Update config docs (2.1.0) (#1818)
* Align docs with config * Semver * Spelling * Format * Spelling
1 parent 813b4de commit 3b1e70c

File tree

10 files changed

+263
-237
lines changed

10 files changed

+263
-237
lines changed
Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "Align config classes and docs better."
4+
}

docs/config/yaml.md

Lines changed: 118 additions & 96 deletions
Large diffs are not rendered by default.

graphrag/config/init_content.py

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -55,17 +55,6 @@
5555
tokens_per_minute: 0 # set to 0 to disable rate limiting
5656
requests_per_minute: 0 # set to 0 to disable rate limiting
5757
58-
vector_store:
59-
{defs.DEFAULT_VECTOR_STORE_ID}:
60-
type: {vector_store_defaults.type}
61-
db_uri: {vector_store_defaults.db_uri}
62-
container_name: {vector_store_defaults.container_name}
63-
overwrite: {vector_store_defaults.overwrite}
64-
65-
embed_text:
66-
model_id: {graphrag_config_defaults.embed_text.model_id}
67-
vector_store_id: {graphrag_config_defaults.embed_text.vector_store_id}
68-
6958
### Input settings ###
7059
7160
input:
@@ -78,10 +67,14 @@
7867
overlap: {graphrag_config_defaults.chunks.overlap}
7968
group_by_columns: [{",".join(graphrag_config_defaults.chunks.group_by_columns)}]
8069
81-
### Output settings ###
70+
### Output/storage settings ###
8271
## If blob storage is specified in the following four sections,
8372
## connection_string and container_name must be provided
8473
74+
output:
75+
type: {graphrag_config_defaults.output.type.value} # [file, blob, cosmosdb]
76+
base_dir: "{graphrag_config_defaults.output.base_dir}"
77+
8578
cache:
8679
type: {graphrag_config_defaults.cache.type.value} # [file, blob, cosmosdb]
8780
base_dir: "{graphrag_config_defaults.cache.base_dir}"
@@ -90,12 +83,19 @@
9083
type: {graphrag_config_defaults.reporting.type.value} # [file, blob, cosmosdb]
9184
base_dir: "{graphrag_config_defaults.reporting.base_dir}"
9285
93-
output:
94-
type: {graphrag_config_defaults.output.type.value} # [file, blob, cosmosdb]
95-
base_dir: "{graphrag_config_defaults.output.base_dir}"
86+
vector_store:
87+
{defs.DEFAULT_VECTOR_STORE_ID}:
88+
type: {vector_store_defaults.type}
89+
db_uri: {vector_store_defaults.db_uri}
90+
container_name: {vector_store_defaults.container_name}
91+
overwrite: {vector_store_defaults.overwrite}
9692
9793
### Workflow settings ###
9894
95+
embed_text:
96+
model_id: {graphrag_config_defaults.embed_text.model_id}
97+
vector_store_id: {graphrag_config_defaults.embed_text.vector_store_id}
98+
9999
extract_graph:
100100
model_id: {graphrag_config_defaults.extract_graph.model_id}
101101
prompt: "prompts/extract_graph.txt"
@@ -111,6 +111,9 @@
111111
text_analyzer:
112112
extractor_type: {graphrag_config_defaults.extract_graph_nlp.text_analyzer.extractor_type.value} # [regex_english, syntactic_parser, cfg]
113113
114+
cluster_graph:
115+
max_cluster_size: {graphrag_config_defaults.cluster_graph.max_cluster_size}
116+
114117
extract_claims:
115118
enabled: false
116119
model_id: {graphrag_config_defaults.extract_claims.model_id}
@@ -125,9 +128,6 @@
125128
max_length: {graphrag_config_defaults.community_reports.max_length}
126129
max_input_length: {graphrag_config_defaults.community_reports.max_input_length}
127130
128-
cluster_graph:
129-
max_cluster_size: {graphrag_config_defaults.cluster_graph.max_cluster_size}
130-
131131
embed_graph:
132132
enabled: false # if true, will generate node2vec embeddings for nodes
133133

graphrag/config/models/community_reports_config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
class CommunityReportsConfig(BaseModel):
1515
"""Configuration section for community reports."""
1616

17+
model_id: str = Field(
18+
description="The model ID to use for community reports.",
19+
default=graphrag_config_defaults.community_reports.model_id,
20+
)
1721
graph_prompt: str | None = Field(
1822
description="The community report extraction prompt to use for graph-based summarization.",
1923
default=graphrag_config_defaults.community_reports.graph_prompt,
@@ -34,10 +38,6 @@ class CommunityReportsConfig(BaseModel):
3438
description="The override strategy to use.",
3539
default=graphrag_config_defaults.community_reports.strategy,
3640
)
37-
model_id: str = Field(
38-
description="The model ID to use for community reports.",
39-
default=graphrag_config_defaults.community_reports.model_id,
40-
)
4141

4242
def resolved_strategy(
4343
self, root_dir: str, model_config: LanguageModelConfig

graphrag/config/models/extract_claims_config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ class ClaimExtractionConfig(BaseModel):
1818
description="Whether claim extraction is enabled.",
1919
default=graphrag_config_defaults.extract_claims.enabled,
2020
)
21+
model_id: str = Field(
22+
description="The model ID to use for claim extraction.",
23+
default=graphrag_config_defaults.extract_claims.model_id,
24+
)
2125
prompt: str | None = Field(
2226
description="The claim extraction prompt to use.",
2327
default=graphrag_config_defaults.extract_claims.prompt,
@@ -38,10 +42,6 @@ class ClaimExtractionConfig(BaseModel):
3842
default=graphrag_config_defaults.extract_claims.encoding_model,
3943
description="The encoding model to use.",
4044
)
41-
model_id: str = Field(
42-
description="The model ID to use for claim extraction.",
43-
default=graphrag_config_defaults.extract_claims.model_id,
44-
)
4545

4646
def resolved_strategy(
4747
self, root_dir: str, model_config: LanguageModelConfig

graphrag/config/models/extract_graph_config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
class ExtractGraphConfig(BaseModel):
1515
"""Configuration section for entity extraction."""
1616

17+
model_id: str = Field(
18+
description="The model ID to use for text embeddings.",
19+
default=graphrag_config_defaults.extract_graph.model_id,
20+
)
1721
prompt: str | None = Field(
1822
description="The entity extraction prompt to use.",
1923
default=graphrag_config_defaults.extract_graph.prompt,
@@ -34,10 +38,6 @@ class ExtractGraphConfig(BaseModel):
3438
default=graphrag_config_defaults.extract_graph.encoding_model,
3539
description="The encoding model to use.",
3640
)
37-
model_id: str = Field(
38-
description="The model ID to use for text embeddings.",
39-
default=graphrag_config_defaults.extract_graph.model_id,
40-
)
4141

4242
def resolved_strategy(
4343
self, root_dir: str, model_config: LanguageModelConfig

graphrag/config/models/graph_rag_config.py

Lines changed: 69 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -89,20 +89,24 @@ def _validate_models(self) -> None:
8989
if defs.DEFAULT_EMBEDDING_MODEL_ID not in self.models:
9090
raise LanguageModelConfigMissingError(defs.DEFAULT_EMBEDDING_MODEL_ID)
9191

92-
reporting: ReportingConfig = Field(
93-
description="The reporting configuration.", default=ReportingConfig()
92+
input: InputConfig = Field(
93+
description="The input configuration.", default=InputConfig()
9494
)
95-
"""The reporting configuration."""
95+
"""The input configuration."""
9696

97-
def _validate_reporting_base_dir(self) -> None:
98-
"""Validate the reporting base directory."""
99-
if self.reporting.type == defs.ReportingType.file:
100-
if self.reporting.base_dir.strip() == "":
101-
msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
102-
raise ValueError(msg)
103-
self.reporting.base_dir = str(
104-
(Path(self.root_dir) / self.reporting.base_dir).resolve()
105-
)
97+
def _validate_input_pattern(self) -> None:
98+
"""Validate the input file pattern based on the specified type."""
99+
if len(self.input.file_pattern) == 0:
100+
if self.input.file_type == defs.InputFileType.text:
101+
self.input.file_pattern = ".*\\.txt$"
102+
else:
103+
self.input.file_pattern = f".*\\.{self.input.file_type.value}$"
104+
105+
chunks: ChunkingConfig = Field(
106+
description="The chunking configuration to use.",
107+
default=ChunkingConfig(),
108+
)
109+
"""The chunking configuration to use."""
106110

107111
output: OutputConfig = Field(
108112
description="The output configuration.",
@@ -161,66 +165,71 @@ def _validate_update_index_output_base_dir(self) -> None:
161165
)
162166
"""The cache configuration."""
163167

164-
input: InputConfig = Field(
165-
description="The input configuration.", default=InputConfig()
168+
reporting: ReportingConfig = Field(
169+
description="The reporting configuration.", default=ReportingConfig()
166170
)
167-
"""The input configuration."""
171+
"""The reporting configuration."""
168172

169-
def _validate_input_pattern(self) -> None:
170-
"""Validate the input file pattern based on the specified type."""
171-
if len(self.input.file_pattern) == 0:
172-
if self.input.file_type == defs.InputFileType.text:
173-
self.input.file_pattern = ".*\\.txt$"
174-
else:
175-
self.input.file_pattern = f".*\\.{self.input.file_type.value}$"
173+
def _validate_reporting_base_dir(self) -> None:
174+
"""Validate the reporting base directory."""
175+
if self.reporting.type == defs.ReportingType.file:
176+
if self.reporting.base_dir.strip() == "":
177+
msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
178+
raise ValueError(msg)
179+
self.reporting.base_dir = str(
180+
(Path(self.root_dir) / self.reporting.base_dir).resolve()
181+
)
176182

177-
embed_graph: EmbedGraphConfig = Field(
178-
description="Graph embedding configuration.",
179-
default=EmbedGraphConfig(),
183+
vector_store: dict[str, VectorStoreConfig] = Field(
184+
description="The vector store configuration.",
185+
default_factory=lambda: {
186+
k: VectorStoreConfig(**asdict(v))
187+
for k, v in graphrag_config_defaults.vector_store.items()
188+
},
180189
)
181-
"""Graph Embedding configuration."""
190+
"""The vector store configuration."""
191+
192+
workflows: list[str] | None = Field(
193+
description="List of workflows to run, in execution order. This always overrides any built-in workflow methods.",
194+
default=graphrag_config_defaults.workflows,
195+
)
196+
"""List of workflows to run, in execution order."""
182197

183198
embed_text: TextEmbeddingConfig = Field(
184199
description="Text embedding configuration.",
185200
default=TextEmbeddingConfig(),
186201
)
187202
"""Text embedding configuration."""
188203

189-
chunks: ChunkingConfig = Field(
190-
description="The chunking configuration to use.",
191-
default=ChunkingConfig(),
192-
)
193-
"""The chunking configuration to use."""
194-
195-
snapshots: SnapshotsConfig = Field(
196-
description="The snapshots configuration to use.",
197-
default=SnapshotsConfig(),
198-
)
199-
"""The snapshots configuration to use."""
200-
201204
extract_graph: ExtractGraphConfig = Field(
202205
description="The entity extraction configuration to use.",
203206
default=ExtractGraphConfig(),
204207
)
205208
"""The entity extraction configuration to use."""
206209

210+
summarize_descriptions: SummarizeDescriptionsConfig = Field(
211+
description="The description summarization configuration to use.",
212+
default=SummarizeDescriptionsConfig(),
213+
)
214+
"""The description summarization configuration to use."""
215+
207216
extract_graph_nlp: ExtractGraphNLPConfig = Field(
208217
description="The NLP-based graph extraction configuration to use.",
209218
default=ExtractGraphNLPConfig(),
210219
)
211220
"""The NLP-based graph extraction configuration to use."""
212221

213-
summarize_descriptions: SummarizeDescriptionsConfig = Field(
214-
description="The description summarization configuration to use.",
215-
default=SummarizeDescriptionsConfig(),
222+
prune_graph: PruneGraphConfig = Field(
223+
description="The graph pruning configuration to use.",
224+
default=PruneGraphConfig(),
216225
)
217-
"""The description summarization configuration to use."""
226+
"""The graph pruning configuration to use."""
218227

219-
community_reports: CommunityReportsConfig = Field(
220-
description="The community reports configuration to use.",
221-
default=CommunityReportsConfig(),
228+
cluster_graph: ClusterGraphConfig = Field(
229+
description="The cluster graph configuration to use.",
230+
default=ClusterGraphConfig(),
222231
)
223-
"""The community reports configuration to use."""
232+
"""The cluster graph configuration to use."""
224233

225234
extract_claims: ClaimExtractionConfig = Field(
226235
description="The claim extraction configuration to use.",
@@ -230,23 +239,29 @@ def _validate_input_pattern(self) -> None:
230239
)
231240
"""The claim extraction configuration to use."""
232241

233-
prune_graph: PruneGraphConfig = Field(
234-
description="The graph pruning configuration to use.",
235-
default=PruneGraphConfig(),
242+
community_reports: CommunityReportsConfig = Field(
243+
description="The community reports configuration to use.",
244+
default=CommunityReportsConfig(),
236245
)
237-
"""The graph pruning configuration to use."""
246+
"""The community reports configuration to use."""
238247

239-
cluster_graph: ClusterGraphConfig = Field(
240-
description="The cluster graph configuration to use.",
241-
default=ClusterGraphConfig(),
248+
embed_graph: EmbedGraphConfig = Field(
249+
description="Graph embedding configuration.",
250+
default=EmbedGraphConfig(),
242251
)
243-
"""The cluster graph configuration to use."""
252+
"""Graph Embedding configuration."""
244253

245254
umap: UmapConfig = Field(
246255
description="The UMAP configuration to use.", default=UmapConfig()
247256
)
248257
"""The UMAP configuration to use."""
249258

259+
snapshots: SnapshotsConfig = Field(
260+
description="The snapshots configuration to use.",
261+
default=SnapshotsConfig(),
262+
)
263+
"""The snapshots configuration to use."""
264+
250265
local_search: LocalSearchConfig = Field(
251266
description="The local search configuration.", default=LocalSearchConfig()
252267
)
@@ -267,21 +282,6 @@ def _validate_input_pattern(self) -> None:
267282
)
268283
"""The basic search configuration."""
269284

270-
vector_store: dict[str, VectorStoreConfig] = Field(
271-
description="The vector store configuration.",
272-
default_factory=lambda: {
273-
k: VectorStoreConfig(**asdict(v))
274-
for k, v in graphrag_config_defaults.vector_store.items()
275-
},
276-
)
277-
"""The vector store configuration."""
278-
279-
workflows: list[str] | None = Field(
280-
description="List of workflows to run, in execution order. This always overrides any built-in workflow methods.",
281-
default=graphrag_config_defaults.workflows,
282-
)
283-
"""List of workflows to run, in execution order."""
284-
285285
def _validate_vector_store_db_uri(self) -> None:
286286
"""Validate the vector store configuration."""
287287
for store in self.vector_store.values():

0 commit comments

Comments
 (0)