@@ -89,20 +89,25 @@ def _validate_models(self) -> None:
8989 if defs .DEFAULT_EMBEDDING_MODEL_ID not in self .models :
9090 raise LanguageModelConfigMissingError (defs .DEFAULT_EMBEDDING_MODEL_ID )
9191
92- reporting : ReportingConfig = Field (
93- description = "The reporting configuration." , default = ReportingConfig ()
92+
93+ input : InputConfig = Field (
94+ description = "The input configuration." , default = InputConfig ()
9495 )
95- """The reporting configuration."""
96+ """The input configuration."""
9697
97- def _validate_reporting_base_dir (self ) -> None :
98- """Validate the reporting base directory."""
99- if self .reporting .type == defs .ReportingType .file :
100- if self .reporting .base_dir .strip () == "" :
101- msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
102- raise ValueError (msg )
103- self .reporting .base_dir = str (
104- (Path (self .root_dir ) / self .reporting .base_dir ).resolve ()
105- )
98+ def _validate_input_pattern (self ) -> None :
99+ """Validate the input file pattern based on the specified type."""
100+ if len (self .input .file_pattern ) == 0 :
101+ if self .input .file_type == defs .InputFileType .text :
102+ self .input .file_pattern = ".*\\.txt$"
103+ else :
104+ self .input .file_pattern = f".*\\.{self.input.file_type.value}$"
105+
106+ chunks : ChunkingConfig = Field (
107+ description = "The chunking configuration to use." ,
108+ default = ChunkingConfig (),
109+ )
110+ """The chunking configuration to use."""
106111
107112 output : OutputConfig = Field (
108113 description = "The output configuration." ,
@@ -120,6 +125,7 @@ def _validate_output_base_dir(self) -> None:
120125 (Path (self .root_dir ) / self .output .base_dir ).resolve ()
121126 )
122127
128+
123129 outputs : dict [str , OutputConfig ] | None = Field (
124130 description = "A list of output configurations used for multi-index query." ,
125131 default = graphrag_config_defaults .outputs ,
@@ -161,66 +167,72 @@ def _validate_update_index_output_base_dir(self) -> None:
161167 )
162168 """The cache configuration."""
163169
164- input : InputConfig = Field (
165- description = "The input configuration." , default = InputConfig ()
170+ reporting : ReportingConfig = Field (
171+ description = "The reporting configuration." , default = ReportingConfig ()
166172 )
167- """The input configuration."""
173+ """The reporting configuration."""
168174
169- def _validate_input_pattern (self ) -> None :
170- """Validate the input file pattern based on the specified type."""
171- if len (self .input .file_pattern ) == 0 :
172- if self .input .file_type == defs .InputFileType .text :
173- self .input .file_pattern = ".*\\.txt$"
174- else :
175- self .input .file_pattern = f".*\\.{self.input.file_type.value}$"
175+ def _validate_reporting_base_dir (self ) -> None :
176+ """Validate the reporting base directory."""
177+ if self .reporting .type == defs .ReportingType .file :
178+ if self .reporting .base_dir .strip () == "" :
179+ msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
180+ raise ValueError (msg )
181+ self .reporting .base_dir = str (
182+ (Path (self .root_dir ) / self .reporting .base_dir ).resolve ()
183+ )
176184
177- embed_graph : EmbedGraphConfig = Field (
178- description = "Graph embedding configuration." ,
179- default = EmbedGraphConfig (),
185+ vector_store : dict [str , VectorStoreConfig ] = Field (
186+ description = "The vector store configuration." ,
187+ default_factory = lambda : {
188+ k : VectorStoreConfig (** asdict (v ))
189+ for k , v in graphrag_config_defaults .vector_store .items ()
190+ },
180191 )
181- """Graph Embedding configuration."""
192+ """The vector store configuration."""
193+
194+ workflows : list [str ] | None = Field (
195+ description = "List of workflows to run, in execution order. This always overrides any built-in workflow methods." ,
196+ default = graphrag_config_defaults .workflows ,
197+ )
198+ """List of workflows to run, in execution order."""
182199
183200 embed_text : TextEmbeddingConfig = Field (
184201 description = "Text embedding configuration." ,
185202 default = TextEmbeddingConfig (),
186203 )
187204 """Text embedding configuration."""
188205
189- chunks : ChunkingConfig = Field (
190- description = "The chunking configuration to use." ,
191- default = ChunkingConfig (),
192- )
193- """The chunking configuration to use."""
194-
195- snapshots : SnapshotsConfig = Field (
196- description = "The snapshots configuration to use." ,
197- default = SnapshotsConfig (),
198- )
199- """The snapshots configuration to use."""
200-
201206 extract_graph : ExtractGraphConfig = Field (
202207 description = "The entity extraction configuration to use." ,
203208 default = ExtractGraphConfig (),
204209 )
205210 """The entity extraction configuration to use."""
206211
212+
213+ summarize_descriptions : SummarizeDescriptionsConfig = Field (
214+ description = "The description summarization configuration to use." ,
215+ default = SummarizeDescriptionsConfig (),
216+ )
217+ """The description summarization configuration to use."""
218+
207219 extract_graph_nlp : ExtractGraphNLPConfig = Field (
208220 description = "The NLP-based graph extraction configuration to use." ,
209221 default = ExtractGraphNLPConfig (),
210222 )
211223 """The NLP-based graph extraction configuration to use."""
212224
213- summarize_descriptions : SummarizeDescriptionsConfig = Field (
214- description = "The description summarization configuration to use." ,
215- default = SummarizeDescriptionsConfig (),
225+ prune_graph : PruneGraphConfig = Field (
226+ description = "The graph pruning configuration to use." ,
227+ default = PruneGraphConfig (),
216228 )
217- """The description summarization configuration to use."""
229+ """The graph pruning configuration to use."""
218230
219- community_reports : CommunityReportsConfig = Field (
220- description = "The community reports configuration to use." ,
221- default = CommunityReportsConfig (),
231+ cluster_graph : ClusterGraphConfig = Field (
232+ description = "The cluster graph configuration to use." ,
233+ default = ClusterGraphConfig (),
222234 )
223- """The community reports configuration to use."""
235+ """The cluster graph configuration to use."""
224236
225237 extract_claims : ClaimExtractionConfig = Field (
226238 description = "The claim extraction configuration to use." ,
@@ -230,23 +242,29 @@ def _validate_input_pattern(self) -> None:
230242 )
231243 """The claim extraction configuration to use."""
232244
233- prune_graph : PruneGraphConfig = Field (
234- description = "The graph pruning configuration to use." ,
235- default = PruneGraphConfig (),
245+ community_reports : CommunityReportsConfig = Field (
246+ description = "The community reports configuration to use." ,
247+ default = CommunityReportsConfig (),
236248 )
237- """The graph pruning configuration to use."""
249+ """The community reports configuration to use."""
238250
239- cluster_graph : ClusterGraphConfig = Field (
240- description = "The cluster graph configuration to use ." ,
241- default = ClusterGraphConfig (),
251+ embed_graph : EmbedGraphConfig = Field (
252+ description = "Graph embedding configuration." ,
253+ default = EmbedGraphConfig (),
242254 )
243- """The cluster graph configuration to use ."""
255+ """Graph Embedding configuration."""
244256
245257 umap : UmapConfig = Field (
246258 description = "The UMAP configuration to use." , default = UmapConfig ()
247259 )
248260 """The UMAP configuration to use."""
249261
262+ snapshots : SnapshotsConfig = Field (
263+ description = "The snapshots configuration to use." ,
264+ default = SnapshotsConfig (),
265+ )
266+ """The snapshots configuration to use."""
267+
250268 local_search : LocalSearchConfig = Field (
251269 description = "The local search configuration." , default = LocalSearchConfig ()
252270 )
@@ -267,21 +285,6 @@ def _validate_input_pattern(self) -> None:
267285 )
268286 """The basic search configuration."""
269287
270- vector_store : dict [str , VectorStoreConfig ] = Field (
271- description = "The vector store configuration." ,
272- default_factory = lambda : {
273- k : VectorStoreConfig (** asdict (v ))
274- for k , v in graphrag_config_defaults .vector_store .items ()
275- },
276- )
277- """The vector store configuration."""
278-
279- workflows : list [str ] | None = Field (
280- description = "List of workflows to run, in execution order. This always overrides any built-in workflow methods." ,
281- default = graphrag_config_defaults .workflows ,
282- )
283- """List of workflows to run, in execution order."""
284-
285288 def _validate_vector_store_db_uri (self ) -> None :
286289 """Validate the vector store configuration."""
287290 for store in self .vector_store .values ():
0 commit comments