@@ -89,20 +89,24 @@ def _validate_models(self) -> None:
8989 if defs .DEFAULT_EMBEDDING_MODEL_ID not in self .models :
9090 raise LanguageModelConfigMissingError (defs .DEFAULT_EMBEDDING_MODEL_ID )
9191
92- reporting : ReportingConfig = Field (
93- description = "The reporting configuration." , default = ReportingConfig ()
92+ input : InputConfig = Field (
93+ description = "The input configuration." , default = InputConfig ()
9494 )
95- """The reporting configuration."""
95+ """The input configuration."""
9696
97- def _validate_reporting_base_dir (self ) -> None :
98- """Validate the reporting base directory."""
99- if self .reporting .type == defs .ReportingType .file :
100- if self .reporting .base_dir .strip () == "" :
101- msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
102- raise ValueError (msg )
103- self .reporting .base_dir = str (
104- (Path (self .root_dir ) / self .reporting .base_dir ).resolve ()
105- )
97+ def _validate_input_pattern (self ) -> None :
98+ """Validate the input file pattern based on the specified type."""
99+ if len (self .input .file_pattern ) == 0 :
100+ if self .input .file_type == defs .InputFileType .text :
101+ self .input .file_pattern = ".*\\ .txt$"
102+ else :
103+ self .input .file_pattern = f".*\\ .{ self .input .file_type .value } $"
104+
105+ chunks : ChunkingConfig = Field (
106+ description = "The chunking configuration to use." ,
107+ default = ChunkingConfig (),
108+ )
109+ """The chunking configuration to use."""
106110
107111 output : OutputConfig = Field (
108112 description = "The output configuration." ,
@@ -161,66 +165,71 @@ def _validate_update_index_output_base_dir(self) -> None:
161165 )
162166 """The cache configuration."""
163167
164- input : InputConfig = Field (
165- description = "The input configuration." , default = InputConfig ()
168+ reporting : ReportingConfig = Field (
169+ description = "The reporting configuration." , default = ReportingConfig ()
166170 )
167- """The input configuration."""
171+ """The reporting configuration."""
168172
169- def _validate_input_pattern (self ) -> None :
170- """Validate the input file pattern based on the specified type."""
171- if len (self .input .file_pattern ) == 0 :
172- if self .input .file_type == defs .InputFileType .text :
173- self .input .file_pattern = ".*\\ .txt$"
174- else :
175- self .input .file_pattern = f".*\\ .{ self .input .file_type .value } $"
173+ def _validate_reporting_base_dir (self ) -> None :
174+ """Validate the reporting base directory."""
175+ if self .reporting .type == defs .ReportingType .file :
176+ if self .reporting .base_dir .strip () == "" :
177+ msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
178+ raise ValueError (msg )
179+ self .reporting .base_dir = str (
180+ (Path (self .root_dir ) / self .reporting .base_dir ).resolve ()
181+ )
176182
177- embed_graph : EmbedGraphConfig = Field (
178- description = "Graph embedding configuration." ,
179- default = EmbedGraphConfig (),
183+ vector_store : dict [str , VectorStoreConfig ] = Field (
184+ description = "The vector store configuration." ,
185+ default_factory = lambda : {
186+ k : VectorStoreConfig (** asdict (v ))
187+ for k , v in graphrag_config_defaults .vector_store .items ()
188+ },
180189 )
181- """Graph Embedding configuration."""
190+ """The vector store configuration."""
191+
192+ workflows : list [str ] | None = Field (
193+ description = "List of workflows to run, in execution order. This always overrides any built-in workflow methods." ,
194+ default = graphrag_config_defaults .workflows ,
195+ )
196+ """List of workflows to run, in execution order."""
182197
183198 embed_text : TextEmbeddingConfig = Field (
184199 description = "Text embedding configuration." ,
185200 default = TextEmbeddingConfig (),
186201 )
187202 """Text embedding configuration."""
188203
189- chunks : ChunkingConfig = Field (
190- description = "The chunking configuration to use." ,
191- default = ChunkingConfig (),
192- )
193- """The chunking configuration to use."""
194-
195- snapshots : SnapshotsConfig = Field (
196- description = "The snapshots configuration to use." ,
197- default = SnapshotsConfig (),
198- )
199- """The snapshots configuration to use."""
200-
201204 extract_graph : ExtractGraphConfig = Field (
202205 description = "The entity extraction configuration to use." ,
203206 default = ExtractGraphConfig (),
204207 )
205208 """The entity extraction configuration to use."""
206209
210+ summarize_descriptions : SummarizeDescriptionsConfig = Field (
211+ description = "The description summarization configuration to use." ,
212+ default = SummarizeDescriptionsConfig (),
213+ )
214+ """The description summarization configuration to use."""
215+
207216 extract_graph_nlp : ExtractGraphNLPConfig = Field (
208217 description = "The NLP-based graph extraction configuration to use." ,
209218 default = ExtractGraphNLPConfig (),
210219 )
211220 """The NLP-based graph extraction configuration to use."""
212221
213- summarize_descriptions : SummarizeDescriptionsConfig = Field (
214- description = "The description summarization configuration to use." ,
215- default = SummarizeDescriptionsConfig (),
222+ prune_graph : PruneGraphConfig = Field (
223+ description = "The graph pruning configuration to use." ,
224+ default = PruneGraphConfig (),
216225 )
217- """The description summarization configuration to use."""
226+ """The graph pruning configuration to use."""
218227
219- community_reports : CommunityReportsConfig = Field (
220- description = "The community reports configuration to use." ,
221- default = CommunityReportsConfig (),
228+ cluster_graph : ClusterGraphConfig = Field (
229+ description = "The cluster graph configuration to use." ,
230+ default = ClusterGraphConfig (),
222231 )
223- """The community reports configuration to use."""
232+ """The cluster graph configuration to use."""
224233
225234 extract_claims : ClaimExtractionConfig = Field (
226235 description = "The claim extraction configuration to use." ,
@@ -230,23 +239,29 @@ def _validate_input_pattern(self) -> None:
230239 )
231240 """The claim extraction configuration to use."""
232241
233- prune_graph : PruneGraphConfig = Field (
234- description = "The graph pruning configuration to use." ,
235- default = PruneGraphConfig (),
242+ community_reports : CommunityReportsConfig = Field (
243+ description = "The community reports configuration to use." ,
244+ default = CommunityReportsConfig (),
236245 )
237- """The graph pruning configuration to use."""
246+ """The community reports configuration to use."""
238247
239- cluster_graph : ClusterGraphConfig = Field (
240- description = "The cluster graph configuration to use ." ,
241- default = ClusterGraphConfig (),
248+ embed_graph : EmbedGraphConfig = Field (
249+ description = "Graph embedding configuration." ,
250+ default = EmbedGraphConfig (),
242251 )
243- """The cluster graph configuration to use ."""
252+ """Graph Embedding configuration."""
244253
245254 umap : UmapConfig = Field (
246255 description = "The UMAP configuration to use." , default = UmapConfig ()
247256 )
248257 """The UMAP configuration to use."""
249258
259+ snapshots : SnapshotsConfig = Field (
260+ description = "The snapshots configuration to use." ,
261+ default = SnapshotsConfig (),
262+ )
263+ """The snapshots configuration to use."""
264+
250265 local_search : LocalSearchConfig = Field (
251266 description = "The local search configuration." , default = LocalSearchConfig ()
252267 )
@@ -267,21 +282,6 @@ def _validate_input_pattern(self) -> None:
267282 )
268283 """The basic search configuration."""
269284
270- vector_store : dict [str , VectorStoreConfig ] = Field (
271- description = "The vector store configuration." ,
272- default_factory = lambda : {
273- k : VectorStoreConfig (** asdict (v ))
274- for k , v in graphrag_config_defaults .vector_store .items ()
275- },
276- )
277- """The vector store configuration."""
278-
279- workflows : list [str ] | None = Field (
280- description = "List of workflows to run, in execution order. This always overrides any built-in workflow methods." ,
281- default = graphrag_config_defaults .workflows ,
282- )
283- """List of workflows to run, in execution order."""
284-
285285 def _validate_vector_store_db_uri (self ) -> None :
286286 """Validate the vector store configuration."""
287287 for store in self .vector_store .values ():
0 commit comments