|
31 | 31 | from graphrag.config.input_models.graphrag_config_input import GraphRagConfigInput |
32 | 32 | from graphrag.config.input_models.llm_config_input import LLMConfigInput |
33 | 33 | from graphrag.config.models.cache_config import CacheConfig |
34 | | -from graphrag.config.models.chunking_config import ChunkingConfig |
| 34 | +from graphrag.config.models.chunking_config import ChunkingConfig, ChunkStrategyType |
35 | 35 | from graphrag.config.models.claim_extraction_config import ClaimExtractionConfig |
36 | 36 | from graphrag.config.models.cluster_graph_config import ClusterGraphConfig |
37 | 37 | from graphrag.config.models.community_reports_config import CommunityReportsConfig |
@@ -318,13 +318,16 @@ def hydrate_parallelization_params( |
318 | 318 | reader.envvar_prefix(Section.node2vec), |
319 | 319 | reader.use(values.get("embed_graph")), |
320 | 320 | ): |
| 321 | + use_lcc = reader.bool("use_lcc") |
321 | 322 | embed_graph_model = EmbedGraphConfig( |
322 | 323 | enabled=reader.bool(Fragment.enabled) or defs.NODE2VEC_ENABLED, |
| 324 | + dimensions=reader.int("dimensions") or defs.NODE2VEC_DIMENSIONS, |
323 | 325 | num_walks=reader.int("num_walks") or defs.NODE2VEC_NUM_WALKS, |
324 | 326 | walk_length=reader.int("walk_length") or defs.NODE2VEC_WALK_LENGTH, |
325 | 327 | window_size=reader.int("window_size") or defs.NODE2VEC_WINDOW_SIZE, |
326 | 328 | iterations=reader.int("iterations") or defs.NODE2VEC_ITERATIONS, |
327 | 329 | random_seed=reader.int("random_seed") or defs.NODE2VEC_RANDOM_SEED, |
| 330 | + use_lcc=use_lcc if use_lcc is not None else defs.USE_LCC, |
328 | 331 | ) |
329 | 332 | with reader.envvar_prefix(Section.input), reader.use(values.get("input")): |
330 | 333 | input_type = reader.str("type") |
@@ -412,12 +415,15 @@ def hydrate_parallelization_params( |
412 | 415 | encoding_model = ( |
413 | 416 | reader.str(Fragment.encoding_model) or global_encoding_model |
414 | 417 | ) |
415 | | - |
| 418 | + strategy = reader.str("strategy") |
416 | 419 | chunks_model = ChunkingConfig( |
417 | 420 | size=reader.int("size") or defs.CHUNK_SIZE, |
418 | 421 | overlap=reader.int("overlap") or defs.CHUNK_OVERLAP, |
419 | 422 | group_by_columns=group_by_columns, |
420 | 423 | encoding_model=encoding_model, |
| 424 | + strategy=ChunkStrategyType(strategy) |
| 425 | + if strategy |
| 426 | + else ChunkStrategyType.tokens, |
421 | 427 | ) |
422 | 428 | with ( |
423 | 429 | reader.envvar_prefix(Section.snapshot), |
@@ -522,8 +528,13 @@ def hydrate_parallelization_params( |
522 | 528 | ) |
523 | 529 |
|
524 | 530 | with reader.use(values.get("cluster_graph")): |
| 531 | + use_lcc = reader.bool("use_lcc") |
| 532 | + seed = reader.int("seed") |
525 | 533 | cluster_graph_model = ClusterGraphConfig( |
526 | | - max_cluster_size=reader.int("max_cluster_size") or defs.MAX_CLUSTER_SIZE |
| 534 | + max_cluster_size=reader.int("max_cluster_size") |
| 535 | + or defs.MAX_CLUSTER_SIZE, |
| 536 | + use_lcc=use_lcc if use_lcc is not None else defs.USE_LCC, |
| 537 | + seed=seed if seed is not None else defs.CLUSTER_GRAPH_SEED, |
527 | 538 | ) |
528 | 539 |
|
529 | 540 | with ( |
|
0 commit comments