Skip to content

Commit 0482878

Browse files
committed
fix config for reading chunk_overlap value
1 parent dd884c0 commit 0482878

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

graphrag/config/create_graphrag_config.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -411,9 +411,12 @@ def hydrate_parallelization_params(
411411
reader.str(Fragment.encoding_model) or global_encoding_model
412412
)
413413
strategy = reader.str("strategy")
414+
overlap = reader.int("overlap")
415+
if overlap is None:
416+
overlap = defs.CHUNK_OVERLAP
414417
chunks_model = ChunkingConfig(
415418
size=reader.int("size") or defs.CHUNK_SIZE,
416-
overlap=reader.int("overlap") or defs.CHUNK_OVERLAP,
419+
overlap=overlap,
417420
group_by_columns=group_by_columns,
418421
encoding_model=encoding_model,
419422
strategy=ChunkStrategyType(strategy)

tests/unit/config/test_default_config.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -351,6 +351,33 @@ def test_can_set_no_chunk_by_columns(self):
351351
parameters = create_graphrag_config()
352352
assert parameters.chunks.group_by_columns == []
353353

354+
@mock.patch.dict(
355+
os.environ,
356+
{"GRAPHRAG_CHUNK_OVERLAP": "0", "GRAPHRAG_API_KEY": "test"},
357+
clear=True,
358+
)
359+
def test_can_set_chunk_zero_overlap(self):
360+
parameters = create_graphrag_config()
361+
assert parameters.chunks.overlap == 0
362+
363+
@mock.patch.dict(
364+
os.environ,
365+
{"GRAPHRAG_API_KEY": "test"},
366+
clear=True,
367+
)
368+
def test_can_set_chunk_none_overlap(self):
369+
parameters = create_graphrag_config()
370+
assert parameters.chunks.overlap == 100
371+
372+
@mock.patch.dict(
373+
os.environ,
374+
{"GRAPHRAG_CHUNK_OVERLAP": "42", "GRAPHRAG_API_KEY": "test"},
375+
clear=True,
376+
)
377+
def test_can_set_chunk_value_overlap(self):
378+
parameters = create_graphrag_config()
379+
assert parameters.chunks.overlap == 42
380+
354381
def test_all_env_vars_is_accurate(self):
355382
env_var_docs_path = Path("docs/config/env_vars.md")
356383

@@ -533,6 +560,9 @@ def test_yaml_load_e2e():
533560
requests_per_minute: 900
534561
thread_count: 50
535562
concurrent_requests: 25
563+
564+
chunks:
565+
overlap: 0
536566
"""
537567
)
538568
# create default configuration pipeline parameters from the custom settings
@@ -544,6 +574,7 @@ def test_yaml_load_e2e():
544574
assert parameters.llm.api_base == "http://test"
545575
assert parameters.llm.api_version == "v1"
546576
assert parameters.llm.deployment_name == "test"
577+
assert parameters.chunks.overlap == 0
547578

548579
# generate the pipeline from the default parameters
549580
pipeline_config = create_pipeline_config(parameters, True)

0 commit comments

Comments
 (0)