Skip to content

Commit b32f403

Browse files
committed
Fix tokenizer removal from chunker
1 parent 90479c0 commit b32f403

File tree

3 files changed

+4 -4 lines changed

packages/graphrag/graphrag/chunking/chunker_factory.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,8 @@ def register_chunker(
38 38
39 39   def create_chunker(
40 40       config: ChunkingConfig,
41    -     encode: Callable[[str], list[int]] | None,
42    -     decode: Callable[[list[int]], str] | None,
   41 +     encode: Callable[[str], list[int]] | None = None,
   42 +     decode: Callable[[list[int]], str] | None = None,
43 43   ) -> Chunker:
44 44       """Create a chunker implementation based on the given configuration.
45 45

packages/graphrag/graphrag/prompt_tune/loader/input.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ async def load_docs_in_chunks(
62 62           cache=NoopCache(),
63 63       )
64 64       tokenizer = get_tokenizer(embeddings_llm_settings)
65    -     chunker = create_chunker(config.chunks, tokenizer)
   65 +     chunker = create_chunker(config.chunks, tokenizer.encode, tokenizer.decode)
66 66       input_storage = create_storage(config.input.storage)
67 67       input_reader = InputReaderFactory().create(
68 68           config.input.file_type,

tests/unit/chunking/test_chunker.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def test_basic_functionality(self, mock_get_encoding):
66 66               strategy=ChunkStrategyType.Tokens,
67 67           )
68 68
69    -         chunker = create_chunker(config, tokenizer=tokenizer)
   69 +         chunker = create_chunker(config, mock_encoder.encode, mock_encoder.decode)
70 70           chunks = chunker.chunk(input)
71 71
72 72           assert len(chunks) > 0

0 commit comments

Comments
 (0)