Skip to content

Commit 90479c0

Browse files
committed
Move Tokenizer back to GR core
1 parent 247547f commit 90479c0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+58
-71
lines changed

packages/graphrag-common/graphrag_common/types/__init__.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

packages/graphrag/graphrag/chunking/chunker_factory.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from collections.abc import Callable
77

88
from graphrag_common.factory.factory import Factory, ServiceScope
9-
from graphrag_common.types.tokenizer import Tokenizer
109

1110
from graphrag.chunking.chunk_strategy_type import ChunkStrategyType
1211
from graphrag.chunking.chunker import Chunker
@@ -38,7 +37,9 @@ def register_chunker(
3837

3938

4039
def create_chunker(
41-
config: ChunkingConfig, tokenizer: Tokenizer | None = None
40+
config: ChunkingConfig,
41+
encode: Callable[[str], list[int]] | None,
42+
decode: Callable[[list[int]], str] | None,
4243
) -> Chunker:
4344
"""Create a chunker implementation based on the given configuration.
4445
@@ -53,8 +54,10 @@ def create_chunker(
5354
The created chunker implementation.
5455
"""
5556
config_model = config.model_dump()
56-
if tokenizer is not None:
57-
config_model["tokenizer"] = tokenizer
57+
if encode is not None:
58+
config_model["encode"] = encode
59+
if decode is not None:
60+
config_model["decode"] = decode
5861
chunker_strategy = config.strategy
5962

6063
if chunker_strategy not in chunker_factory:

packages/graphrag/graphrag/chunking/token_chunker.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,8 @@
66
from collections.abc import Callable
77
from typing import Any
88

9-
from graphrag_common.types.tokenizer import Tokenizer
10-
119
from graphrag.chunking.chunker import Chunker
1210

13-
EncodedText = list[int]
14-
DecodeFn = Callable[[EncodedText], str]
15-
EncodeFn = Callable[[str], EncodedText]
16-
1711

1812
class TokenChunker(Chunker):
1913
"""A chunker that splits text into token-based chunks."""
@@ -22,31 +16,33 @@ def __init__(
2216
self,
2317
size: int,
2418
overlap: int,
25-
tokenizer: Tokenizer,
19+
encode: Callable[[str], list[int]],
20+
decode: Callable[[list[int]], str],
2621
**kwargs: Any,
2722
) -> None:
2823
"""Create a token chunker instance."""
2924
self._size = size
3025
self._overlap = overlap
31-
self._tokenizer = tokenizer
26+
self._encode = encode
27+
self._decode = decode
3228

3329
def chunk(self, text: str) -> list[str]:
3430
"""Chunk the text into token-based chunks."""
3531
return split_text_on_tokens(
3632
text,
3733
chunk_size=self._size,
3834
chunk_overlap=self._overlap,
39-
encode=self._tokenizer.encode,
40-
decode=self._tokenizer.decode,
35+
encode=self._encode,
36+
decode=self._decode,
4137
)
4238

4339

4440
def split_text_on_tokens(
4541
text: str,
4642
chunk_size: int,
4743
chunk_overlap: int,
48-
encode: EncodeFn,
49-
decode: DecodeFn,
44+
encode: Callable[[str], list[int]],
45+
decode: Callable[[list[int]], str],
5046
) -> list[str]:
5147
"""Split a single text and return chunks using the tokenizer."""
5248
result = []

packages/graphrag/graphrag/index/operations/embed_text/embed_text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77

88
import numpy as np
99
import pandas as pd
10-
from graphrag_common.types.tokenizer import Tokenizer
1110

1211
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
1312
from graphrag.index.operations.embed_text.run_embed_text import run_embed_text
1413
from graphrag.language_model.protocol.base import EmbeddingModel
14+
from graphrag.tokenizer.tokenizer import Tokenizer
1515
from graphrag.vector_stores.base import BaseVectorStore, VectorStoreDocument
1616

1717
logger = logging.getLogger(__name__)

packages/graphrag/graphrag/index/operations/embed_text/run_embed_text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88
from dataclasses import dataclass
99

1010
import numpy as np
11-
from graphrag_common.types.tokenizer import Tokenizer
1211

1312
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
1413
from graphrag.chunking.token_chunker import split_text_on_tokens
1514
from graphrag.index.utils.is_null import is_null
1615
from graphrag.language_model.protocol.base import EmbeddingModel
1716
from graphrag.logger.progress import ProgressTicker, progress_ticker
17+
from graphrag.tokenizer.tokenizer import Tokenizer
1818

1919
logger = logging.getLogger(__name__)
2020

packages/graphrag/graphrag/index/operations/summarize_communities/build_mixed_context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
"""A module containing build_mixed_context method definition."""
55

66
import pandas as pd
7-
from graphrag_common.types.tokenizer import Tokenizer
87

98
import graphrag.data_model.schemas as schemas
109
from graphrag.index.operations.summarize_communities.graph_context.sort_context import (
1110
sort_context,
1211
)
12+
from graphrag.tokenizer.tokenizer import Tokenizer
1313

1414

1515
def build_mixed_context(

packages/graphrag/graphrag/index/operations/summarize_communities/graph_context/context_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from typing import cast
88

99
import pandas as pd
10-
from graphrag_common.types.tokenizer import Tokenizer
1110

1211
import graphrag.data_model.schemas as schemas
1312
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
@@ -31,6 +30,7 @@
3130
where_column_equals,
3231
)
3332
from graphrag.logger.progress import progress_iterable
33+
from graphrag.tokenizer.tokenizer import Tokenizer
3434

3535
logger = logging.getLogger(__name__)
3636

packages/graphrag/graphrag/index/operations/summarize_communities/graph_context/sort_context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
"""Sort context by degree in descending order."""
44

55
import pandas as pd
6-
from graphrag_common.types.tokenizer import Tokenizer
76

87
import graphrag.data_model.schemas as schemas
8+
from graphrag.tokenizer.tokenizer import Tokenizer
99

1010

1111
def sort_context(

packages/graphrag/graphrag/index/operations/summarize_communities/summarize_communities.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from collections.abc import Callable
88

99
import pandas as pd
10-
from graphrag_common.types.tokenizer import Tokenizer
1110

1211
import graphrag.data_model.schemas as schemas
1312
from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks
@@ -27,6 +26,7 @@
2726
from graphrag.index.utils.derive_from_rows import derive_from_rows
2827
from graphrag.language_model.protocol.base import ChatModel
2928
from graphrag.logger.progress import progress_ticker
29+
from graphrag.tokenizer.tokenizer import Tokenizer
3030

3131
logger = logging.getLogger(__name__)
3232

packages/graphrag/graphrag/index/operations/summarize_communities/text_unit_context/context_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from typing import cast
88

99
import pandas as pd
10-
from graphrag_common.types.tokenizer import Tokenizer
1110

1211
import graphrag.data_model.schemas as schemas
1312
from graphrag.index.operations.summarize_communities.build_mixed_context import (
@@ -19,6 +18,7 @@
1918
from graphrag.index.operations.summarize_communities.text_unit_context.sort_context import (
2019
sort_context,
2120
)
21+
from graphrag.tokenizer.tokenizer import Tokenizer
2222

2323
logger = logging.getLogger(__name__)
2424

0 commit comments

Comments
 (0)