Commit 2b70e4a

Tokenizer (#2051)
* Add LiteLLM chat and embedding model providers.
* Fix code review findings.
* Add litellm.
* Fix formatting.
* Update dictionary.
* Update litellm.
* Fix embedding.
* Remove manual use of tiktoken and replace with Tokenizer interface. Adds support for encoding and decoding the models supported by litellm.
* Update litellm.
* Configure litellm to drop unsupported params.
* Cleanup semversioner release notes.
* Add num_tokens util to Tokenizer interface.
* Update litellm service factories.
* Cleanup litellm chat/embedding model argument assignment.
* Update chat and embedding type field for litellm use and future migration away from fnllm.
* Flatten litellm service organization.
* Update litellm.
* Update litellm factory validation.
* Flatten litellm rate limit service organization.
* Update rate limiter - disable with None/null instead of 0.
* Fix usage of get_tokenizer.
* Update litellm service registrations.
* Add jitter to exponential retry.
* Update validation.
* Update validation.
* Add litellm request logging layer.
* Update cache key.
* Update defaults.

---------

Co-authored-by: Alonso Guevara <[email protected]>
1 parent 82cd3b7 commit 2b70e4a

66 files changed (+5305 / −1907 lines)
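
For orientation before the per-file diffs, here is a minimal sketch of what a model entry targeting the new LiteLLM providers might look like once this commit lands. The field names (type, model_provider, rate_limit_strategy, retry_strategy, tokens_per_minute, requests_per_minute) come from the diffs below; the dictionary shape and concrete values are illustrative assumptions, not part of the commit. Note the new convention that rate limiting is disabled with None/null rather than 0.

# Hypothetical settings fragment, not taken from the commit itself.
# "chat" routes through the new LiteLLM provider; model_provider is required for it.
litellm_chat_model = {
    "type": "chat",                           # new ModelType.Chat added in enums.py
    "model_provider": "openai",               # required when type is "chat"/"embedding"
    "model": "gpt-4o-mini",                   # any model litellm supports (illustrative)
    "retry_strategy": "exponential_backoff",  # one of the registered retry services
    "rate_limit_strategy": "static",          # or None to disable rate limiting entirely
    "tokens_per_minute": 50_000,              # "auto" is rejected for the litellm types
    "requests_per_minute": 300,               # None (not 0) disables this limit
}
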
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+{
+    "type": "minor",
+    "description": "Add LiteLLM chat and embedding model providers."
+}

dictionary.txt

Lines changed: 3 additions & 0 deletions
@@ -81,6 +81,7 @@ typer
 spacy
 kwargs
 ollama
+litellm

 # Library Methods
 iterrows
@@ -103,6 +104,8 @@ isin
 nocache
 nbconvert
 levelno
+acompletion
+aembedding

 # HTML
 nbsp

graphrag/api/prompt_tune.py

Lines changed: 2 additions & 1 deletion
@@ -47,6 +47,7 @@
 from graphrag.prompt_tune.generator.persona import generate_persona
 from graphrag.prompt_tune.loader.input import load_docs_in_chunks
 from graphrag.prompt_tune.types import DocSelectionType
+from graphrag.tokenizer.get_tokenizer import get_tokenizer

 logger = logging.getLogger(__name__)

@@ -166,7 +167,7 @@ async def generate_indexing_prompts(
         examples=examples,
         language=language,
         json_mode=False,  # config.llm.model_supports_json should be used, but these prompts are used in non-json mode by the index engine
-        encoding_model=extract_graph_llm_settings.encoding_model,
+        tokenizer=get_tokenizer(model_config=extract_graph_llm_settings),
         max_token_count=max_tokens,
         min_examples_required=min_examples_required,
     )
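
The change above replaces the old encoding_model plumbing with the Tokenizer interface mentioned in the commit message. A rough sketch of the intended usage follows, assuming the interface exposes encode/decode plus the num_tokens helper the commit message describes; the exact method names and signatures are an assumption, not confirmed by this diff.

# Sketch only: tokenizer resolution via the new interface instead of manual tiktoken.
tokenizer = get_tokenizer(model_config=extract_graph_llm_settings)

text = "GraphRAG prompt tuning sample text"
token_ids = tokenizer.encode(text)      # assumed: encode text to token ids
restored = tokenizer.decode(token_ids)  # assumed: decode token ids back to text
count = tokenizer.num_tokens(text)      # num_tokens util added in this commit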

graphrag/config/defaults.py

Lines changed: 34 additions & 0 deletions
@@ -3,6 +3,7 @@

 """Common default configuration values."""

+from collections.abc import Callable
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import ClassVar, Literal
@@ -23,6 +24,25 @@
 from graphrag.index.operations.build_noun_graph.np_extractors.stop_words import (
     EN_STOP_WORDS,
 )
+from graphrag.language_model.providers.litellm.services.rate_limiter.rate_limiter import (
+    RateLimiter,
+)
+from graphrag.language_model.providers.litellm.services.rate_limiter.static_rate_limiter import (
+    StaticRateLimiter,
+)
+from graphrag.language_model.providers.litellm.services.retry.exponential_retry import (
+    ExponentialRetry,
+)
+from graphrag.language_model.providers.litellm.services.retry.incremental_wait_retry import (
+    IncrementalWaitRetry,
+)
+from graphrag.language_model.providers.litellm.services.retry.native_wait_retry import (
+    NativeRetry,
+)
+from graphrag.language_model.providers.litellm.services.retry.random_wait_retry import (
+    RandomWaitRetry,
+)
+from graphrag.language_model.providers.litellm.services.retry.retry import Retry

 DEFAULT_OUTPUT_BASE_DIR = "output"
 DEFAULT_CHAT_MODEL_ID = "default_chat_model"
@@ -39,6 +59,18 @@
 COGNITIVE_SERVICES_AUDIENCE = "https://cognitiveservices.azure.com/.default"


+DEFAULT_RETRY_SERVICES: dict[str, Callable[..., Retry]] = {
+    "native": NativeRetry,
+    "exponential_backoff": ExponentialRetry,
+    "random_wait": RandomWaitRetry,
+    "incremental_wait": IncrementalWaitRetry,
+}
+
+DEFAULT_RATE_LIMITER_SERVICES: dict[str, Callable[..., RateLimiter]] = {
+    "static": StaticRateLimiter,
+}
+
+
 @dataclass
 class BasicSearchDefaults:
     """Default values for basic search."""
@@ -275,6 +307,7 @@ class LanguageModelDefaults:

     api_key: None = None
     auth_type: ClassVar[AuthType] = AuthType.APIKey
+    model_provider: str | None = None
     encoding_model: str = ""
     max_tokens: int | None = None
     temperature: float = 0
@@ -294,6 +327,7 @@ class LanguageModelDefaults:
     model_supports_json: None = None
     tokens_per_minute: Literal["auto"] = "auto"
     requests_per_minute: Literal["auto"] = "auto"
+    rate_limit_strategy: str | None = "static"
     retry_strategy: str = "native"
     max_retries: int = 10
     max_retry_wait: float = 10.0
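
The two registries added above map strategy names to service constructors. A small sketch of how a name might be resolved through them, assuming the corresponding factories ultimately perform a lookup of this shape and that the constructors accept the keyword arguments used by the validation code in graph_rag_config.py below (both are assumptions, not shown in this file):

# Illustrative lookup only; the real wiring goes through RetryFactory/RateLimiterFactory.
retry_cls = DEFAULT_RETRY_SERVICES["exponential_backoff"]  # -> ExponentialRetry
retry = retry_cls(max_attempts=10, max_retry_wait=10.0)     # assumed constructor args

limiter_cls = DEFAULT_RATE_LIMITER_SERVICES["static"]       # -> StaticRateLimiter
limiter = limiter_cls(rpm=300, tpm=50_000)                   # assumed constructor args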

graphrag/config/enums.py

Lines changed: 2 additions & 0 deletions
@@ -86,10 +86,12 @@ class ModelType(str, Enum):
     # Embeddings
     OpenAIEmbedding = "openai_embedding"
     AzureOpenAIEmbedding = "azure_openai_embedding"
+    Embedding = "embedding"

     # Chat Completion
     OpenAIChat = "openai_chat"
     AzureOpenAIChat = "azure_openai_chat"
+    Chat = "chat"

     # Debug
     MockChat = "mock_chat"

graphrag/config/models/graph_rag_config.py

Lines changed: 53 additions & 0 deletions
@@ -37,6 +37,12 @@
 from graphrag.config.models.text_embedding_config import TextEmbeddingConfig
 from graphrag.config.models.umap_config import UmapConfig
 from graphrag.config.models.vector_store_config import VectorStoreConfig
+from graphrag.language_model.providers.litellm.services.rate_limiter.rate_limiter_factory import (
+    RateLimiterFactory,
+)
+from graphrag.language_model.providers.litellm.services.retry.retry_factory import (
+    RetryFactory,
+)


 class GraphRagConfig(BaseModel):
@@ -89,6 +95,47 @@ def _validate_models(self) -> None:
         if defs.DEFAULT_EMBEDDING_MODEL_ID not in self.models:
             raise LanguageModelConfigMissingError(defs.DEFAULT_EMBEDDING_MODEL_ID)

+    def _validate_retry_services(self) -> None:
+        """Validate the retry services configuration."""
+        retry_factory = RetryFactory()
+
+        for model_id, model in self.models.items():
+            if model.retry_strategy != "none":
+                if model.retry_strategy not in retry_factory:
+                    msg = f"Retry strategy '{model.retry_strategy}' for model '{model_id}' is not registered. Available strategies: {', '.join(retry_factory.keys())}"
+                    raise ValueError(msg)
+
+                _ = retry_factory.create(
+                    strategy=model.retry_strategy,
+                    max_attempts=model.max_retries,
+                    max_retry_wait=model.max_retry_wait,
+                )
+
+    def _validate_rate_limiter_services(self) -> None:
+        """Validate the rate limiter services configuration."""
+        rate_limiter_factory = RateLimiterFactory()
+
+        for model_id, model in self.models.items():
+            if model.rate_limit_strategy is not None:
+                if model.rate_limit_strategy not in rate_limiter_factory:
+                    msg = f"Rate Limiter strategy '{model.rate_limit_strategy}' for model '{model_id}' is not registered. Available strategies: {', '.join(rate_limiter_factory.keys())}"
+                    raise ValueError(msg)
+
+                rpm = (
+                    model.requests_per_minute
+                    if type(model.requests_per_minute) is int
+                    else None
+                )
+                tpm = (
+                    model.tokens_per_minute
+                    if type(model.tokens_per_minute) is int
+                    else None
+                )
+                if rpm is not None or tpm is not None:
+                    _ = rate_limiter_factory.create(
+                        strategy=model.rate_limit_strategy, rpm=rpm, tpm=tpm
+                    )
+
     input: InputConfig = Field(
         description="The input configuration.", default=InputConfig()
     )
@@ -300,6 +347,11 @@ def _validate_vector_store_db_uri(self) -> None:
                 raise ValueError(msg)
             store.db_uri = str((Path(self.root_dir) / store.db_uri).resolve())

+    def _validate_factories(self) -> None:
+        """Validate the factories used in the configuration."""
+        self._validate_retry_services()
+        self._validate_rate_limiter_services()
+
     def get_language_model_config(self, model_id: str) -> LanguageModelConfig:
         """Get a model configuration by ID.

@@ -360,4 +412,5 @@ def _validate_model(self):
         self._validate_multi_output_base_dirs()
         self._validate_update_index_output_base_dir()
         self._validate_vector_store_db_uri()
+        self._validate_factories()
         return self

graphrag/config/models/language_model_config.py

Lines changed: 67 additions & 3 deletions
@@ -73,8 +73,11 @@ def _validate_auth_type(self) -> None:
         ConflictingSettingsError
             If the Azure authentication type conflicts with the model being used.
         """
-        if self.auth_type == AuthType.AzureManagedIdentity and (
-            self.type == ModelType.OpenAIChat or self.type == ModelType.OpenAIEmbedding
+        if (
+            self.auth_type == AuthType.AzureManagedIdentity
+            and self.type != ModelType.AzureOpenAIChat
+            and self.type != ModelType.AzureOpenAIEmbedding
+            and self.model_provider != "azure"  # indicates Litellm + AOI
         ):
             msg = f"auth_type of azure_managed_identity is not supported for model type {self.type}. Please rerun `graphrag init` and set the auth_type to api_key."
             raise ConflictingSettingsError(msg)
@@ -94,6 +97,27 @@ def _validate_type(self) -> None:
             msg = f"Model type {self.type} is not recognized, must be one of {ModelFactory.get_chat_models() + ModelFactory.get_embedding_models()}."
             raise KeyError(msg)

+    model_provider: str | None = Field(
+        description="The model provider to use.",
+        default=language_model_defaults.model_provider,
+    )
+
+    def _validate_model_provider(self) -> None:
+        """Validate the model provider.
+
+        Required when using Litellm.
+
+        Raises
+        ------
+        KeyError
+            If the model provider is not recognized.
+        """
+        if (self.type == ModelType.Chat or self.type == ModelType.Embedding) and (
+            self.model_provider is None or self.model_provider.strip() == ""
+        ):
+            msg = f"Model provider must be specified when using type == {self.type}."
+            raise KeyError(msg)
+
     model: str = Field(description="The LLM model to use.")
     encoding_model: str = Field(
         description="The encoding model to use",
@@ -103,12 +127,27 @@ def _validate_type(self) -> None:
     def _validate_encoding_model(self) -> None:
         """Validate the encoding model.

+        The default behavior is to use an encoding model that matches the LLM model.
+        LiteLLM supports 100+ models and their tokenization. There is no need to
+        set the encoding model when using the new LiteLLM provider as was done with fnllm provider.
+
+        Users can still manually specify a tiktoken based encoding model to use even with the LiteLLM provider
+        in which case the specified encoding model will be used regardless of the LLM model being used, even if
+        it is not an openai based model.
+
+        If not using LiteLLM provider, set the encoding model based on the LLM model name.
+        This is for backward compatibility with existing fnllm provider until fnllm is removed.
+
         Raises
         ------
         KeyError
             If the model name is not recognized.
         """
-        if self.encoding_model.strip() == "":
+        if (
+            self.type != ModelType.Chat
+            and self.type != ModelType.Embedding
+            and self.encoding_model.strip() == ""
+        ):
             self.encoding_model = tiktoken.encoding_name_for_model(self.model)

     api_base: str | None = Field(
@@ -129,6 +168,7 @@ def _validate_api_base(self) -> None:
         if (
             self.type == ModelType.AzureOpenAIChat
             or self.type == ModelType.AzureOpenAIEmbedding
+            or self.model_provider == "azure"  # indicates Litellm + AOI
         ) and (self.api_base is None or self.api_base.strip() == ""):
             raise AzureApiBaseMissingError(self.type)

@@ -150,6 +190,7 @@ def _validate_api_version(self) -> None:
         if (
             self.type == ModelType.AzureOpenAIChat
             or self.type == ModelType.AzureOpenAIEmbedding
+            or self.model_provider == "azure"  # indicates Litellm + AOI
         ) and (self.api_version is None or self.api_version.strip() == ""):
             raise AzureApiVersionMissingError(self.type)

@@ -171,6 +212,7 @@ def _validate_deployment_name(self) -> None:
         if (
             self.type == ModelType.AzureOpenAIChat
             or self.type == ModelType.AzureOpenAIEmbedding
+            or self.model_provider == "azure"  # indicates Litellm + AOI
         ) and (self.deployment_name is None or self.deployment_name.strip() == ""):
             raise AzureDeploymentNameMissingError(self.type)

@@ -212,6 +254,14 @@ def _validate_tokens_per_minute(self) -> None:
             msg = f"Tokens per minute must be a non zero positive number, 'auto' or null. Suggested value: {language_model_defaults.tokens_per_minute}."
             raise ValueError(msg)

+        if (
+            (self.type == ModelType.Chat or self.type == ModelType.Embedding)
+            and self.rate_limit_strategy is not None
+            and self.tokens_per_minute == "auto"
+        ):
+            msg = f"tokens_per_minute cannot be set to 'auto' when using type '{self.type}'. Please set it to a positive integer or null to disable."
+            raise ValueError(msg)
+
     requests_per_minute: int | Literal["auto"] | None = Field(
         description="The number of requests per minute to use for the LLM service.",
         default=language_model_defaults.requests_per_minute,
@@ -230,6 +280,19 @@ def _validate_requests_per_minute(self) -> None:
             msg = f"Requests per minute must be a non zero positive number, 'auto' or null. Suggested value: {language_model_defaults.requests_per_minute}."
             raise ValueError(msg)

+        if (
+            (self.type == ModelType.Chat or self.type == ModelType.Embedding)
+            and self.rate_limit_strategy is not None
+            and self.requests_per_minute == "auto"
+        ):
+            msg = f"requests_per_minute cannot be set to 'auto' when using type '{self.type}'. Please set it to a positive integer or null to disable."
+            raise ValueError(msg)
+
+    rate_limit_strategy: str | None = Field(
+        description="The rate limit strategy to use for the LLM service.",
+        default=language_model_defaults.rate_limit_strategy,
+    )
+
     retry_strategy: str = Field(
         description="The retry strategy to use for the LLM service.",
         default=language_model_defaults.retry_strategy,
@@ -318,6 +381,7 @@ def _validate_azure_settings(self) -> None:
     @model_validator(mode="after")
     def _validate_model(self):
         self._validate_type()
+        self._validate_model_provider()
         self._validate_auth_type()
         self._validate_api_key()
         self._validate_tokens_per_minute()
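
Taken together, the validators in this file encode the configuration contract for the LiteLLM path: type "chat" or "embedding" requires model_provider, model_provider == "azure" pulls in the api_base/api_version/deployment_name checks, and "auto" rate limits are rejected while a rate_limit_strategy is active. A sketch of an entry that satisfies those rules; the values and the endpoint placeholder are illustrative assumptions, and the dict shape assumes the usual pydantic construction of LanguageModelConfig:

# Hypothetical Azure OpenAI entry routed through LiteLLM; values are illustrative.
azure_litellm_chat = {
    "type": "chat",                                      # LiteLLM chat provider
    "model_provider": "azure",                           # triggers the Azure-specific validators
    "model": "gpt-4o",                                   # illustrative model name
    "deployment_name": "gpt-4o",                         # required because model_provider == "azure"
    "api_base": "https://<resource>.openai.azure.com",   # required; placeholder endpoint
    "api_version": "2024-02-01",                         # required; illustrative version
    "auth_type": "azure_managed_identity",               # allowed when model_provider == "azure"
    "rate_limit_strategy": "static",
    "tokens_per_minute": 50_000,                         # "auto" would raise ValueError here
    "requests_per_minute": 300,                          # None (not 0) would disable the limit
}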

graphrag/factory/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+# Copyright (c) 2025 Microsoft Corporation.
+# Licensed under the MIT License
+
+"""Factory module."""
