Commit f14cda2

Improve default llm retry logic to be more optimized (#1701)

1 parent: b8b949f

50 files changed: +606 −567 lines changed
Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "add dynamic retry logic."
+}

graphrag/api/prompt_tune.py

Lines changed: 8 additions & 1 deletion

@@ -13,6 +13,7 @@
 
 from pydantic import PositiveInt, validate_call
 
+import graphrag.config.defaults as defs
 from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks
 from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.llm.load_llm import load_llm

@@ -95,8 +96,14 @@ async def generate_indexing_prompts(
     )
 
     # Create LLM from config
-    # TODO: Expose way to specify Prompt Tuning model ID through config
+    # TODO: Expose a way to specify Prompt Tuning model ID through config
     default_llm_settings = config.get_language_model_config(PROMPT_TUNING_MODEL_ID)
+
+    # if max_retries is not set, inject a dynamically assigned value based on the number of expected LLM calls
+    # to be made or fallback to a default value in the worst case
+    if default_llm_settings.max_retries == -1:
+        default_llm_settings.max_retries = min(len(doc_list), defs.LLM_MAX_RETRIES)
+
     llm = load_llm(
         "prompt_tuning",
         default_llm_settings,
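
The interesting piece here is the -1 sentinel: prompt tuning makes roughly one LLM call per sampled document, so the retry budget is sized to the workload instead of a fixed constant. A minimal sketch of that resolution logic, assuming LLM_MAX_RETRIES = 10 as set in graphrag/config/defaults.py below (resolve_max_retries is a hypothetical helper, not part of this commit):

# Sketch of the dynamic retry resolution introduced above.
LLM_MAX_RETRIES = 10  # fallback ceiling from graphrag/config/defaults.py

def resolve_max_retries(configured: int, expected_llm_calls: int) -> int:
    # -1 means "not set by the user"; size the retry budget to the number
    # of expected LLM calls, capped by the static default in the worst case.
    if configured == -1:
        return min(expected_llm_calls, LLM_MAX_RETRIES)
    return configured

assert resolve_max_retries(-1, 4) == 4    # 4 sampled docs -> 4 retries
assert resolve_max_retries(-1, 50) == 10  # capped by LLM_MAX_RETRIES
assert resolve_max_retries(5, 50) == 5    # an explicit setting always wins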

graphrag/config/defaults.py

Lines changed: 2 additions & 5 deletions

@@ -24,7 +24,7 @@
 DEFAULT_EMBEDDING_MODEL_ID = "default_embedding_model"
 ASYNC_MODE = AsyncType.Threaded
 ENCODING_MODEL = "cl100k_base"
-AZURE_AUDIENCE = "https://cognitiveservices.azure.com/.default"
+COGNITIVE_SERVICES_AUDIENCE = "https://cognitiveservices.azure.com/.default"
 AUTH_TYPE = AuthType.APIKey
 #
 # LLM Parameters

@@ -39,15 +39,12 @@
 LLM_REQUEST_TIMEOUT = 180.0
 LLM_TOKENS_PER_MINUTE = 50_000
 LLM_REQUESTS_PER_MINUTE = 1_000
+RETRY_STRATEGY = "native"
 LLM_MAX_RETRIES = 10
 LLM_MAX_RETRY_WAIT = 10.0
 LLM_PRESENCE_PENALTY = 0.0
-LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION = True
 LLM_CONCURRENT_REQUESTS = 25
 
-PARALLELIZATION_STAGGER = 0.3
-PARALLELIZATION_NUM_THREADS = 50
-
 #
 # Text embedding
 #
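
Net effect for anything that imported the removed constants: staggered thread pools are gone, and both concurrency and retries are driven by the LLM settings. A hypothetical migration sketch (the "before" lines reflect the constants deleted above):

import graphrag.config.defaults as defs

# before this commit (constants deleted above, no longer valid):
#   num_threads = defs.PARALLELIZATION_NUM_THREADS  # was 50
#   stagger = defs.PARALLELIZATION_STAGGER          # was 0.3

# after this commit, a single knob drives concurrency, and retries are explicit:
num_threads = defs.LLM_CONCURRENT_REQUESTS  # 25
retry_strategy = defs.RETRY_STRATEGY        # "native"
max_retries = defs.LLM_MAX_RETRIES          # 10, the ceiling for dynamic retries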

graphrag/config/init_content.py

Lines changed: 24 additions & 15 deletions

@@ -14,32 +14,41 @@
 
 models:
   {defs.DEFAULT_CHAT_MODEL_ID}:
-    api_key: ${{GRAPHRAG_API_KEY}} # set this in the generated .env file
     type: {defs.LLM_TYPE.value} # or azure_openai_chat
+    # api_base: https://<instance>.openai.azure.com
+    # api_version: 2024-05-01-preview
     auth_type: {defs.AUTH_TYPE.value} # or azure_managed_identity
-    model: {defs.LLM_MODEL}
-    model_supports_json: true # recommended if this is available for your model.
-    parallelization_num_threads: {defs.PARALLELIZATION_NUM_THREADS}
-    parallelization_stagger: {defs.PARALLELIZATION_STAGGER}
-    async_mode: {defs.ASYNC_MODE.value} # or asyncio
+    api_key: ${{GRAPHRAG_API_KEY}} # set this in the generated .env file
     # audience: "https://cognitiveservices.azure.com/.default"
-    # api_base: https://<instance>.openai.azure.com
-    # api_version: 2024-02-15-preview
     # organization: <organization_id>
+    model: {defs.LLM_MODEL}
     # deployment_name: <azure_model_deployment_name>
+    # encoding_model: {defs.ENCODING_MODEL} # automatically set by tiktoken if left undefined
+    model_supports_json: true # recommended if this is available for your model.
+    concurrent_requests: {defs.LLM_CONCURRENT_REQUESTS} # max number of simultaneous LLM requests allowed
+    async_mode: {defs.ASYNC_MODE.value} # or asyncio
+    retry_strategy: native
+    max_retries: -1 # set to -1 for dynamic retry logic (most optimal setting based on server response)
+    tokens_per_minute: 0 # set to 0 to disable rate limiting
+    requests_per_minute: 0 # set to 0 to disable rate limiting
   {defs.DEFAULT_EMBEDDING_MODEL_ID}:
-    api_key: ${{GRAPHRAG_API_KEY}}
     type: {defs.EMBEDDING_TYPE.value} # or azure_openai_embedding
-    auth_type: {defs.AUTH_TYPE.value} # or azure_managed_identity
-    model: {defs.EMBEDDING_MODEL}
-    parallelization_num_threads: {defs.PARALLELIZATION_NUM_THREADS}
-    parallelization_stagger: {defs.PARALLELIZATION_STAGGER}
-    async_mode: {defs.ASYNC_MODE.value} # or asyncio
     # api_base: https://<instance>.openai.azure.com
-    # api_version: 2024-02-15-preview
+    # api_version: 2024-05-01-preview
+    auth_type: {defs.AUTH_TYPE.value} # or azure_managed_identity
+    api_key: ${{GRAPHRAG_API_KEY}}
     # audience: "https://cognitiveservices.azure.com/.default"
     # organization: <organization_id>
+    model: {defs.EMBEDDING_MODEL}
     # deployment_name: <azure_model_deployment_name>
+    # encoding_model: {defs.ENCODING_MODEL} # automatically set by tiktoken if left undefined
+    model_supports_json: true # recommended if this is available for your model.
+    concurrent_requests: {defs.LLM_CONCURRENT_REQUESTS} # max number of simultaneous LLM requests allowed
+    async_mode: {defs.ASYNC_MODE.value} # or asyncio
+    retry_strategy: native
+    max_retries: -1 # set to -1 for dynamic retry logic (most optimal setting based on server response)
+    tokens_per_minute: 0 # set to 0 to disable rate limiting
+    requests_per_minute: 0 # set to 0 to disable rate limiting
 
 vector_store:
   {defs.VECTOR_STORE_DEFAULT_ID}:
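
For users, the visible change is in the generated settings.yaml: the parallelization_* keys disappear, and each model block gains retry_strategy, max_retries, and explicit rate-limit knobs. A self-contained sketch of reading the new keys (values mirror the template above; assumes pyyaml is installed):

import yaml

settings = yaml.safe_load("""
models:
  default_chat_model:
    retry_strategy: native
    max_retries: -1
    tokens_per_minute: 0
    requests_per_minute: 0
""")

chat = settings["models"]["default_chat_model"]
assert chat["max_retries"] == -1        # sentinel: retries resolved dynamically
assert chat["tokens_per_minute"] == 0   # 0 disables rate limiting
assert chat["requests_per_minute"] == 0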

graphrag/config/models/community_reports_config.py

Lines changed: 1 addition & 2 deletions

@@ -49,8 +49,7 @@ def resolved_strategy(
         return self.strategy or {
             "type": CreateCommunityReportsStrategyType.graph_intelligence,
             "llm": model_config.model_dump(),
-            "stagger": model_config.parallelization_stagger,
-            "num_threads": model_config.parallelization_num_threads,
+            "num_threads": model_config.concurrent_requests,
             "graph_prompt": (Path(root_dir) / self.graph_prompt).read_text(
                 encoding="utf-8"
             )
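
The same one-line substitution repeats in the extract-claims, extract-graph, and summarize-descriptions configs below: the per-workflow "stagger" knob is dropped, and worker threads are sized by the model's concurrent_requests. A sketch of the resulting strategy dict, with hypothetical placeholder values:

# model_config stands in for a LanguageModelConfig dump; values are illustrative.
model_config = {"model": "gpt-4o", "concurrent_requests": 25}

strategy = {
    "llm": model_config,
    # one knob now drives both request concurrency and worker-thread count
    "num_threads": model_config["concurrent_requests"],
}
assert "stagger" not in strategy  # removed along with parallelization_stagger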

graphrag/config/models/extract_claims_config.py

Lines changed: 1 addition & 2 deletions

@@ -46,8 +46,7 @@ def resolved_strategy(
         """Get the resolved claim extraction strategy."""
         return self.strategy or {
             "llm": model_config.model_dump(),
-            "stagger": model_config.parallelization_stagger,
-            "num_threads": model_config.parallelization_num_threads,
+            "num_threads": model_config.concurrent_requests,
             "extraction_prompt": (Path(root_dir) / self.prompt).read_text(
                 encoding="utf-8"
             )

graphrag/config/models/extract_graph_config.py

Lines changed: 1 addition & 2 deletions

@@ -47,8 +47,7 @@ def resolved_strategy(
         return self.strategy or {
             "type": ExtractEntityStrategyType.graph_intelligence,
             "llm": model_config.model_dump(),
-            "stagger": model_config.parallelization_stagger,
-            "num_threads": model_config.parallelization_num_threads,
+            "num_threads": model_config.concurrent_requests,
             "extraction_prompt": (Path(root_dir) / self.prompt).read_text(
                 encoding="utf-8"
             )

graphrag/config/models/extract_graph_nlp_config.py

Lines changed: 2 additions & 2 deletions

@@ -64,7 +64,7 @@ class ExtractGraphNLPConfig(BaseModel):
     text_analyzer: TextAnalyzerConfig = Field(
         description="The text analyzer configuration.", default=TextAnalyzerConfig()
     )
-    parallelization_num_threads: int = Field(
+    concurrent_requests: int = Field(
         description="The number of threads to use for the extraction process.",
-        default=defs.PARALLELIZATION_NUM_THREADS,
+        default=defs.LLM_CONCURRENT_REQUESTS,
     )
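
Worth noting: the rename also shifts this field's default, from PARALLELIZATION_NUM_THREADS (50) to LLM_CONCURRENT_REQUESTS (25), per the defaults.py diff above. A runnable check of that behavior, using a simplified stand-in for the real class:

from pydantic import BaseModel, Field

LLM_CONCURRENT_REQUESTS = 25  # from graphrag/config/defaults.py in this commit

class ExtractGraphNLPConfigSketch(BaseModel):
    # renamed from parallelization_num_threads, whose default was 50
    concurrent_requests: int = Field(default=LLM_CONCURRENT_REQUESTS)

assert ExtractGraphNLPConfigSketch().concurrent_requests == 25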

graphrag/config/models/language_model_config.py

Lines changed: 5 additions & 13 deletions

@@ -31,7 +31,7 @@ def _validate_api_key(self) -> None:
         API Key is required when using OpenAI API
         or when using Azure API with API Key authentication.
         For the time being, this check is extra verbose for clarity.
-        It will also through an exception if an API Key is provided
+        It will also raise an exception if an API Key is provided
         when one is not expected such as the case of using Azure
         Managed Identity.

@@ -199,6 +199,10 @@ def _validate_deployment_name(self) -> None:
         description="The number of requests per minute to use for the LLM service.",
         default=defs.LLM_REQUESTS_PER_MINUTE,
     )
+    retry_strategy: str = Field(
+        description="The retry strategy to use for the LLM service.",
+        default=defs.RETRY_STRATEGY,
+    )
     max_retries: int = Field(
         description="The maximum number of retries to use for the LLM service.",
         default=defs.LLM_MAX_RETRIES,

@@ -207,25 +211,13 @@ def _validate_deployment_name(self) -> None:
         description="The maximum retry wait to use for the LLM service.",
         default=defs.LLM_MAX_RETRY_WAIT,
     )
-    sleep_on_rate_limit_recommendation: bool = Field(
-        description="Whether to sleep on rate limit recommendations.",
-        default=defs.LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION,
-    )
     concurrent_requests: int = Field(
         description="Whether to use concurrent requests for the LLM service.",
         default=defs.LLM_CONCURRENT_REQUESTS,
     )
     responses: list[str | BaseModel] | None = Field(
         default=None, description="Static responses to use in mock mode."
     )
-    parallelization_stagger: float = Field(
-        description="The stagger to use for the LLM service.",
-        default=defs.PARALLELIZATION_STAGGER,
-    )
-    parallelization_num_threads: int = Field(
-        description="The number of threads to use for the LLM service.",
-        default=defs.PARALLELIZATION_NUM_THREADS,
-    )
     async_mode: AsyncType = Field(
         description="The async mode to use.", default=defs.ASYNC_MODE
     )
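
Taken together, the model schema now exposes retry_strategy alongside the -1 sentinel for max_retries, while sleep_on_rate_limit_recommendation and the parallelization fields are removed. A standalone pydantic sketch of just the retry surface (not the real class, which carries many more fields and validators):

from pydantic import BaseModel, Field

# defaults from graphrag/config/defaults.py in this commit
RETRY_STRATEGY = "native"
LLM_MAX_RETRIES = 10

class RetrySettingsSketch(BaseModel):
    retry_strategy: str = Field(default=RETRY_STRATEGY)
    max_retries: int = Field(default=LLM_MAX_RETRIES)

# the generated settings.yaml opts into dynamic retries via the -1 sentinel
cfg = RetrySettingsSketch(max_retries=-1)
assert cfg.retry_strategy == "native"
assert cfg.max_retries == -1  # resolved at call sites, e.g. min(len(doc_list), 10)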

graphrag/config/models/summarize_descriptions_config.py

Lines changed: 1 addition & 2 deletions

@@ -40,8 +40,7 @@ def resolved_strategy(
         return self.strategy or {
             "type": SummarizeStrategyType.graph_intelligence,
             "llm": model_config.model_dump(),
-            "stagger": model_config.parallelization_stagger,
-            "num_threads": model_config.parallelization_num_threads,
+            "num_threads": model_config.concurrent_requests,
             "summarize_prompt": (Path(root_dir) / self.prompt).read_text(
                 encoding="utf-8"
             )
