microsoft
diff --git a/graphrag/config/models/graph_rag_config.py b/graphrag/config/models/graph_rag_config.py
Lines changed: 2 additions & 1 deletion
diff --git a/graphrag/index/flows/create_final_community_reports.py b/graphrag/index/flows/create_final_community_reports.py
Lines changed: 0 additions & 3 deletions
diff --git a/graphrag/index/flows/create_final_covariates.py b/graphrag/index/flows/create_final_covariates.py
Lines changed: 0 additions & 3 deletions
diff --git a/graphrag/index/flows/extract_graph.py b/graphrag/index/flows/extract_graph.py
Lines changed: 0 additions & 4 deletions
diff --git a/graphrag/index/flows/generate_text_embeddings.py b/graphrag/index/flows/generate_text_embeddings.py
Lines changed: 0 additions & 5 deletions
diff --git a/graphrag/index/operations/embed_text/embed_text.py b/graphrag/index/operations/embed_text/embed_text.py
Lines changed: 2 additions & 8 deletions
diff --git a/graphrag/index/operations/embed_text/strategies/mock.py b/graphrag/index/operations/embed_text/strategies/mock.py
Lines changed: 0 additions & 2 deletions
diff --git a/graphrag/index/operations/embed_text/strategies/openai.py b/graphrag/index/operations/embed_text/strategies/openai.py
Lines changed: 3 additions & 7 deletions
diff --git a/graphrag/index/operations/embed_text/strategies/typing.py b/graphrag/index/operations/embed_text/strategies/typing.py
Lines changed: 0 additions & 2 deletions
diff --git a/graphrag/index/operations/extract_covariates/extract_covariates.py b/graphrag/index/operations/extract_covariates/extract_covariates.py
Lines changed: 3 additions & 8 deletions
@@ -212,7 +212,8 @@ def get_language_model_config(self, model_id: str) -> LanguageModelConfig:
         if model_id not in self.models:
             err_msg = f"Model ID {model_id} not found in configuration."
             raise ValueError(err_msg)
-        return self.models[model_id]
+        # TODO: shouldn't self.models be validated already?
+        return LanguageModelConfig.model_construct(**dict(self.models[model_id]))  # type: ignore
 
     @model_validator(mode="after")
     def _validate_model(self):
 
@@ -10,7 +10,6 @@
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
 from graphrag.config.enums import AsyncType
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.operations.summarize_communities import (
     prepare_community_reports,
     restore_community_hierarchy,
@@ -47,7 +46,6 @@ async def create_final_community_reports(
     callbacks: WorkflowCallbacks,
     cache: PipelineCache,
     summarization_strategy: dict,
-    config: GraphRagConfig,
     async_mode: AsyncType = AsyncType.AsyncIO,
     num_threads: int = 4,
 ) -> pd.DataFrame:
@@ -80,7 +78,6 @@ async def create_final_community_reports(
         strategy=summarization_strategy,
         async_mode=async_mode,
         num_threads=num_threads,
-        config=config,
     )
 
     community_reports["community"] = community_reports["community"].astype(int)
 
@@ -11,7 +11,6 @@
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
 from graphrag.config.enums import AsyncType
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.operations.extract_covariates.extract_covariates import (
     extract_covariates,
 )
@@ -23,7 +22,6 @@ async def create_final_covariates(
     cache: PipelineCache,
     covariate_type: str,
     extraction_strategy: dict[str, Any] | None,
-    config: GraphRagConfig,
     async_mode: AsyncType = AsyncType.AsyncIO,
     entity_types: list[str] | None = None,
     num_threads: int = 4,
@@ -42,7 +40,6 @@ async def create_final_covariates(
         async_mode=async_mode,
         entity_types=entity_types,
         num_threads=num_threads,
-        config=config,
     )
     text_units.drop(columns=["text_unit_id"], inplace=True)  # don't pollute the global
     covariates["id"] = covariates["covariate_type"].apply(lambda _x: str(uuid4()))
 
@@ -11,7 +11,6 @@
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
 from graphrag.config.enums import AsyncType
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.operations.extract_entities import extract_entities
 from graphrag.index.operations.summarize_descriptions import (
     summarize_descriptions,
@@ -22,7 +21,6 @@ async def extract_graph(
     text_units: pd.DataFrame,
     callbacks: WorkflowCallbacks,
     cache: PipelineCache,
-    config: GraphRagConfig,
     extraction_strategy: dict[str, Any] | None = None,
     extraction_num_threads: int = 4,
     extraction_async_mode: AsyncType = AsyncType.AsyncIO,
@@ -42,7 +40,6 @@ async def extract_graph(
         async_mode=extraction_async_mode,
         entity_types=entity_types,
         num_threads=extraction_num_threads,
-        config=config,
     )
 
     if not _validate_data(entities):
@@ -64,7 +61,6 @@ async def extract_graph(
         cache=cache,
         strategy=summarization_strategy,
         num_threads=summarization_num_threads,
-        config=config,
     )
 
     base_relationship_edges = _prep_edges(relationships, relationship_summaries)
 
@@ -9,7 +9,6 @@
 
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.config.embeddings import (
     community_full_content_embedding,
     community_summary_embedding,
@@ -38,7 +37,6 @@ async def generate_text_embeddings(
     storage: PipelineStorage,
     text_embed_config: dict,
     embedded_fields: set[str],
-    config: GraphRagConfig,
     snapshot_embeddings_enabled: bool = False,
 ) -> None:
     """All the steps to generate all embeddings."""
@@ -104,7 +102,6 @@ async def generate_text_embeddings(
             storage=storage,
             text_embed_config=text_embed_config,
             snapshot_embeddings_enabled=snapshot_embeddings_enabled,
-            config=config,
             **embedding_param_map[field],
         )
 
@@ -118,7 +115,6 @@ async def _run_and_snapshot_embeddings(
     storage: PipelineStorage,
     text_embed_config: dict,
     snapshot_embeddings_enabled: bool,
-    config: GraphRagConfig,
 ) -> None:
     """All the steps to generate single embedding."""
     if text_embed_config:
@@ -129,7 +125,6 @@ async def _run_and_snapshot_embeddings(
             embed_column=embed_column,
             embedding_name=name,
             strategy=text_embed_config["strategy"],
-            config=config,
         )
 
         if snapshot_embeddings_enabled is True:
 
@@ -12,7 +12,6 @@
 
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.operations.embed_text.strategies.typing import TextEmbeddingStrategy
 from graphrag.utils.embeddings import create_collection_name
 from graphrag.vector_stores.base import BaseVectorStore, VectorStoreDocument
@@ -43,7 +42,6 @@ async def embed_text(
     embed_column: str,
     strategy: dict,
     embedding_name: str,
-    config: GraphRagConfig,
     id_column: str = "id",
     title_column: str | None = None,
 ):
@@ -98,7 +96,6 @@ async def embed_text(
             vector_store_config=vector_store_workflow_config,
             id_column=id_column,
             title_column=title_column,
-            config=config,
         )
 
     return await _text_embed_in_memory(
@@ -107,7 +104,6 @@ async def embed_text(
         cache=cache,
         embed_column=embed_column,
         strategy=strategy,
-        config=config,
     )
 
 
@@ -117,14 +113,13 @@ async def _text_embed_in_memory(
     cache: PipelineCache,
     embed_column: str,
     strategy: dict,
-    config: GraphRagConfig,
 ):
     strategy_type = strategy["type"]
     strategy_exec = load_strategy(strategy_type)
     strategy_args = {**strategy}
 
     texts: list[str] = input[embed_column].to_numpy().tolist()
-    result = await strategy_exec(texts, callbacks, cache, strategy_args, config)
+    result = await strategy_exec(texts, callbacks, cache, strategy_args)
 
     return result.embeddings
 
@@ -137,7 +132,6 @@ async def _text_embed_with_vector_store(
     strategy: dict[str, Any],
     vector_store: BaseVectorStore,
     vector_store_config: dict,
-    config: GraphRagConfig,
     id_column: str = "id",
     title_column: str | None = None,
 ):
@@ -182,7 +176,7 @@ async def _text_embed_with_vector_store(
         texts: list[str] = batch[embed_column].to_numpy().tolist()
         titles: list[str] = batch[title].to_numpy().tolist()
         ids: list[str] = batch[id_column].to_numpy().tolist()
-        result = await strategy_exec(texts, callbacks, cache, strategy_args, config)
+        result = await strategy_exec(texts, callbacks, cache, strategy_args)
         if result.embeddings:
             embeddings = [
                 embedding for embedding in result.embeddings if embedding is not None
 
@@ -9,7 +9,6 @@
 
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.operations.embed_text.strategies.typing import TextEmbeddingResult
 from graphrag.logger.progress import ProgressTicker, progress_ticker
 
@@ -19,7 +18,6 @@ async def run(  # noqa RUF029 async is required for interface
     callbacks: WorkflowCallbacks,
     cache: PipelineCache,
     _args: dict[str, Any],
-    _config: GraphRagConfig,
 ) -> TextEmbeddingResult:
     """Run the Claim extraction chain."""
     input = input if isinstance(input, Iterable) else [input]
 
@@ -12,7 +12,6 @@
 
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.config.models.language_model_config import LanguageModelConfig
 from graphrag.index.llm.load_llm import load_llm_embeddings
 from graphrag.index.operations.embed_text.strategies.typing import TextEmbeddingResult
@@ -28,19 +27,16 @@ async def run(
     callbacks: WorkflowCallbacks,
     cache: PipelineCache,
     args: dict[str, Any],
-    config: GraphRagConfig,
 ) -> TextEmbeddingResult:
     """Run the Claim extraction chain."""
     if is_null(input):
         return TextEmbeddingResult(embeddings=None)
 
     batch_size = args.get("batch_size", 16)
     batch_max_tokens = args.get("batch_max_tokens", 8191)
-    embeddings_llm_settings = config.get_language_model_config(
-        config.embeddings.model_id
-    )
-    splitter = _get_splitter(embeddings_llm_settings, batch_max_tokens)
-    llm = _get_llm(embeddings_llm_settings, callbacks, cache)
+    llm_config = args["llm"]
+    splitter = _get_splitter(llm_config, batch_max_tokens)
+    llm = _get_llm(llm_config, callbacks, cache)
     semaphore: asyncio.Semaphore = asyncio.Semaphore(args.get("num_threads", 4))
 
     # Break up the input texts. The sizes here indicate how many snippets are in each input text
 
@@ -8,7 +8,6 @@
 
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
-from graphrag.config.models.graph_rag_config import GraphRagConfig
 
 
 @dataclass
@@ -24,7 +23,6 @@ class TextEmbeddingResult:
         WorkflowCallbacks,
         PipelineCache,
         dict,
-        GraphRagConfig,
     ],
     Awaitable[TextEmbeddingResult],
 ]
@@ -14,7 +14,7 @@
 from graphrag.cache.pipeline_cache import PipelineCache
 from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
 from graphrag.config.enums import AsyncType
-from graphrag.config.models.graph_rag_config import GraphRagConfig
+from graphrag.config.models.language_model_config import LanguageModelConfig
 from graphrag.index.llm.load_llm import load_llm
 from graphrag.index.operations.extract_covariates.claim_extractor import ClaimExtractor
 from graphrag.index.operations.extract_covariates.typing import (
@@ -35,7 +35,6 @@ async def extract_covariates(
     cache: PipelineCache,
     column: str,
     covariate_type: str,
-    config: GraphRagConfig,
     strategy: dict[str, Any] | None,
     async_mode: AsyncType = AsyncType.AsyncIO,
     entity_types: list[str] | None = None,
@@ -60,7 +59,6 @@ async def run_strategy(row):
             callbacks=callbacks,
             cache=cache,
             strategy_config=strategy_config,
-            config=config,
         )
         return [
             create_row_from_claim_data(row, item, covariate_type)
@@ -89,15 +87,12 @@ async def run_claim_extraction(
     callbacks: WorkflowCallbacks,
     cache: PipelineCache,
     strategy_config: dict[str, Any],
-    config: GraphRagConfig,
 ) -> CovariateExtractionResult:
     """Run the Claim extraction chain."""
-    claim_extraction_llm_settings = config.get_language_model_config(
-        config.claim_extraction.model_id
-    )
+    llm_config = LanguageModelConfig.model_construct(**strategy_config["llm"])
     llm = load_llm(
         "claim_extraction",
-        claim_extraction_llm_settings,
+        llm_config,
         callbacks=callbacks,
         cache=cache,
     )