docs/config/models.md (+8 -8)
@@ -6,9 +6,9 @@ This page contains information on selecting a model to use and options to supply
GraphRAG was built and tested using OpenAI models, so this is the default model set we support. This is not intended to be a limiter or statement of quality or fitness for your use case, only that it's the set we are most familiar with for prompting, tuning, and debugging.
- Starting with version 2.6.0, GraphRAG supports using [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models though it is important to note that when choosing a model it must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode).
+ GraphRAG uses [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models, though it is important to note that the model you choose must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode).
- Example using LiteLLm as the language model tool for GraphRAG:
+ Example using LiteLLM as the language model manager for GraphRAG:
```yaml
models:
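  # Continuation sketch only: the field names below are assumptions, not the
  # authoritative schema; see yaml.md for the exact parameters in your version.
  default_chat_model:
    type: chat                      # assumed LiteLLM-backed chat model type
    model_provider: openai          # assumed provider key
    model: gpt-4o
    api_key: ${GRAPHRAG_API_KEY}    # resolved from your environment/.env file
```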
@@ -37,7 +37,7 @@ See [Detailed Configuration](yaml.md) for more details on configuration. [View L
## Model Selection Considerations
- GraphRAG has been most thoroughly tested with the gpt-4 series of models from OpenAI, including gpt-4, gpt-4-turbo, gpt-4o, and gpt-4o-mini. Our [arXiv paper](https://arxiv.org/abs/2404.16130), for example, performed quality evaluation using gpt-4-turbo. As stated above, non-OpenAI models are now supported with GraphRAG 2.6.0 and onwards through the use of LiteLLM but the suite of gpt-4 series of models from OpenAI remain the most tested and supported suite of models for GraphRAG.
+ GraphRAG has been most thoroughly tested with the gpt-4 series of models from OpenAI, including gpt-4, gpt-4-turbo, gpt-4o, and gpt-4o-mini. Our [arXiv paper](https://arxiv.org/abs/2404.16130), for example, performed quality evaluation using gpt-4-turbo. As stated above, non-OpenAI models are supported through the use of LiteLLM, but the gpt-4 series from OpenAI remains the most tested and supported for GraphRAG; in other words, these are the models we know best and can help resolve issues with.
Versions of GraphRAG before 2.2.0 made extensive use of `max_tokens` and `logit_bias` to control generated response length or content. The o-series models introduced new, incompatible parameters because they include a reasoning component with different consumption patterns and response-generation attributes than non-reasoning models. GraphRAG 2.2.0 and later support these models, but there are important differences to understand before you switch.
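For illustration, a hedged sketch of what a reasoning-model configuration might look like (the field names, including `max_completion_tokens`, are assumptions; see [Detailed Configuration](yaml.md) for the authoritative schema):

```yaml
models:
  default_chat_model:
    model: o1                     # an o-series reasoning model
    # max_tokens and logit_bias are intentionally absent: o-series models
    # reject them in favor of reasoning-aware parameters such as
    # max_completion_tokens (name assumed; check your version's schema).
    max_completion_tokens: 5000
```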
@@ -85,30 +85,30 @@ global_search:
Another option would be to avoid using a language model at all for the graph extraction, instead using the `fast` [indexing method](../index/methods.md) that uses NLP for portions of the indexing phase in lieu of LLM APIs.
- ## Using Non-OpenAI Models
+ ## Using Custom Models
- As shown above, non-OpenAI models may be used via LiteLLM starting with GraphRAG version 2.6.0 but cases may still exist in which some users wish to use models not supported by LiteLLM. There are two approaches one can use to connect to unsupported models:
+ LiteLLM supports hundreds of models, but some users may still wish to use models it does not support. There are two approaches you can use to connect to unsupported models:
### Proxy APIs
Many users have used platforms such as [ollama](https://ollama.com/) and [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy) to proxy the underlying model HTTP calls to a different model provider. This works reasonably well, but we frequently see issues with malformed responses (especially JSON), so if you take this route, understand that your model needs to reliably return the specific response formats that GraphRAG expects. If you're having trouble with a model, you may need to adjust your prompts to coax the correct format, or intercept the response within your proxy and repair malformed output.
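If you do intercept responses in your proxy, a small repair step can salvage the most common failure modes. The sketch below is illustrative only and not part of GraphRAG; the function name and heuristics are our own:

```python
import json
import re


def parse_model_json(raw: str) -> dict:
    """Best-effort parse of a model response that is expected to be JSON."""
    # Happy path: the response is already valid JSON.
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass
    # Some models wrap JSON in markdown code fences; strip them.
    fenced = re.search(r"```(?:json)?\s*(\{.*\})\s*```", raw, re.DOTALL)
    if fenced:
        return json.loads(fenced.group(1))
    # Last resort: take the outermost brace-delimited span.
    start, end = raw.find("{"), raw.rfind("}")
    if start != -1 and end > start:
        return json.loads(raw[start : end + 1])
    raise ValueError("model response did not contain parseable JSON")
```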
### Model Protocol
- As of GraphRAG 2.0.0, we support model injection through the use of a standard chat and embedding Protocol and an accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.
+ We support model injection through the use of a standard chat and embedding Protocol and accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.
- Our Protocol is [defined here](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/protocol/base.py)
- We have a simple mock implementation in our tests that you can [reference here](https://github.com/microsoft/graphrag/blob/main/tests/mock_provider.py)
Once you have a model implementation, you need to register it with our ChatModelFactory or EmbeddingModelFactory:
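For example, a minimal registration sketch (the import path and `register` signature below are assumptions; verify them against the factory classes in the graphrag source):

```python
# Sketch only: module path and method names are assumptions, not the
# documented API; check graphrag/language_model/ for the real factories.
from graphrag.language_model.factory import ChatModelFactory  # assumed path

from my_package.models import MyChatModel  # your Protocol-conforming model


# Register under a custom key, then reference that key as the model type
# in your configuration so GraphRAG instantiates your implementation.
ChatModelFactory.register("my_chat_model", lambda **kwargs: MyChatModel(**kwargs))
```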
docs/config/yaml.md (+3 -3)
@@ -21,7 +21,7 @@ default_chat_model:
### models
- This is a dict of model configurations. The dict key is used to reference this configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them differentially in the workflow steps.
+ This is a dict of model configurations. The dict key is used to reference this configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them independently in the workflow steps.
For example:
```yml
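# Continuation sketch: keys and fields here are illustrative assumptions;
# the field reference below documents the real schema.
models:
  default_chat_model:       # this key is how other settings reference the model
    model: gpt-4o
  cheap_chat_model:         # a second configuration, referenced independently
    model: gpt-4o-mini
```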
@@ -173,7 +173,7 @@ Where to put all vectors for the system. Configured for lancedb by default. This
- `audience` **str** (only for AI Search) - Audience for managed identity token if managed identity authentication is used.
- `index_prefix` **str** - (optional) A prefix for the indexes you will create for embeddings. This stores all indexes (tables) for a given dataset ingest.
- `database_name` **str** - (cosmosdb only) Name of the database.
- - `embeddings_schema` **list[dict[str, str]]** (optional) - Enables customization for each of your embeddings.
+ - `embeddings_schema` **dict[str, dict[str, str]]** (optional) - Enables customization for each of your embeddings.
- `<supported_embedding>`:
- `index_name` **str**: (optional) - Name for the specific embedding index table.
- `id_field` **str**: (optional) - Field name to be used as id. Default=`id`
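
For illustration, a hedged sketch of the dict form (the `text_unit_text` embedding name and the `default_vector_store` key are assumptions; consult the vector store documentation for the supported names):

```yml
vector_store:
  default_vector_store:
    type: lancedb
    embeddings_schema:
      text_unit_text:              # assumed embedding name
        index_name: text_units_idx
        id_field: id
```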
@@ -332,7 +332,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c
#### Fields
- `embeddings` **bool** - Export embeddings snapshots to parquet.
- - `graphml` **bool** - Export graph snapshots to GraphML.
+ - `graphml` **bool** - Export graph snapshot to GraphML.
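
For context, a short sketch of these flags in `settings.yaml` (values are illustrative):

```yml
snapshots:
  embeddings: false   # skip parquet snapshots of embeddings
  graphml: true       # export the graph snapshot to GraphML
```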