VectorInstitute
diff --git a/‎.github/workflows/code_checks.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/code_checks.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/docker.yml‎
Lines changed: 37 additions & 13 deletions b/‎.github/workflows/docker.yml‎
Lines changed: 37 additions & 13 deletions
diff --git a/‎.github/workflows/docs.yml‎
Lines changed: 5 additions & 5 deletions b/‎.github/workflows/docs.yml‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎.github/workflows/publish.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/publish.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/unit_tests.yml‎
Lines changed: 11 additions & 1 deletion b/‎.github/workflows/unit_tests.yml‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 2 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎MODEL_TRACKING.md‎
Lines changed: 80 additions & 10 deletions b/‎MODEL_TRACKING.md‎
Lines changed: 80 additions & 10 deletions
@@ -28,7 +28,7 @@ jobs:
   run-code-check:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1
       - name: Install uv
         uses: astral-sh/setup-uv@v7
         with:
 
@@ -7,33 +7,56 @@ on:
     branches:
       - main
     paths:
-      - Dockerfile
+      - vllm.Dockerfile
+      - sglang.Dockerfile
       - .github/workflows/docker.yml
       - uv.lock
   pull_request:
     branches:
       - main
+      - f/sglang-support
     paths:
-      - Dockerfile
+      - vllm.Dockerfile
+      - sglang.Dockerfile
       - .github/workflows/docker.yml
       - uv.lock
 
 jobs:
   push_to_registry:
-    name: Push Docker image to Docker Hub
+    name: Build and push Docker images
     runs-on:
-      - self-hosted
-      - docker
+      - ubuntu-latest
+    strategy:
+      matrix:
+        backend: [vllm, sglang]
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v5.0.0
+        uses: actions/checkout@v6.0.1
 
-      - name: Extract vLLM version
-        id: vllm-version
+      - name: Extract backend version
+        id: backend-version
         run: |
-          VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
+          VERSION=$(grep -A 1 "name = \"${{ matrix.backend }}\"" uv.lock | grep version | cut -d '"' -f 2)
           echo "version=$VERSION" >> $GITHUB_OUTPUT
 
+      - name: Maximize build space
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          # Remove unnecessary pre-installed software
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          # Clean apt cache
+          sudo apt-get clean
+          # Remove docker images
+          docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+          echo "Disk space after cleanup:"
+          df -h
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -45,17 +68,18 @@ jobs:
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893
+        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051
         with:
-          images: vectorinstitute/vector-inference
+          images: vectorinstitute/vector-inference-${{ matrix.backend }}
 
       - name: Build and push Docker image
         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
         with:
           context: .
-          file: ./Dockerfile
+          file: ./${{ matrix.backend }}.Dockerfile
           push: true
           tags: |
             ${{ steps.meta.outputs.tags }}
-            vectorinstitute/vector-inference:${{ steps.vllm-version.outputs.version }}
+            vectorinstitute/vector-inference-${{ matrix.backend }}:${{ steps.backend-version.outputs.version }}
+            vectorinstitute/vector-inference-${{ matrix.backend }}:latest
           labels: ${{ steps.meta.outputs.labels }}
@@ -51,7 +51,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5.0.0
+        uses: actions/checkout@v6.0.1
         with:
           fetch-depth: 0  # Fetch all history for proper versioning
 
@@ -67,7 +67,7 @@ jobs:
           python-version-file: ".python-version"
 
       - name: Install the project
-        run: uv sync --all-extras --group docs --prerelease=allow
+        run: uv sync --group docs --prerelease=allow
 
       - name: Build docs
         run: uv run --frozen mkdocs build
@@ -88,7 +88,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5.0.0
+        uses: actions/checkout@v6.0.1
         with:
           fetch-depth: 0  # Fetch all history for proper versioning
 
@@ -104,15 +104,15 @@ jobs:
           python-version-file: ".python-version"
 
       - name: Install the project
-        run: uv sync --all-extras --group docs --frozen
+        run: uv sync --group docs --frozen
 
       - name: Configure Git Credentials
         run: |
           git config user.name github-actions[bot]
           git config user.email 41898282+github-actions[bot]@users.noreply.github.com
 
       - name: Download artifact
-        uses: actions/download-artifact@v6
+        uses: actions/download-artifact@v7
         with:
           name: docs-site
           path: site
 
@@ -13,7 +13,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install libcurl4-openssl-dev libssl-dev
 
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1
 
       - name: Install uv
         uses: astral-sh/setup-uv@v7
 
@@ -43,7 +43,7 @@ jobs:
       matrix:
         python-version: ["3.10", "3.11", "3.12"]
     steps:
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1
 
       - name: Install uv
         uses: astral-sh/setup-uv@v7
@@ -58,16 +58,26 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Install the project
+        env:
+          # Ensure uv uses the matrix interpreter instead of `.python-version` (3.10),
+          # otherwise the "3.11"/"3.12" jobs silently run on 3.10.
+          UV_PYTHON: ${{ matrix.python-version }}
         run: uv sync --dev --prerelease=allow
 
       - name: Install dependencies and check code
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: |
           uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
 
       - name: Install the core package only
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: uv sync --no-dev
 
       - name: Run package import tests
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: |
           uv run --frozen pytest tests/test_imports.py
 
 
@@ -17,7 +17,7 @@ repos:
     - id: check-toml
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.14.5'
+    rev: 'v0.14.10'
     hooks:
     - id: ruff
       args: [--fix, --exit-non-zero-on-fix]
@@ -26,7 +26,7 @@ repos:
       types_or: [python, jupyter]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.18.2
+    rev: v1.19.1
     hooks:
     - id: mypy
       entry: python3 -m mypy --config-file pyproject.toml
 
@@ -94,6 +94,7 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `Llama-4-Scout-17B-16E-Instruct` | ❌ |
+| `Llama-4-Maverick-17B-128E-Instruct` | ❌ |
 
 ### Mistral AI: Mistral
 | Model | Configuration |
@@ -128,6 +129,7 @@ This document tracks all model weights available in the `/model-weights` directo
 |:------|:-------------|
 | `Qwen2.5-0.5B-Instruct` | ✅ |
 | `Qwen2.5-1.5B-Instruct` | ✅ |
+| `Qwen2.5-3B` | ❌ |
 | `Qwen2.5-3B-Instruct` | ✅ |
 | `Qwen2.5-7B-Instruct` | ✅ |
 | `Qwen2.5-14B-Instruct` | ✅ |
@@ -138,12 +140,14 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `Qwen2.5-Math-1.5B-Instruct` | ✅ |
+| `Qwen2.5-Math-7B` | ❌ |
 | `Qwen2.5-Math-7B-Instruct` | ✅ |
 | `Qwen2.5-Math-72B-Instruct` | ✅ |
 
 ### Qwen: Qwen2.5-Coder
 | Model | Configuration |
 |:------|:-------------|
+| `Qwen2.5-Coder-3B-Instruct` | ✅ |
 | `Qwen2.5-Coder-7B-Instruct` | ✅ |
 
 ### Qwen: QwQ
@@ -162,6 +166,12 @@ This document tracks all model weights available in the `/model-weights` directo
 | `Qwen2-Math-72B-Instruct` | ❌ |
 | `Qwen2-VL-7B-Instruct` | ❌ |
 
+### Qwen: Qwen2.5-VL
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2.5-VL-3B-Instruct` | ❌ |
+| `Qwen2.5-VL-7B-Instruct` | ✅ |
+
 ### Qwen: Qwen3
 | Model | Configuration |
 |:------|:-------------|
@@ -191,27 +201,76 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `gpt-oss-120b` | ✅ |
+| `gpt-oss-20b` | ✅ |
 
 ### Other LLM Models
+
+#### AI21: Jamba
 | Model | Configuration |
 |:------|:-------------|
 | `AI21-Jamba-1.5-Mini` | ❌ |
-| `aya-expanse-32b` | ✅ (as Aya-Expanse-32B) |
+
+#### Cohere for AI: Aya
+| Model | Configuration |
+|:------|:-------------|
+| `aya-expanse-32b` | ✅ |
+
+#### OpenAI: GPT-2
+| Model | Configuration |
+|:------|:-------------|
 | `gpt2-large` | ❌ |
 | `gpt2-xl` | ❌ |
-| `gpt-oss-120b` | ❌ |
-| `instructblip-vicuna-7b` | ❌ |
+
+#### InternLM: InternLM2
+| Model | Configuration |
+|:------|:-------------|
 | `internlm2-math-plus-7b` | ❌ |
+
+#### Janus
+| Model | Configuration |
+|:------|:-------------|
 | `Janus-Pro-7B` | ❌ |
+
+#### Moonshot AI: Kimi
+| Model | Configuration |
+|:------|:-------------|
 | `Kimi-K2-Instruct` | ❌ |
+
+#### Mistral AI: Ministral
+| Model | Configuration |
+|:------|:-------------|
 | `Ministral-8B-Instruct-2410` | ❌ |
-| `Molmo-7B-D-0924` | ✅ |
+
+#### AI2: OLMo
+| Model | Configuration |
+|:------|:-------------|
 | `OLMo-1B-hf` | ❌ |
 | `OLMo-7B-hf` | ❌ |
 | `OLMo-7B-SFT` | ❌ |
+
+#### EleutherAI: Pythia
+| Model | Configuration |
+|:------|:-------------|
 | `pythia` | ❌ |
+
+#### Qwen: Qwen1.5
+| Model | Configuration |
+|:------|:-------------|
 | `Qwen1.5-72B-Chat` | ❌ |
+
+#### ReasonFlux
+| Model | Configuration |
+|:------|:-------------|
 | `ReasonFlux-PRM-7B` | ❌ |
+
+#### LMSYS: Vicuna
+| Model | Configuration |
+|:------|:-------------|
+| `vicuna-13b-v1.5` | ❌ |
+
+#### Google: T5 (Encoder-Decoder Models)
+**Note**: These are encoder-decoder (T5) models, not decoder-only LLMs.
+
+| Model | Configuration |
+|:------|:-------------|
 | `t5-large-lm-adapt` | ❌ |
 | `t5-xl-lm-adapt` | ❌ |
 | `mt5-xl-lm-adapt` | ❌ |
@@ -238,10 +297,10 @@ This document tracks all model weights available in the `/model-weights` directo
 ### Meta: Llama 3.2 Vision
 | Model | Configuration |
 |:------|:-------------|
-| `Llama-3.2-11B-Vision` | ✅ |
-| `Llama-3.2-11B-Vision-Instruct` | ✅ |
-| `Llama-3.2-90B-Vision` | ✅ |
-| `Llama-3.2-90B-Vision-Instruct` | ✅ |
+| `Llama-3.2-11B-Vision` | ❌ |
+| `Llama-3.2-11B-Vision-Instruct` | ✅ (SGLang only) |
+| `Llama-3.2-90B-Vision` | ❌ |
+| `Llama-3.2-90B-Vision-Instruct` | ✅ (SGLang only) |
 
 ### Mistral: Pixtral
 | Model | Configuration |
@@ -266,10 +325,19 @@ This document tracks all model weights available in the `/model-weights` directo
 | `deepseek-vl2` | ✅ |
 | `deepseek-vl2-small` | ✅ |
 
+### Google: MedGemma
+| Model | Configuration |
+|:------|:-------------|
+| `medgemma-4b-it` | ✅ |
+| `medgemma-27b-it` | ✅ |
+| `medgemma-27b-text-it` | ❌ |
+
 ### Other VLM Models
 | Model | Configuration |
 |:------|:-------------|
+| `instructblip-vicuna-7b` | ❌ |
 | `MiniCPM-Llama3-V-2_5` | ❌ |
+| `Molmo-7B-D-0924` | ✅ |
 
 ---
 
@@ -298,6 +366,8 @@ This document tracks all model weights available in the `/model-weights` directo
 | `data2vec` | ❌ |
 | `gte-modernbert-base` | ❌ |
 | `gte-Qwen2-7B-instruct` | ❌ |
+| `KaLM-Embedding-Gemma3-12B-2511` | ❌ |
+| `llama-embed-nemotron-8b` | ❌ |
 | `m2-bert-80M-32k-retrieval` | ❌ |
 | `m2-bert-80M-8k-retrieval` | ❌ |
 
@@ -313,7 +383,7 @@ This document tracks all model weights available in the `/model-weights` directo
 
 ---
 
-## Multimodal Models
+## Vision Models
 
 ### CLIP
 | Model | Configuration |