Skip to content

Commit 423b8ec

Browse files
authored
Merge branch 'main' into dependabot/github_actions/actions/upload-artifact-6
2 parents 7a01c0c + ed51a53 commit 423b8ec

36 files changed

+5457
-2225
lines changed

.github/workflows/code_checks.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
run-code-check:
2929
runs-on: ubuntu-latest
3030
steps:
31-
- uses: actions/checkout@v5.0.0
31+
- uses: actions/checkout@v6.0.1
3232
- name: Install uv
3333
uses: astral-sh/setup-uv@v7
3434
with:

.github/workflows/docker.yml

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,33 +7,56 @@ on:
77
branches:
88
- main
99
paths:
10-
- Dockerfile
10+
- vllm.Dockerfile
11+
- sglang.Dockerfile
1112
- .github/workflows/docker.yml
1213
- uv.lock
1314
pull_request:
1415
branches:
1516
- main
17+
- f/sglang-support
1618
paths:
17-
- Dockerfile
19+
- vllm.Dockerfile
20+
- sglang.Dockerfile
1821
- .github/workflows/docker.yml
1922
- uv.lock
2023

2124
jobs:
2225
push_to_registry:
23-
name: Push Docker image to Docker Hub
26+
name: Build and push Docker images
2427
runs-on:
25-
- self-hosted
26-
- docker
28+
- ubuntu-latest
29+
strategy:
30+
matrix:
31+
backend: [vllm, sglang]
2732
steps:
2833
- name: Checkout repository
29-
uses: actions/checkout@v5.0.0
34+
uses: actions/checkout@v6.0.1
3035

31-
- name: Extract vLLM version
32-
id: vllm-version
36+
- name: Extract backend version
37+
id: backend-version
3338
run: |
34-
VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
39+
VERSION=$(grep -A 1 "name = \"${{ matrix.backend }}\"" uv.lock | grep version | cut -d '"' -f 2)
3540
echo "version=$VERSION" >> $GITHUB_OUTPUT
3641
42+
- name: Maximize build space
43+
run: |
44+
echo "Disk space before cleanup:"
45+
df -h
46+
# Remove unnecessary pre-installed software
47+
sudo rm -rf /usr/share/dotnet
48+
sudo rm -rf /usr/local/lib/android
49+
sudo rm -rf /opt/ghc
50+
sudo rm -rf /opt/hostedtoolcache/CodeQL
51+
sudo rm -rf /usr/local/share/boost
52+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
53+
# Clean apt cache
54+
sudo apt-get clean
55+
# Remove docker images
56+
docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
57+
echo "Disk space after cleanup:"
58+
df -h
59+
3760
- name: Set up Docker Buildx
3861
uses: docker/setup-buildx-action@v3
3962

@@ -45,17 +68,18 @@ jobs:
4568

4669
- name: Extract metadata (tags, labels) for Docker
4770
id: meta
48-
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893
71+
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051
4972
with:
50-
images: vectorinstitute/vector-inference
73+
images: vectorinstitute/vector-inference-${{ matrix.backend }}
5174

5275
- name: Build and push Docker image
5376
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
5477
with:
5578
context: .
56-
file: ./Dockerfile
79+
file: ./${{ matrix.backend }}.Dockerfile
5780
push: true
5881
tags: |
5982
${{ steps.meta.outputs.tags }}
60-
vectorinstitute/vector-inference:${{ steps.vllm-version.outputs.version }}
83+
vectorinstitute/vector-inference-${{ matrix.backend }}:${{ steps.backend-version.outputs.version }}
84+
vectorinstitute/vector-inference-${{ matrix.backend }}:latest
6185
labels: ${{ steps.meta.outputs.labels }}

.github/workflows/docs.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
runs-on: ubuntu-latest
5252
steps:
5353
- name: Checkout code
54-
uses: actions/checkout@v5.0.0
54+
uses: actions/checkout@v6.0.1
5555
with:
5656
fetch-depth: 0 # Fetch all history for proper versioning
5757

@@ -67,7 +67,7 @@ jobs:
6767
python-version-file: ".python-version"
6868

6969
- name: Install the project
70-
run: uv sync --all-extras --group docs --prerelease=allow
70+
run: uv sync --group docs --prerelease=allow
7171

7272
- name: Build docs
7373
run: uv run --frozen mkdocs build
@@ -88,7 +88,7 @@ jobs:
8888
runs-on: ubuntu-latest
8989
steps:
9090
- name: Checkout code
91-
uses: actions/checkout@v5.0.0
91+
uses: actions/checkout@v6.0.1
9292
with:
9393
fetch-depth: 0 # Fetch all history for proper versioning
9494

@@ -104,15 +104,15 @@ jobs:
104104
python-version-file: ".python-version"
105105

106106
- name: Install the project
107-
run: uv sync --all-extras --group docs --frozen
107+
run: uv sync --group docs --frozen
108108

109109
- name: Configure Git Credentials
110110
run: |
111111
git config user.name github-actions[bot]
112112
git config user.email 41898282+github-actions[bot]@users.noreply.github.com
113113
114114
- name: Download artifact
115-
uses: actions/download-artifact@v6
115+
uses: actions/download-artifact@v7
116116
with:
117117
name: docs-site
118118
path: site

.github/workflows/publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
sudo apt-get update
1414
sudo apt-get install libcurl4-openssl-dev libssl-dev
1515
16-
- uses: actions/checkout@v5.0.0
16+
- uses: actions/checkout@v6.0.1
1717

1818
- name: Install uv
1919
uses: astral-sh/setup-uv@v7

.github/workflows/unit_tests.yml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
matrix:
4444
python-version: ["3.10", "3.11", "3.12"]
4545
steps:
46-
- uses: actions/checkout@v5.0.0
46+
- uses: actions/checkout@v6.0.1
4747

4848
- name: Install uv
4949
uses: astral-sh/setup-uv@v7
@@ -58,16 +58,26 @@ jobs:
5858
python-version: ${{ matrix.python-version }}
5959

6060
- name: Install the project
61+
env:
62+
# Ensure uv uses the matrix interpreter instead of `.python-version` (3.10),
63+
# otherwise the "3.11"/"3.12" jobs silently run on 3.10.
64+
UV_PYTHON: ${{ matrix.python-version }}
6165
run: uv sync --dev --prerelease=allow
6266

6367
- name: Install dependencies and check code
68+
env:
69+
UV_PYTHON: ${{ matrix.python-version }}
6470
run: |
6571
uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
6672
6773
- name: Install the core package only
74+
env:
75+
UV_PYTHON: ${{ matrix.python-version }}
6876
run: uv sync --no-dev
6977

7078
- name: Run package import tests
79+
env:
80+
UV_PYTHON: ${{ matrix.python-version }}
7181
run: |
7282
uv run --frozen pytest tests/test_imports.py
7383

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ repos:
1717
- id: check-toml
1818

1919
- repo: https://github.com/astral-sh/ruff-pre-commit
20-
rev: 'v0.14.5'
20+
rev: 'v0.14.10'
2121
hooks:
2222
- id: ruff
2323
args: [--fix, --exit-non-zero-on-fix]
@@ -26,7 +26,7 @@ repos:
2626
types_or: [python, jupyter]
2727

2828
- repo: https://github.com/pre-commit/mirrors-mypy
29-
rev: v1.18.2
29+
rev: v1.19.1
3030
hooks:
3131
- id: mypy
3232
entry: python3 -m mypy --config-file pyproject.toml

MODEL_TRACKING.md

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ This document tracks all model weights available in the `/model-weights` directo
9494
| Model | Configuration |
9595
|:------|:-------------|
9696
| `Llama-4-Scout-17B-16E-Instruct` ||
97+
| `Llama-4-Maverick-17B-128E-Instruct` ||
9798

9899
### Mistral AI: Mistral
99100
| Model | Configuration |
@@ -128,6 +129,7 @@ This document tracks all model weights available in the `/model-weights` directo
128129
|:------|:-------------|
129130
| `Qwen2.5-0.5B-Instruct` ||
130131
| `Qwen2.5-1.5B-Instruct` ||
132+
| `Qwen2.5-3B` ||
131133
| `Qwen2.5-3B-Instruct` ||
132134
| `Qwen2.5-7B-Instruct` ||
133135
| `Qwen2.5-14B-Instruct` ||
@@ -138,12 +140,14 @@ This document tracks all model weights available in the `/model-weights` directo
138140
| Model | Configuration |
139141
|:------|:-------------|
140142
| `Qwen2.5-Math-1.5B-Instruct` ||
143+
| `Qwen2.5-Math-7B` ||
141144
| `Qwen2.5-Math-7B-Instruct` ||
142145
| `Qwen2.5-Math-72B-Instruct` ||
143146

144147
### Qwen: Qwen2.5-Coder
145148
| Model | Configuration |
146149
|:------|:-------------|
150+
| `Qwen2.5-Coder-3B-Instruct` ||
147151
| `Qwen2.5-Coder-7B-Instruct` ||
148152

149153
### Qwen: QwQ
@@ -162,6 +166,12 @@ This document tracks all model weights available in the `/model-weights` directo
162166
| `Qwen2-Math-72B-Instruct` ||
163167
| `Qwen2-VL-7B-Instruct` ||
164168

169+
### Qwen: Qwen2.5-VL
170+
| Model | Configuration |
171+
|:------|:-------------|
172+
| `Qwen2.5-VL-3B-Instruct` ||
173+
| `Qwen2.5-VL-7B-Instruct` ||
174+
165175
### Qwen: Qwen3
166176
| Model | Configuration |
167177
|:------|:-------------|
@@ -191,27 +201,76 @@ This document tracks all model weights available in the `/model-weights` directo
191201
| Model | Configuration |
192202
|:------|:-------------|
193203
| `gpt-oss-120b` ||
204+
| `gpt-oss-20b` ||
194205

195-
### Other LLM Models
206+
207+
#### AI21: Jamba
196208
| Model | Configuration |
197209
|:------|:-------------|
198210
| `AI21-Jamba-1.5-Mini` ||
199-
| `aya-expanse-32b` | ✅ (as Aya-Expanse-32B) |
211+
212+
#### Cohere for AI: Aya
213+
| Model | Configuration |
214+
|:------|:-------------|
215+
| `aya-expanse-32b` ||
216+
217+
#### OpenAI: GPT-2
218+
| Model | Configuration |
219+
|:------|:-------------|
200220
| `gpt2-large` ||
201221
| `gpt2-xl` ||
202-
| `gpt-oss-120b` ||
203-
| `instructblip-vicuna-7b` ||
222+
223+
#### InternLM: InternLM2
224+
| Model | Configuration |
225+
|:------|:-------------|
204226
| `internlm2-math-plus-7b` ||
227+
228+
#### Janus
229+
| Model | Configuration |
230+
|:------|:-------------|
205231
| `Janus-Pro-7B` ||
232+
233+
#### Moonshot AI: Kimi
234+
| Model | Configuration |
235+
|:------|:-------------|
206236
| `Kimi-K2-Instruct` ||
237+
238+
#### Mistral AI: Ministral
239+
| Model | Configuration |
240+
|:------|:-------------|
207241
| `Ministral-8B-Instruct-2410` ||
208-
| `Molmo-7B-D-0924` ||
242+
243+
#### AI2: OLMo
244+
| Model | Configuration |
245+
|:------|:-------------|
209246
| `OLMo-1B-hf` ||
210247
| `OLMo-7B-hf` ||
211248
| `OLMo-7B-SFT` ||
249+
250+
#### EleutherAI: Pythia
251+
| Model | Configuration |
252+
|:------|:-------------|
212253
| `pythia` ||
254+
255+
#### Qwen: Qwen1.5
256+
| Model | Configuration |
257+
|:------|:-------------|
213258
| `Qwen1.5-72B-Chat` ||
259+
260+
#### ReasonFlux
261+
| Model | Configuration |
262+
|:------|:-------------|
214263
| `ReasonFlux-PRM-7B` ||
264+
265+
#### LMSYS: Vicuna
266+
| Model | Configuration |
267+
|:------|:-------------|
268+
| `vicuna-13b-v1.5` ||
269+
270+
#### Google: T5 (Encoder-Decoder Models)
271+
**Note**: These are encoder-decoder (T5) models, not decoder-only LLMs.
272+
| Model | Configuration |
273+
|:------|:-------------|
215274
| `t5-large-lm-adapt` ||
216275
| `t5-xl-lm-adapt` ||
217276
| `mt5-xl-lm-adapt` ||
@@ -238,10 +297,10 @@ This document tracks all model weights available in the `/model-weights` directo
238297
### Meta: Llama 3.2 Vision
239298
| Model | Configuration |
240299
|:------|:-------------|
241-
| `Llama-3.2-11B-Vision` | |
242-
| `Llama-3.2-11B-Vision-Instruct` ||
243-
| `Llama-3.2-90B-Vision` | |
244-
| `Llama-3.2-90B-Vision-Instruct` ||
300+
| `Llama-3.2-11B-Vision` | |
301+
| `Llama-3.2-11B-Vision-Instruct` | ✅ (SGLang only) |
302+
| `Llama-3.2-90B-Vision` | |
303+
| `Llama-3.2-90B-Vision-Instruct` | ✅ (SGLang only) |
245304

246305
### Mistral: Pixtral
247306
| Model | Configuration |
@@ -266,10 +325,19 @@ This document tracks all model weights available in the `/model-weights` directo
266325
| `deepseek-vl2` ||
267326
| `deepseek-vl2-small` ||
268327

328+
### Google: MedGemma
329+
| Model | Configuration |
330+
|:------|:-------------|
331+
| `medgemma-4b-it` ||
332+
| `medgemma-27b-it` ||
333+
| `medgemma-27b-text-it` ||
334+
269335
### Other VLM Models
270336
| Model | Configuration |
271337
|:------|:-------------|
338+
| `instructblip-vicuna-7b` ||
272339
| `MiniCPM-Llama3-V-2_5` ||
340+
| `Molmo-7B-D-0924` ||
273341

274342
---
275343

@@ -298,6 +366,8 @@ This document tracks all model weights available in the `/model-weights` directo
298366
| `data2vec` ||
299367
| `gte-modernbert-base` ||
300368
| `gte-Qwen2-7B-instruct` ||
369+
| `KaLM-Embedding-Gemma3-12B-2511` ||
370+
| `llama-embed-nemotron-8b` ||
301371
| `m2-bert-80M-32k-retrieval` ||
302372
| `m2-bert-80M-8k-retrieval` ||
303373

@@ -313,7 +383,7 @@ This document tracks all model weights available in the `/model-weights` directo
313383

314384
---
315385

316-
## Multimodal Models
386+
## Vision Models
317387

318388
### CLIP
319389
| Model | Configuration |

0 commit comments

Comments
 (0)