Skip to content

Commit 4d68912

Browse files
ganisbackJames
andauthored
upgrade vllm,sglang and ms-swift (#265)
Co-authored-by: James <xzgan@opencsg.com>
1 parent 1e7b71c commit 4d68912

File tree

12 files changed

+118
-18
lines changed

12 files changed

+118
-18
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
-- Removes the listed ms-swift models from resource_models.
-- A statement_timeout of 0 switches the PostgreSQL statement timeout off
-- for the session running this script.
SET statement_timeout = 0;

--bun:split

-- One set-based DELETE: a row is removed when its engine is 'ms-swift'
-- and its model_name is any of the names listed below.
DELETE FROM resource_models
WHERE engine_name = 'ms-swift'
  AND model_name IN (
      'SmallThinker-3B-Preview',
      'internlm3-8b-instruct',
      'DeepSeek-R1',
      'DeepSeek-R1-Zero',
      'DeepSeek-R1-Distill-Qwen-1.5B',
      'DeepSeek-R1-Distill-Qwen-7B',
      'DeepSeek-R1-Distill-Qwen-14B',
      'DeepSeek-R1-Distill-Qwen-32B',
      'DeepSeek-R1-Distill-Llama-8B',
      'DeepSeek-R1-Distill-Llama-70B',
      'phi-4',
      'MiniMax-Text-01',
      'MiniCPM-V-2_6',
      'MiniCPM-o-2_6',
      'MiniMax-VL-01',
      'Qwen2.5-7B-Instruct-1M',
      'Qwen2.5-14B-Instruct-1M',
      'UI-TARS-2B-SFT',
      'UI-TARS-7B-SFT',
      'UI-TARS-7B-DPO',
      'UI-TARS-72B-SFT',
      'UI-TARS-72B-DPO',
      'Qwen2.5-VL-3B-Instruct',
      'Qwen2.5-VL-7B-Instruct',
      'Qwen2.5-VL-72B-Instruct',
      'Janus-Pro-1B',
      'Janus-Pro-7B',
      'Qwen2.5-Math-7B-PRM800K'
  );
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
-- Registers the listed ms-swift models in resource_models as 'nvidia' / 'gpu' entries.
-- A statement_timeout of 0 switches the PostgreSQL statement timeout off
-- for the session running this script.
SET statement_timeout = 0;

--bun:split

-- One multi-row INSERT; rows whose (engine_name, model_name) pair already
-- exists are skipped by the ON CONFLICT clause instead of raising an error.
INSERT INTO resource_models (resource_name, engine_name, model_name, type)
VALUES
    ('nvidia', 'ms-swift', 'SmallThinker-3B-Preview', 'gpu'),
    ('nvidia', 'ms-swift', 'internlm3-8b-instruct', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1-Zero', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1-Distill-Qwen-1.5B', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1-Distill-Qwen-7B', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1-Distill-Qwen-14B', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1-Distill-Qwen-32B', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1-Distill-Llama-8B', 'gpu'),
    ('nvidia', 'ms-swift', 'DeepSeek-R1-Distill-Llama-70B', 'gpu'),
    ('nvidia', 'ms-swift', 'phi-4', 'gpu'),
    ('nvidia', 'ms-swift', 'MiniMax-Text-01', 'gpu'),
    ('nvidia', 'ms-swift', 'MiniCPM-V-2_6', 'gpu'),
    ('nvidia', 'ms-swift', 'MiniCPM-o-2_6', 'gpu'),
    ('nvidia', 'ms-swift', 'MiniMax-VL-01', 'gpu'),
    ('nvidia', 'ms-swift', 'Qwen2.5-7B-Instruct-1M', 'gpu'),
    ('nvidia', 'ms-swift', 'Qwen2.5-14B-Instruct-1M', 'gpu'),
    ('nvidia', 'ms-swift', 'UI-TARS-2B-SFT', 'gpu'),
    ('nvidia', 'ms-swift', 'UI-TARS-7B-SFT', 'gpu'),
    ('nvidia', 'ms-swift', 'UI-TARS-7B-DPO', 'gpu'),
    ('nvidia', 'ms-swift', 'UI-TARS-72B-SFT', 'gpu'),
    ('nvidia', 'ms-swift', 'UI-TARS-72B-DPO', 'gpu'),
    ('nvidia', 'ms-swift', 'Qwen2.5-VL-3B-Instruct', 'gpu'),
    ('nvidia', 'ms-swift', 'Qwen2.5-VL-7B-Instruct', 'gpu'),
    ('nvidia', 'ms-swift', 'Qwen2.5-VL-72B-Instruct', 'gpu'),
    ('nvidia', 'ms-swift', 'Janus-Pro-1B', 'gpu'),
    ('nvidia', 'ms-swift', 'Janus-Pro-7B', 'gpu'),
    ('nvidia', 'ms-swift', 'Qwen2.5-Math-7B-PRM800K', 'gpu')
ON CONFLICT (engine_name, model_name) DO NOTHING;

common/utils/common/repo.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,12 @@ func GetSourceTypeAndPathFromURL(url string) (string, string, error) {
179179
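// GetBuiltInTaskFromTags returns the name of the first tag matching types.TextGeneration or types.Text2Image, defaulting to types.TextGeneration.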
180180
func GetBuiltInTaskFromTags(tags []database.Tag) string {
181181
for _, tag := range tags {
182-
if tag.BuiltIn && tag.Category == "task" {
182+
if tag.Name == string(types.TextGeneration) {
183+
return tag.Name
184+
}
185+
if tag.Name == string(types.Text2Image) {
183186
return tag.Name
184187
}
185188
}
186-
return ""
189+
return string(types.TextGeneration)
187190
}

component/model_ce_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ func TestModelComponent_Deploy(t *testing.T) {
5353
RepoID: 1,
5454
SKU: "123",
5555
Type: types.ServerlessType,
56+
Task: "text-generation",
5657
}).Return(111, nil)
5758

5859
id, err := mc.Deploy(ctx, types.DeployActReq{

docker/finetune/Dockerfile.ms-swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,12 @@ RUN pip install --no-cache-dir jupyterlab numpy==1.26.4 \
3535
gradio-client==1.4.0
3636
# Create a working directory
3737
WORKDIR /etc/csghub
38-
RUN git clone https://github.com/modelscope/ms-swift.git --branch v3.0.1 --single-branch
38+
RUN git clone https://github.com/modelscope/ms-swift.git --branch v3.1.0 --single-branch
3939
RUN cd ms-swift && pip install --no-cache-dir -e "."
40-
#because this library is update frequently, we use new line
40+
#Due to the frequent updates of this library, we use a new line
4141
RUN pip install --no-cache-dir vllm==v0.6.3.post1 transformers==4.47.1 timm==1.0.11 evalscope==0.5.5
4242
#install flash-attn
43-
RUN pip install --no-build-isolation --no-cache-dir ninja flash-attn
43+
# Install the prebuilt flash-attn 2.7.4.post1 wheel (CUDA 12, torch 2.4, CPython 3.10,
# linux x86_64 per the wheel filename). --no-cache-dir keeps pip's download cache out
# of the image layer, matching the other pip install steps in this Dockerfile.
# NOTE(review): this wheel is x86_64-only; a linux/arm64 build of this image would
# presumably fail at this step - confirm against the platforms the image is built for.
RUN pip install --no-cache-dir https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
4444
# setup supervisord
4545
RUN mkdir -p /var/log/supervisord
4646
COPY swift/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

docker/finetune/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ docker buildx build --platform linux/amd64,linux/arm64 \
2424
```
2525
## Build Multi-Platform Images for swift
2626
```bash
27-
#opencsg-registry.cn-beijing.cr.aliyuncs.com/public/ms-swift:v3.0.1
27+
#opencsg-registry.cn-beijing.cr.aliyuncs.com/public/ms-swift:v3.1.0
2828
export BUILDX_NO_DEFAULT_ATTESTATIONS=1
29-
export IMAGE_TAG=v3.0.1
29+
export IMAGE_TAG=v3.1.0
3030
docker buildx build --platform linux/amd64,linux/arm64 \
3131
-t ${OPENCSG_ACR}/public/ms-swift:${IMAGE_TAG} \
3232
-t ${OPENCSG_ACR}/public/ms-swift:latest \
@@ -49,7 +49,7 @@ docker buildx build --platform linux/amd64,linux/arm64 \
4949
| Image Name | Version | CUDA Version | Fix
5050
| --- | --- | --- |--- |
5151
| llama-factory | 1.21-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2 | 12.1 |- |
52-
| ms-swift | v3.0.1 | 12.4 |- |
52+
| ms-swift | v3.1.0 | 12.4 |- |
5353

5454

5555
## Run Finetune Image Locally
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import Any, List
2+
import argparse
3+
from swift.llm import MODEL_MAPPING, TEMPLATE_MAPPING, ModelType, TemplateType
4+
5+
6+
def get_url_suffix(model_id):
    """Return the portion of ``model_id`` that precedes its first ':'.

    A ``model_id`` without any ':' is returned unchanged.
    """
    # partition() yields the whole string as the head when ':' is absent,
    # so no explicit membership test is needed.
    head, _sep, _rest = model_id.partition(':')
    return head
10+
11+
12+
def generate_model_sql(output_path="resource_model.sql"):
    """Append one ``INSERT INTO resource_models`` statement per model to a file.

    Walks every model type returned by ``ModelType.get_model_name_list()``,
    looks it up in ``MODEL_MAPPING`` and, for each model in its groups that has
    a non-empty ``hf_model_id``, emits an INSERT that registers the model name
    (the text after the last '/') for the 'ms-swift' engine.

    Args:
        output_path: File that receives the statements. It is opened in append
            mode, so existing content is kept and running the function again
            adds the statements a second time. Nothing is written (and the
            file is not created) when no model qualifies.

    Raises:
        AssertionError: If a template name from
            ``TemplateType.get_template_name_list()`` is missing from
            ``TEMPLATE_MAPPING``.
    """
    # Sanity check before generating anything: every declared template name
    # must be registered.
    for template in TemplateType.get_template_name_list():
        assert template in TEMPLATE_MAPPING

    statements = []
    for model_type in ModelType.get_model_name_list():
        model_meta = MODEL_MAPPING[model_type]
        for group in model_meta.model_groups:
            for model in group.models:
                hf_model_id = model.hf_model_id
                # Skip models without an id (None or empty string).
                if not hf_model_id:
                    continue
                # Take the text after the last '/'; ids without a namespace
                # are used as-is instead of raising IndexError.
                model_name = hf_model_id.rsplit('/', 1)[-1]
                # Double any single quote so the name stays a valid SQL string literal.
                sql_name = model_name.replace("'", "''")
                statements.append(
                    "INSERT INTO resource_models (resource_name, engine_name, model_name, type) "
                    f"VALUES ('nvidia', 'ms-swift', '{sql_name}', 'gpu') "
                    "ON CONFLICT (engine_name, model_name) DO NOTHING;"
                )

    # Open the output once rather than once per model.
    if statements:
        with open(output_path, 'a', encoding='utf-8') as file:
            file.writelines(statement + '\n' for statement in statements)


if __name__ == '__main__':
    generate_model_sql()

docker/inference/Dockerfile.sglang

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM lmsysorg/sglang:v0.4.1.post3-cu124-srt
1+
FROM lmsysorg/sglang:v0.4.2.post2-cu124-srt
22
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
33
RUN apt-get update && apt-get install -y dumb-init && apt-get clean && rm -rf /var/lib/apt/lists/*
44
RUN pip install --no-cache-dir csghub-sdk==0.4.6

docker/inference/Dockerfile.vllm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM vllm/vllm-openai:v0.6.3.post1
1+
FROM vllm/vllm-openai:v0.7.2
22
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
33
RUN pip install --no-cache-dir csghub-sdk==0.4.3 ray supervisor huggingface-hub==0.27.0
44
RUN apt-get update && apt-get install -y supervisor

docker/inference/README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAM
1212
```bash
1313
export BUILDX_NO_DEFAULT_ATTESTATIONS=1
1414

15-
# For vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:3.2
16-
export IMAGE_TAG=3.2
15+
# For vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:v0.7.2
16+
export IMAGE_TAG=v0.7.2
1717
docker buildx build --platform linux/amd64,linux/arm64 \
1818
-t ${OPENCSG_ACR}/public/vllm-local:${IMAGE_TAG} \
1919
-t ${OPENCSG_ACR}/public/vllm-local:latest \
@@ -36,8 +36,8 @@ docker buildx build --platform linux/amd64 \
3636
-f Dockerfile.tgi \
3737
--push .
3838

39-
# For sglang: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/sglang:v0.4.1.post3-cu124-srt
40-
export IMAGE_TAG=v0.4.1.post3-cu124-srt
39+
# For sglang: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/sglang:v0.4.2.post2-cu124-srt
40+
export IMAGE_TAG=v0.4.2.post2-cu124-srt
4141
docker buildx build --platform linux/amd64 \
4242
-t ${OPENCSG_ACR}/public/sglang:${IMAGE_TAG} \
4343
-t ${OPENCSG_ACR}/public/sglang:latest \
@@ -88,11 +88,11 @@ docker run -d \
8888
| Task| Image Name | Version | CUDA Version | Fix
8989
| --- | --- | --- | --- |--- |
9090
|text generation| vllm | 2.8 | 12.1 | - |
91-
|text generation| vllm | 3.2 | 12.4 |fix hf hub timestamp|
91+
|text generation| vllm | v0.7.2 | 12.4 |fix hf hub timestamp|
9292
|text generation| vllm-cpu | 2.4 | -|fix hf hub timestamp |
9393
|text generation| tgi | 2.2 | 12.1 |- |
9494
|text generation| tgi | 3.2 | 12.4 |fix hf hub timestamp|
95-
|image generation| hf-inference-toolkit | 0.3.5 | 12.1 |-|
95+
|image generation| hf-inference-toolkit | 0.5.3 | 12.1 |-|
9696
|text generation| sglang | v0.4.1.post3-cu124-srt | 12.4 |- |
9797

9898

0 commit comments

Comments
 (0)