generated from amazon-archives/__template_Apache-2.0
-
Notifications
You must be signed in to change notification settings - Fork 533
145 lines (129 loc) · 4.9 KB
/
pr-sglang.yml
File metadata and controls
145 lines (129 loc) · 4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Pull-request CI for the SGLang image.
name: PR - SGLang

# Run only for pull requests whose base branch is main and that modify
# something under docker/sglang/.
on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/sglang/**"

# Least privilege: the workflow token may only read repository contents.
permissions:
  contents: read

# One concurrency group per pull request; starting a new run cancels the
# run that is still in progress for the same PR.
concurrency:
  group: pr-sglang-${{ github.event.pull_request.number }}
  cancel-in-progress: true
jobs:
  # Runs the pre-commit hooks over all files and reports, through the
  # `sglang-sagemaker` output, whether docker/sglang/Dockerfile changed.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      sglang-sagemaker: ${{ steps.changes.outputs.sglang-sagemaker }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            sglang-sagemaker:
              - "docker/sglang/Dockerfile"

  # Builds the `sglang-sagemaker` Dockerfile target, pushes it to ECR under a
  # PR-specific tag, and publishes the image URI as an artifact for later jobs.
  # Skipped unless check-changes reported a Dockerfile change.
  build-sglang-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.sglang-sagemaker == 'true'
    # NOTE: this is ONE label. The second line is a continuation of the same
    # plain scalar (YAML folds it in with a single space), not a second list
    # item. Presumably the CodeBuild single-label override form - confirm
    # before reformatting.
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-build-runner
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        run: |
          # Fail the step if fetching the password fails, not only if login does.
          set -o pipefail
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} |
            docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
      - name: Resolve image URI for build
        id: image_uri_build
        run: |
          IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:sglang-0.5.5-gpu-py312-cu129-ubuntu22.04-sagemaker-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: $IMAGE_URI"
          # Export for the remaining steps of this job.
          echo "IMAGE_URI=$IMAGE_URI" >> "$GITHUB_ENV"
      - name: Build image
        run: |
          # CACHE_REFRESH carries today's date as a build argument; the
          # previously pushed image for this PR is used as the layer cache.
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref="$IMAGE_URI" \
            --tag "$IMAGE_URI" \
            --target sglang-sagemaker \
            -f docker/sglang/Dockerfile .
      - name: Docker push and save image URI artifact
        run: |
          docker push "$IMAGE_URI"
          # Remove the local copy once it is in the registry.
          docker rmi "$IMAGE_URI"
          echo "$IMAGE_URI" > "${{ secrets.IMAGE_URI_FILE }}"
      - name: Upload image URI artifact
        uses: actions/upload-artifact@v4
        with:
          name: sglang-sagemaker-image-uri
          path: ${{ secrets.IMAGE_URI_FILE }}

  # Pulls the freshly built image on a GPU runner, starts the server with a
  # small model, and runs the sglang.bench_serving benchmark against it.
  sglang-local-benchmark-test:
    needs: [build-sglang-image]
    if: needs.build-sglang-image.result == 'success'
    # Single folded label, same form as in build-sglang-image.
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/container-pull
        with:
          aws_region: ${{ secrets.AWS_REGION }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          image_uri_file: ${{ secrets.IMAGE_URI_FILE }}
          artifact_name: sglang-sagemaker-image-uri
      - name: Setup for SGLang datasets
        run: |
          mkdir -p /tmp/sglang/dataset
          if [ ! -f /tmp/sglang/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
            echo "Downloading ShareGPT dataset..."
            wget -P /tmp/sglang/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
          else
            echo "ShareGPT dataset already exists. Skipping download."
          fi
      - name: Start container
        # The token is handed to the step through the environment and forwarded
        # with `-e HF_TOKEN`, so it is never spliced into the command line.
        env:
          HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          # NOTE(review): IMAGE_URI is not set by any step visible in this job;
          # presumably the container-pull action exports it - confirm.
          # NOTE(review): ./sglang_source is not created by any step visible
          # here - confirm it is expected to exist.
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v /tmp/sglang/dataset:/dataset \
            -v ./sglang_source:/workdir --workdir /workdir \
            -p 30000:30000 \
            -e SM_SGLANG_MODEL_PATH=Qwen/Qwen3-0.6B \
            -e SM_SGLANG_REASONING_PARSER=qwen3 \
            -e SM_SGLANG_HOST=127.0.0.1 \
            -e SM_SGLANG_PORT=30000 \
            -e HF_TOKEN \
            ${IMAGE_URI})
          # Export so the test and cleanup steps can address the container.
          echo "CONTAINER_ID=$CONTAINER_ID" >> "$GITHUB_ENV"
          echo "Waiting for container startup ..."
          sleep 300s
          docker logs "${CONTAINER_ID}"
      - name: Run SGLang tests
        run: |
          docker exec "${CONTAINER_ID}" python3 -m sglang.bench_serving \
            --backend sglang \
            --host 127.0.0.1 --port 30000 \
            --num-prompts 1000 \
            --model Qwen/Qwen3-0.6B \
            --dataset-name sharegpt \
            --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json
      - name: Cleanup SGLang datasets
        # Run even when an earlier step failed, matching the container cleanup
        # below, so a failed benchmark does not leave the dataset behind.
        if: always()
        run: |
          rm -rf /tmp/sglang/dataset
      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}