Skip to content

Commit f09a1d1

Browse files
authored
Build/Test workflow trigger based on file changes and job-level concurrency for SageMaker endpoint tests (#5507)
* use reusable workflow Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix ecr Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * set variable Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix bash Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * echo Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * rename ci image uri Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * remove echo Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * test only test-change Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix condition Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix condition Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix condition Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix condition Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix condition Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix concurrency Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix if Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix concurrency naming Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * revert file change Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * test echo Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * change concurrency Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * add job name Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * fix concurrency naming Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * add echo Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * remove name Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * change cleanup variable name Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * test cleanup Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * use ci account id Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> * vllm concurrency Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com> --------- Signed-off-by: sirutBuasai 
<sirutbuasai27@outlook.com>
1 parent 6e3c309 commit f09a1d1

File tree

4 files changed

+430
-181
lines changed

4 files changed

+430
-181
lines changed
Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
name: ECR Authentication
2-
description: Login to ECR, if image_uri is provided pull the image onto host runner.
2+
description: Login to ECR, if image-uri is provided pull the image onto host runner.
33

44
inputs:
5-
aws_region:
6-
description: AWS Region for docker image repository
7-
required: true
8-
aws_account_id:
5+
aws-account-id:
96
description: AWS Account ID for docker image registry
107
required: true
11-
image_uri:
8+
aws-region:
9+
description: AWS Region for docker image repository
10+
required: true
11+
image-uri:
1212
description: Docker image URI to pull from ECR
1313
required: false
1414

@@ -19,10 +19,10 @@ runs:
1919
- name: ECR login
2020
shell: bash
2121
run: |
22-
aws ecr get-login-password --region ${{ inputs.aws_region }} | docker login --username AWS --password-stdin ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_region }}.amazonaws.com
22+
aws ecr get-login-password --region ${{ inputs.aws-region }} | docker login --username AWS --password-stdin ${{ inputs.aws-account-id }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com
2323
2424
- name: Pull image
25-
if: inputs.image_uri != ''
25+
if: inputs.image-uri != ''
2626
shell: bash
2727
run: |
28-
docker pull ${{ inputs.image_uri }}
28+
docker pull ${{ inputs.image-uri }}

.github/workflows/pr-sglang.yml

Lines changed: 108 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,46 +10,65 @@ on:
1010
permissions:
1111
contents: read
1212

13-
concurrency:
14-
group: pr-sglang-${{ github.event.pull_request.number }}
15-
cancel-in-progress: true
16-
1713
env:
1814
SGLANG_VERSION: "0.5.5"
1915
PYTHON_VERSION: "py312"
2016
CUDA_VERSION: "cu129"
2117
OS_VERSION: "ubuntu22.04"
18+
PROD_SAGEMAKER_IMAGE: sglang:0.5-gpu-py312
2219
FORCE_COLOR: "1"
2320

2421
jobs:
2522
check-changes:
2623
runs-on: ubuntu-latest
24+
concurrency:
25+
group: ${{ github.workflow }}-check-changes-${{ github.event.pull_request.number }}
26+
cancel-in-progress: true
2727
outputs:
28-
sglang-sagemaker: ${{ steps.changes.outputs.sglang-sagemaker }}
28+
build-change: ${{ steps.changes.outputs.build-change }}
29+
test-change: ${{ steps.changes.outputs.test-change }}
2930
steps:
30-
- uses: actions/checkout@v5
31-
- uses: actions/setup-python@v6
31+
- name: Checkout DLC source
32+
uses: actions/checkout@v5
33+
34+
- name: Setup python
35+
uses: actions/setup-python@v6
3236
with:
3337
python-version: "3.12"
34-
- uses: pre-commit/action@v3.0.1
38+
39+
- name: Run pre-commit
40+
uses: pre-commit/action@v3.0.1
3541
with:
3642
extra_args: --all-files
43+
3744
- name: Detect file changes
3845
id: changes
3946
uses: dorny/paths-filter@v3
4047
with:
4148
filters: |
42-
sglang-sagemaker:
43-
- "docker/sglang/Dockerfile"
49+
build-change:
50+
- "docker/sglang/**"
51+
- "scripts/sglang/**"
52+
- "scripts/common/**"
53+
- "scripts/telemetry/**"
54+
- ".github/workflows/pr-sglang*"
55+
test-change:
56+
- "test/sglang/**"
4457
58+
# ======================================================
59+
# =============== SGLang SageMaker jobs ================
60+
# ======================================================
4561
build-sglang-image:
4662
needs: [check-changes]
47-
if: needs.check-changes.outputs.sglang-sagemaker == 'true'
63+
if: needs.check-changes.outputs.build-change == 'true'
4864
runs-on:
4965
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
5066
fleet:x86-build-runner
67+
concurrency:
68+
group: ${{ github.workflow }}-build-sglang-image-${{ github.event.pull_request.number }}
69+
cancel-in-progress: true
5170
outputs:
52-
image-uri: ${{ steps.image-uri-build.outputs.IMAGE_URI }}
71+
ci-image: ${{ steps.image-uri-build.outputs.CI_IMAGE_URI }}
5372
steps:
5473
- uses: actions/checkout@v5
5574
- run: .github/scripts/runner_setup.sh
@@ -58,16 +77,16 @@ jobs:
5877
- name: ECR login
5978
uses: ./.github/actions/ecr-authenticate
6079
with:
61-
aws_region: ${{ vars.AWS_REGION }}
62-
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
80+
aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
81+
aws-region: ${{ vars.AWS_REGION }}
6382

6483
- name: Resolve image URI for build
6584
id: image-uri-build
6685
run: |
67-
IMAGE_URI=${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:sglang-${{ env.SGLANG_VERSION }}-gpu-${{ env.PYTHON_VERSION }}-${{ env.CUDA_VERSION }}-${{ env.OS_VERSION }}-sagemaker-pr-${{ github.event.pull_request.number }}
68-
echo "Image URI to build: $IMAGE_URI"
69-
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
70-
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_OUTPUT
86+
CI_IMAGE_URI=${{ vars.CI_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:sglang-${{ env.SGLANG_VERSION }}-gpu-${{ env.PYTHON_VERSION }}-${{ env.CUDA_VERSION }}-${{ env.OS_VERSION }}-sagemaker-pr-${{ github.event.pull_request.number }}
87+
echo "Image URI to build: ${CI_IMAGE_URI}"
88+
echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_ENV}
89+
echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_OUTPUT}
7190
7291
- name: Build image
7392
run: |
@@ -76,32 +95,69 @@ jobs:
7695
--build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
7796
--build-arg BASE_IMAGE="lmsysorg/sglang:v${{ env.SGLANG_VERSION }}-${{ env.CUDA_VERSION }}-amd64" \
7897
--cache-to=type=inline \
79-
--cache-from=type=registry,ref=${IMAGE_URI} \
80-
--tag ${IMAGE_URI} \
98+
--cache-from=type=registry,ref=${CI_IMAGE_URI} \
99+
--tag ${CI_IMAGE_URI} \
81100
--target sglang-sagemaker \
82101
-f docker/sglang/Dockerfile .
83102
84103
- name: Container push
85104
run: |
86-
docker push ${IMAGE_URI}
87-
docker rmi ${IMAGE_URI}
105+
docker push ${CI_IMAGE_URI}
106+
docker rmi ${CI_IMAGE_URI}
107+
108+
set-test-environment:
109+
needs: [check-changes, build-sglang-image]
110+
if: |
111+
always() && !failure() && !cancelled() &&
112+
(needs.build-sglang-image.result == 'success' ||
113+
(needs.check-changes.outputs.build-change == 'false') && (needs.check-changes.outputs.test-change == 'true'))
114+
runs-on: ubuntu-latest
115+
concurrency:
116+
group: ${{ github.workflow }}-set-test-environment-${{ github.event.pull_request.number }}
117+
cancel-in-progress: true
118+
outputs:
119+
aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
120+
image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
121+
steps:
122+
- name: Checkout code
123+
uses: actions/checkout@v4
124+
125+
- name: Set test environment
126+
id: set-env
127+
run: |
128+
if [[ "${{ needs.build-sglang-image.result }}" == "success" ]]; then
129+
AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
130+
IMAGE_URI=${{ needs.build-sglang-image.outputs.ci-image }}
131+
else
132+
AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
133+
IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ env.PROD_SAGEMAKER_IMAGE }}
134+
fi
135+
136+
echo "Image URI to test: ${IMAGE_URI}"
137+
echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> ${GITHUB_OUTPUT}
138+
echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
88139
89140
sglang-local-benchmark-test:
90-
needs: [build-sglang-image]
91-
if: needs.build-sglang-image.result == 'success'
141+
needs: [set-test-environment]
142+
if: |
143+
always() && !failure() && !cancelled() &&
144+
needs.set-test-environment.result == 'success'
92145
runs-on:
93146
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
94147
fleet:x86-g6xl-runner
148+
concurrency:
149+
group: ${{ github.workflow }}-sglang-local-benchmark-test-${{ github.event.pull_request.number }}
150+
cancel-in-progress: true
95151
steps:
96152
- name: Checkout DLC source
97153
uses: actions/checkout@v5
98154

99155
- name: Container pull
100156
uses: ./.github/actions/ecr-authenticate
101157
with:
102-
aws_region: ${{ vars.AWS_REGION }}
103-
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
104-
image_uri: ${{ needs.build-sglang-image.outputs.image-uri }}
158+
aws-account-id: ${{ needs.set-test-environment.outputs.aws-account-id }}
159+
aws-region: ${{ vars.AWS_REGION }}
160+
image-uri: ${{ needs.set-test-environment.outputs.image-uri }}
105161

106162
- name: Setup for SGLang datasets
107163
run: |
@@ -124,9 +180,9 @@ jobs:
124180
-e SM_SGLANG_HOST=127.0.0.1 \
125181
-e SM_SGLANG_PORT=30000 \
126182
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
127-
${{ needs.build-sglang-image.outputs.image-uri }})
128-
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
129-
echo "Waiting for container startup ..."
183+
${{ needs.set-test-environment.outputs.image-uri }})
184+
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
185+
echo "Waiting for serving endpoint startup ..."
130186
sleep 60s
131187
docker logs ${CONTAINER_ID}
132188
@@ -144,24 +200,29 @@ jobs:
144200
if: always()
145201
uses: ./.github/actions/container-cleanup
146202
with:
147-
container_id: ${{ env.CONTAINER_ID }}
203+
container_id: ${CONTAINER_ID}
148204

149205
sglang-lang-test:
150-
needs: [build-sglang-image]
151-
if: needs.build-sglang-image.result == 'success'
206+
needs: [set-test-environment]
207+
if: |
208+
always() && !failure() && !cancelled() &&
209+
needs.set-test-environment.result == 'success'
152210
runs-on:
153211
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
154212
fleet:x86-g6exl-runner
213+
concurrency:
214+
group: ${{ github.workflow }}-sglang-lang-test-${{ github.event.pull_request.number }}
215+
cancel-in-progress: true
155216
steps:
156217
- name: Checkout DLC source
157218
uses: actions/checkout@v5
158219

159220
- name: Container pull
160221
uses: ./.github/actions/ecr-authenticate
161222
with:
162-
aws_region: ${{ vars.AWS_REGION }}
163-
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
164-
image_uri: ${{ needs.build-sglang-image.outputs.image-uri }}
223+
aws-account-id: ${{ needs.set-test-environment.outputs.aws-account-id }}
224+
aws-region: ${{ vars.AWS_REGION }}
225+
image-uri: ${{ needs.set-test-environment.outputs.image-uri }}
165226

166227
- name: Checkout SGLang tests
167228
uses: actions/checkout@v5
@@ -176,8 +237,8 @@ jobs:
176237
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
177238
-v ./sglang_source:/workdir --workdir /workdir \
178239
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
179-
${{ needs.build-sglang-image.outputs.image-uri }})
180-
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
240+
${{ needs.set-test-environment.outputs.image-uri }})
241+
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
181242
182243
- name: Setup for SGLang tests
183244
run: |
@@ -202,14 +263,19 @@ jobs:
202263
if: always()
203264
uses: ./.github/actions/container-cleanup
204265
with:
205-
container_id: ${{ env.CONTAINER_ID }}
266+
container_id: ${CONTAINER_ID}
206267

207268
sglang-sagemaker-endpoint-test:
208-
needs: [build-sglang-image]
209-
if: needs.build-sglang-image.result == 'success'
269+
needs: [set-test-environment]
270+
if: |
271+
always() && !failure() && !cancelled() &&
272+
needs.set-test-environment.result == 'success'
210273
runs-on:
211274
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
212275
fleet:default-runner
276+
concurrency:
277+
group: ${{ github.workflow }}-sglang-sagemaker-endpoint-test-${{ github.event.pull_request.number }}
278+
cancel-in-progress: false
213279
steps:
214280
- name: Checkout DLC source
215281
uses: actions/checkout@v5
@@ -227,4 +293,4 @@ jobs:
227293
run: |
228294
source .venv/bin/activate
229295
cd test/
230-
python3 -m pytest -vs -rA --image-uri ${{ needs.build-sglang-image.outputs.image-uri }} sglang/sagemaker
296+
python3 -m pytest -vs -rA --image-uri ${{ needs.set-test-environment.outputs.image-uri }} sglang/sagemaker

0 commit comments

Comments
 (0)