Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions .github/actions/ecr-authenticate/action.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
name: ECR Authentication
description: Login to ECR, if image_uri is provided pull the image onto host runner.
description: Login to ECR, if image-uri is provided pull the image onto host runner.

inputs:
aws_region:
description: AWS Region for docker image repository
required: true
aws_account_id:
aws-account-id:
description: AWS Account ID for docker image registry
required: true
image_uri:
aws-region:
description: AWS Region for docker image repository
required: true
image-uri:
description: Docker image URI to pull from ECR
required: false

Expand All @@ -19,10 +19,10 @@ runs:
- name: ECR login
shell: bash
run: |
aws ecr get-login-password --region ${{ inputs.aws_region }} | docker login --username AWS --password-stdin ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_region }}.amazonaws.com
aws ecr get-login-password --region ${{ inputs.aws-region }} | docker login --username AWS --password-stdin ${{ inputs.aws-account-id }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com

- name: Pull image
if: inputs.image_uri != ''
if: inputs.image-uri != ''
shell: bash
run: |
docker pull ${{ inputs.image_uri }}
docker pull ${{ inputs.image-uri }}
150 changes: 108 additions & 42 deletions .github/workflows/pr-sglang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,46 +10,65 @@ on:
permissions:
contents: read

concurrency:
group: pr-sglang-${{ github.event.pull_request.number }}
cancel-in-progress: true

env:
SGLANG_VERSION: "0.5.5"
PYTHON_VERSION: "py312"
CUDA_VERSION: "cu129"
OS_VERSION: "ubuntu22.04"
PROD_SAGEMAKER_IMAGE: sglang:0.5-gpu-py312
FORCE_COLOR: "1"

jobs:
check-changes:
runs-on: ubuntu-latest
concurrency:
group: ${{ github.workflow }}-check-changes-${{ github.event.pull_request.number }}
cancel-in-progress: true
outputs:
sglang-sagemaker: ${{ steps.changes.outputs.sglang-sagemaker }}
build-change: ${{ steps.changes.outputs.build-change }}
test-change: ${{ steps.changes.outputs.test-change }}
steps:
- uses: actions/checkout@v5
- uses: actions/setup-python@v6
- name: Checkout DLC source
uses: actions/checkout@v5

- name: Setup python
uses: actions/setup-python@v6
with:
python-version: "3.12"
- uses: pre-commit/action@v3.0.1

- name: Run pre-commit
uses: pre-commit/action@v3.0.1
with:
extra_args: --all-files

- name: Detect file changes
id: changes
uses: dorny/paths-filter@v3
with:
filters: |
sglang-sagemaker:
- "docker/sglang/Dockerfile"
build-change:
- "docker/sglang/**"
- "scripts/sglang/**"
- "scripts/common/**"
- "scripts/telemetry/**"
- ".github/workflows/pr-sglang*"
test-change:
- "test/sglang/**"

# ======================================================
# =============== SGLang SageMaker jobs ================
# ======================================================
build-sglang-image:
needs: [check-changes]
if: needs.check-changes.outputs.sglang-sagemaker == 'true'
if: needs.check-changes.outputs.build-change == 'true'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-build-runner
concurrency:
group: ${{ github.workflow }}-build-sglang-image-${{ github.event.pull_request.number }}
cancel-in-progress: true
outputs:
image-uri: ${{ steps.image-uri-build.outputs.IMAGE_URI }}
ci-image: ${{ steps.image-uri-build.outputs.CI_IMAGE_URI }}
steps:
- uses: actions/checkout@v5
- run: .github/scripts/runner_setup.sh
Expand All @@ -58,16 +77,16 @@ jobs:
- name: ECR login
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
aws-region: ${{ vars.AWS_REGION }}

- name: Resolve image URI for build
id: image-uri-build
run: |
IMAGE_URI=${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:sglang-${{ env.SGLANG_VERSION }}-gpu-${{ env.PYTHON_VERSION }}-${{ env.CUDA_VERSION }}-${{ env.OS_VERSION }}-sagemaker-pr-${{ github.event.pull_request.number }}
echo "Image URI to build: $IMAGE_URI"
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_OUTPUT
CI_IMAGE_URI=${{ vars.CI_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:sglang-${{ env.SGLANG_VERSION }}-gpu-${{ env.PYTHON_VERSION }}-${{ env.CUDA_VERSION }}-${{ env.OS_VERSION }}-sagemaker-pr-${{ github.event.pull_request.number }}
echo "Image URI to build: ${CI_IMAGE_URI}"
echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_ENV}
echo "CI_IMAGE_URI=${CI_IMAGE_URI}" >> ${GITHUB_OUTPUT}

- name: Build image
run: |
Expand All @@ -76,32 +95,69 @@ jobs:
--build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
--build-arg BASE_IMAGE="lmsysorg/sglang:v${{ env.SGLANG_VERSION }}-${{ env.CUDA_VERSION }}-amd64" \
--cache-to=type=inline \
--cache-from=type=registry,ref=${IMAGE_URI} \
--tag ${IMAGE_URI} \
--cache-from=type=registry,ref=${CI_IMAGE_URI} \
--tag ${CI_IMAGE_URI} \
--target sglang-sagemaker \
-f docker/sglang/Dockerfile .

- name: Container push
run: |
docker push ${IMAGE_URI}
docker rmi ${IMAGE_URI}
docker push ${CI_IMAGE_URI}
docker rmi ${CI_IMAGE_URI}

set-test-environment:
needs: [check-changes, build-sglang-image]
if: |
always() && !failure() && !cancelled() &&
(needs.build-sglang-image.result == 'success' ||
(needs.check-changes.outputs.build-change == 'false') && (needs.check-changes.outputs.test-change == 'true'))
runs-on: ubuntu-latest
concurrency:
group: ${{ github.workflow }}-set-test-environment-${{ github.event.pull_request.number }}
cancel-in-progress: true
outputs:
aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set test environment
id: set-env
run: |
if [[ "${{ needs.build-sglang-image.result }}" == "success" ]]; then
AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
IMAGE_URI=${{ needs.build-sglang-image.outputs.ci-image }}
else
AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ env.PROD_SAGEMAKER_IMAGE }}
fi

echo "Image URI to test: ${IMAGE_URI}"
echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> ${GITHUB_OUTPUT}
echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}

sglang-local-benchmark-test:
needs: [build-sglang-image]
if: needs.build-sglang-image.result == 'success'
needs: [set-test-environment]
if: |
always() && !failure() && !cancelled() &&
needs.set-test-environment.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
concurrency:
group: ${{ github.workflow }}-sglang-local-benchmark-test-${{ github.event.pull_request.number }}
cancel-in-progress: true
steps:
- name: Checkout DLC source
uses: actions/checkout@v5

- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-sglang-image.outputs.image-uri }}
aws-account-id: ${{ needs.set-test-environment.outputs.aws-account-id }}
aws-region: ${{ vars.AWS_REGION }}
image-uri: ${{ needs.set-test-environment.outputs.image-uri }}

- name: Setup for SGLang datasets
run: |
Expand All @@ -124,9 +180,9 @@ jobs:
-e SM_SGLANG_HOST=127.0.0.1 \
-e SM_SGLANG_PORT=30000 \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-sglang-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
echo "Waiting for container startup ..."
${{ needs.set-test-environment.outputs.image-uri }})
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
echo "Waiting for serving endpoint startup ..."
sleep 60s
docker logs ${CONTAINER_ID}

Expand All @@ -144,24 +200,29 @@ jobs:
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}
container_id: ${CONTAINER_ID}

sglang-lang-test:
needs: [build-sglang-image]
if: needs.build-sglang-image.result == 'success'
needs: [set-test-environment]
if: |
always() && !failure() && !cancelled() &&
needs.set-test-environment.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6exl-runner
concurrency:
group: ${{ github.workflow }}-sglang-lang-test-${{ github.event.pull_request.number }}
cancel-in-progress: true
steps:
- name: Checkout DLC source
uses: actions/checkout@v5

- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-sglang-image.outputs.image-uri }}
aws-account-id: ${{ needs.set-test-environment.outputs.aws-account-id }}
aws-region: ${{ vars.AWS_REGION }}
image-uri: ${{ needs.set-test-environment.outputs.image-uri }}

- name: Checkout SGLang tests
uses: actions/checkout@v5
Expand All @@ -176,8 +237,8 @@ jobs:
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ./sglang_source:/workdir --workdir /workdir \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-sglang-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
${{ needs.set-test-environment.outputs.image-uri }})
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}

- name: Setup for SGLang tests
run: |
Expand All @@ -202,14 +263,19 @@ jobs:
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}
container_id: ${CONTAINER_ID}

sglang-sagemaker-endpoint-test:
needs: [build-sglang-image]
if: needs.build-sglang-image.result == 'success'
needs: [set-test-environment]
if: |
always() && !failure() && !cancelled() &&
needs.set-test-environment.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:default-runner
concurrency:
group: ${{ github.workflow }}-sglang-sagemaker-endpoint-test-${{ github.event.pull_request.number }}
cancel-in-progress: false
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
Expand All @@ -227,4 +293,4 @@ jobs:
run: |
source .venv/bin/activate
cd test/
python3 -m pytest -vs -rA --image-uri ${{ needs.build-sglang-image.outputs.image-uri }} sglang/sagemaker
python3 -m pytest -vs -rA --image-uri ${{ needs.set-test-environment.outputs.image-uri }} sglang/sagemaker
Loading