Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
499cf57
add integ tests executor link in PR template
HappyAmazonian Dec 31, 2024
d407f84
try lmi-distro workflow
HappyAmazonian Jan 2, 2025
3a6bea4
Merge branch 'master' of https://github.com/deepjavalibrary/djl-servi…
HappyAmazonian Jan 2, 2025
e1743ce
add default value for input.mode in nightly workflow
HappyAmazonian Jan 2, 2025
48e19cf
reset
HappyAmazonian Jan 2, 2025
6dc35fb
Merge branch 'deepjavalibrary:master' into djl-master
HappyAmazonian Feb 2, 2025
8531d95
Merge branch 'deepjavalibrary:master' into djl-master
HappyAmazonian Feb 5, 2025
985b3b6
Merge branch 'deepjavalibrary:master' into djl-master
HappyAmazonian Feb 6, 2025
111afeb
Merge branch 'deepjavalibrary:master' into djl-master
HappyAmazonian Feb 12, 2025
49ffa91
Merge branch 'deepjavalibrary:master' into djl-master
HappyAmazonian Feb 14, 2025
545aaf2
init comit
HappyAmazonian Feb 15, 2025
df2de18
use integ yml for testing
HappyAmazonian Feb 15, 2025
44950e6
fix repo name
HappyAmazonian Feb 17, 2025
8b7aaea
test image compute
HappyAmazonian Feb 17, 2025
abee6e4
fix typo
HappyAmazonian Feb 17, 2025
3a05893
fix typo
HappyAmazonian Feb 17, 2025
684c454
print
HappyAmazonian Feb 17, 2025
94993e0
add load format to config
HappyAmazonian Feb 17, 2025
004f934
try multinode
HappyAmazonian Feb 17, 2025
9adbe73
fix typo
HappyAmazonian Feb 17, 2025
639c3b5
move to individual file
HappyAmazonian Feb 17, 2025
05bb275
fix dependency
HappyAmazonian Feb 18, 2025
0ac26d7
fix lmi
HappyAmazonian Feb 18, 2025
7264a43
fix to master
HappyAmazonian Feb 18, 2025
a407068
fix styling
HappyAmazonian Feb 18, 2025
9bec083
format
HappyAmazonian Feb 18, 2025
af91ac1
add other model sharding test
HappyAmazonian Feb 18, 2025
6e83894
set runner id for different sub workflows
HappyAmazonian Feb 18, 2025
3eed8a3
add clean
HappyAmazonian Feb 19, 2025
815e0c4
Merge branch 'deepjavalibrary:master' into djl-master
HappyAmazonian Feb 19, 2025
f281f17
merge
HappyAmazonian Feb 19, 2025
6c6aefd
fix typo during merge
HappyAmazonian Feb 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ jobs:
- ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_ID-{0}', github.run_id) }}
- ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_NUMBER-{0}', github.run_number) }}
- ${{ matrix.test.gh-runner && matrix.test.instance || format('SHA-{0}', github.sha) }}
- ${{ matrix.test.gh-runner && matrix.test.instance || format('JOB-{0}', 'create-runners') }}
- ${{ matrix.test.instance }}
timeout-minutes: 120
needs: create-runners
Expand Down
17 changes: 14 additions & 3 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,19 @@ jobs:
with:
djl-version: ${{ needs.build.outputs.djl_version }}
tag-suffix: ${{ inputs.mode == 'nightly' && format('{0}-{1}', 'nightly', github.sha) || github.sha }}

# Invokes the reusable optimization integration workflow once `build` has
# finished, forwarding the repository secrets to it.
optimization-integration-test:
  uses: ./.github/workflows/optimization_integration.yml
  needs:
    - build
  secrets: inherit
  with:
    djl-version: ${{ needs.build.outputs.djl_version }}
    # 'nightly-<sha>' when inputs.mode is 'nightly', otherwise the bare SHA.
    tag-suffix: ${{ inputs.mode == 'nightly' && format('{0}-{1}', 'nightly', github.sha) || github.sha }}


determine_images_to_publish:
if: always()
needs: [ integration-test ]
needs: [ integration-test, optimization-integration-test ]
runs-on: ubuntu-latest
outputs:
images: ${{ steps.generate-images.outputs.images }}
Expand All @@ -60,9 +70,10 @@ jobs:
if [[ "${{ needs.integration-test.outputs.failure_aarch64 }}" == "0" ]]; then
images+=("aarch64")
fi
if [[ "${{ needs.integration-test.outputs.failure_lmi }}" == "0" ]]; then
if [[ "${{ needs.integration-test.outputs.failure_lmi }}" == "0" &&
"${{ needs.optimization-integration-test.outputs.failure_lmi }}" == "0" ]]; then
images+=("lmi")
fi
fi
if [[ "${{ needs.integration-test.outputs.failure_trtllm }}" == "0" ]]; then
images+=("tensorrt-llm")
fi
Expand Down
257 changes: 257 additions & 0 deletions .github/workflows/optimization_integration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
name: Optimization Integration tests

# Two entry points share the same three inputs and defaults:
#   - workflow_dispatch: manual run from the Actions UI
#   - workflow_call:     invoked as a reusable workflow by another workflow
on:
  workflow_dispatch:
    inputs:
      djl-version:
        description: 'The released version of DJL.'
        # Optional, matching the workflow_call declaration below: the
        # "Compute Image Uri" step substitutes its own default version when
        # this value is empty, so an empty default must be accepted.
        required: false
        type: string
        default: ''
      tag-suffix:
        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
        required: false
        type: string
        default: 'nightly'
      image-repo:
        description: 'The repository to fetch images from'
        required: false
        type: string
        default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
  workflow_call:
    inputs:
      djl-version:
        description: 'The released version of DJL.'
        required: false
        type: string
        default: ''
      tag-suffix:
        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
        required: false
        type: string
        default: 'nightly'
      image-repo:
        description: 'The repository to fetch images from'
        required: false
        type: string
        default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
    outputs:
      # Mirrors the neo-test job output; falls back to '0' when that job
      # never set a failure flag.
      failure_lmi:
        value: ${{ jobs.neo-test.outputs.failure_lmi || '0' }}

# Scopes granted to the GITHUB_TOKEN for every job in this workflow.
permissions:
  # Read-only access to repository contents.
  contents: read
  # Allows jobs to request an OIDC identity token.
  # NOTE(review): presumably used for cloud credential federation - confirm.
  id-token: write

jobs:
# Starts the self-hosted GPU runners used by neo-test: two G6 instances and
# one P4D instance. Each step requests a fresh runner registration token from
# the GitHub API and hands it to start_instance.sh on the scheduler host.
create-optimization-runners:
  runs-on: [self-hosted, scheduler]
  steps:
    - name: Create first G6 instance
      id: create_g6
      run: |
        # Without pipefail a failed curl is masked by the downstream jq and
        # the instance would be started with an empty registration token.
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -r '.token' )
        ./start_instance.sh action_g6 "$token" djl-serving
    - name: Create second G6 instance
      id: create_g6_2
      run: |
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -r '.token' )
        ./start_instance.sh action_g6 "$token" djl-serving
    - name: Create new P4D instance
      id: create_p4d
      run: |
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -r '.token' )
        ./start_instance.sh action_lmic_p4d "$token" djl-serving
  # Instance ids consumed by stop-runners.
  # NOTE(review): the *_instance_id step outputs are presumably written by
  # start_instance.sh - confirm against that script.
  outputs:
    gpu_instance_id_1: ${{ steps.create_g6.outputs.action_g6_instance_id }}
    gpu_instance_id_2: ${{ steps.create_g6_2.outputs.action_g6_instance_id }}
    gpu_instance_id_3: ${{ steps.create_p4d.outputs.action_lmic_p4d_instance_id }}

# Runs the model-optimization (sharding) integration tests. For every matrix
# entry the job prepares a model config, runs the optimization container,
# serves the compiled model locally and, when the entry opts in, repeats the
# serving test with the compiled artifacts loaded from S3.
neo-test:
  # When a matrix entry sets gh-runner, every label collapses to the instance
  # name; otherwise the job targets a self-hosted runner labelled with this
  # run's id, number, SHA and the creating job's name.
  runs-on:
    - ${{ matrix.test.gh-runner && matrix.test.instance || 'self-hosted' }}
    - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_ID-{0}', github.run_id) }}
    - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_NUMBER-{0}', github.run_number) }}
    - ${{ matrix.test.gh-runner && matrix.test.instance || format('SHA-{0}', github.sha) }}
    - ${{ matrix.test.gh-runner && matrix.test.instance || format('JOB-{0}', 'create-optimization-runners') }}
    - ${{ matrix.test.instance }}
  timeout-minutes: 120
  needs: create-optimization-runners
  strategy:
    fail-fast: false
    matrix:
      test:
        - test: MultinodeSharding
          instance: g6
          test_handler: vllm_neo
          test_model_config: llama-3.1-8b-multi-node-sharding
          test_serve_config: llama-3.1-8b
          failure-prefix: lmi
        - test: BasicSharding-g6
          instance: g6
          test_handler: vllm_neo
          test_model_config: tiny-llama-fml
          test_serve_config: tiny-llama-fml
          include_fast_model_loading_s3_test: true
          failure-prefix: lmi
        - test: BasicSharding-p4d
          instance: p4d
          test_handler: vllm_neo
          test_model_config: tiny-llama-fml
          test_serve_config: tiny-llama-fml
          include_fast_model_loading_s3_test: true
          failure-prefix: lmi
        - test: LoraSharding-g6
          instance: g6
          test_handler: vllm_neo
          test_model_config: tiny-llama-lora-fml
          test_serve_config: tiny-llama-lora-fml
          include_fast_model_loading_s3_test: true
          failure-prefix: lmi
        - test: LoraSharding-p4d
          instance: p4d
          test_handler: vllm_neo
          test_model_config: tiny-llama-lora-fml
          test_serve_config: tiny-llama-lora-fml
          include_fast_model_loading_s3_test: true
          failure-prefix: lmi
  outputs:
    failure_lmi: ${{ steps.test-failure.outputs.failure_lmi }}
  steps:
    - name: Show environment
      run: |
        nvidia-smi -L
    - name: Clean env
      run: |
        sudo rm -rf tests/integration/models
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - uses: actions/checkout@v4
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy pillow huggingface_hub
    - name: Install s5cmd
      working-directory: serving/docker
      run: sudo scripts/install_s5cmd.sh x64
    - name: ECR Auth
      working-directory: tests/integration
      env:
        TEST_DJL_VERSION: ${{ inputs.djl-version }}
        IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
        IMAGE_REPO: ${{ inputs.image-repo }}
      run: |
        # The region is the 4th dot-separated field of the registry host;
        # login is skipped when the repo name has no such field.
        ECR_REGION=$(echo "$IMAGE_REPO" | awk -F. '{print $4}')
        if [[ -n "$ECR_REGION" ]]; then
          aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin "$IMAGE_REPO"
        fi
        mkdir logs
    - name: "Compute Image Uri"
      id: compute-image-uri
      env:
        TEST_DJL_VERSION: ${{ inputs.djl-version }}
        IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
        IMAGE_REPO: ${{ inputs.image-repo }}
        CONTAINER: "lmi"
      run: |
        DJL_VERSION=${TEST_DJL_VERSION:-"0.32.0"}
        DJL_VERSION=$(echo $DJL_VERSION | xargs) # trim whitespace

        if [ -n "$OVERRIDE_TEST_CONTAINER" ]; then
          TEST_IMAGE_URI=$OVERRIDE_TEST_CONTAINER
          echo "Warning: An override container has been specified - this container may not work for all tests, ensure you are only running tests compatible with the container" >&2
        else
          if [ -z "$IMAGE_REPO" ]; then
            echo "Error: You must set the docker image repo via IMAGE_REPO environment variable. Ex: deepjavalibrary/djl-serving" >&2
            exit 1
          fi
          # Tag layout: <djl-version>-<container>[-<suffix>]
          CONTAINER_TAG="${DJL_VERSION}-${CONTAINER}"
          if [ -n "$IMAGE_TAG_SUFFIX" ]; then
            CONTAINER_TAG="${CONTAINER_TAG}-${IMAGE_TAG_SUFFIX}"
          fi
          TEST_IMAGE_URI="${IMAGE_REPO}:${CONTAINER_TAG}"
        fi
        echo "Computed image URI: $TEST_IMAGE_URI"
        echo "TEST_IMAGE_URI=$TEST_IMAGE_URI" >> $GITHUB_OUTPUT

    - name: "Model Optimization Step"
      working-directory: tests/integration
      run: |
        echo ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI}}
        # Prepare
        sudo rm -rf models
        python3 llm/prepare.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_model_config }}
        ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models lmi sm_neo_context

    - name: "Local Serving Test"
      working-directory: tests/integration
      run: |
        # test inference
        ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models/compiled lmi ${{ contains(matrix.test.test_model_config, 'multi-node') && 'multi_node' || '' }} serve
        python3 llm/client.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_serve_config }}
        # clean up
        docker rm -f $(docker ps -aq) || true

    - name: "Fast Model Loading S3 test"
      # The matrix flag is a YAML boolean. Comparing it with the string
      # 'true' is always false in Actions expressions (mismatched types are
      # coerced to numbers), so test the value directly. Entries that omit
      # the flag evaluate to a falsy null and skip this step.
      if: ${{ matrix.test.include_fast_model_loading_s3_test }}
      env:
        RUN_NUMBER: ${{ github.run_number }}
      working-directory: tests/integration
      run: |
        aws s3 sync $PWD/models/compiled s3://djl-scratch-001-gamma-us-west-2/github-workflows/$RUN_NUMBER/${{ matrix.test.test_model_config }}-${{ matrix.test.instance }}-tp2
        # Drop the local loader artifacts so serving must read them from S3.
        sudo find "$PWD/models/compiled/" -maxdepth 1 -type d -name "sagemaker-fast-model-loader-*" -exec sudo rm -rf {} +
        echo "SM_FAST_MODEL_LOADER_S3_URI=s3://djl-scratch-001-gamma-us-west-2/github-workflows/$RUN_NUMBER/${{ matrix.test.test_model_config }}-${{ matrix.test.instance }}-tp2" >> $PWD/docker_env
        # test inference with the same image the earlier steps used
        ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models/compiled lmi serve
        python3 llm/client.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_serve_config }}
        # clean up
        docker rm -f $(docker ps -aq) || true
        sudo rm -rf $PWD/models
    - name: On Failure
      id: test-failure
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        # Record the failure flag before any cleanup so that a cleanup error
        # cannot prevent the flag from reaching the job output.
        failure_prefix="${{ matrix.test.failure-prefix }}"
        echo "failure_${failure_prefix}=1" >> "$GITHUB_OUTPUT"
        for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
        sudo rm -rf outputs && sudo rm -rf models
        # -f: awscurl may not have been downloaded by this test.
        rm -f awscurl
        ./remove_container.sh
    - name: Upload test logs
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: test-${{ matrix.test.test }}-logs
        path: tests/integration/all_logs/

# Always runs after the tests (even on failure or cancellation of earlier
# jobs) and stops every instance started by create-optimization-runners.
stop-runners:
  if: always()
  runs-on: [self-hosted, scheduler]
  needs: [create-optimization-runners, neo-test]
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        # Attempt every instance even if one stop fails, so a single error
        # does not leave the remaining instances running; report the failure
        # through the exit status at the end.
        status=0
        for instance_id in \
          "${{ needs.create-optimization-runners.outputs.gpu_instance_id_1 }}" \
          "${{ needs.create-optimization-runners.outputs.gpu_instance_id_2 }}" \
          "${{ needs.create-optimization-runners.outputs.gpu_instance_id_3 }}"; do
          # An id is empty when the matching create step never produced one.
          if [[ -z "$instance_id" ]]; then
            echo "No instance id recorded; skipping" >&2
            continue
          fi
          ./stop_instance.sh "$instance_id" || status=1
        done
        exit "$status"
6 changes: 6 additions & 0 deletions tests/integration/llm/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,12 @@
"llama-3.1-8b": {
"option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
},
"llama-3.1-8b-multi-node-sharding": {
"option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
"option.tensor_parallel_degree": "2",
"option.pipeline_parallel_degree": "2",
"option.load_format": "sagemaker_fast_model_loader",
},
"llama-3.1-8b-awq-options": {
"option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
"option.tensor_parallel_degree": "4",
Expand Down
Loading