Skip to content

Commit dcaa187

Browse files
HappyAmazonian authored and siddvenk committed
[IntegTest]add sharding optimization integ tests (deepjavalibrary#2742)
1 parent cb412de commit dcaa187

File tree

4 files changed

+278
-3
lines changed

4 files changed

+278
-3
lines changed

.github/workflows/integration.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ jobs:
135135
- ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_ID-{0}', github.run_id) }}
136136
- ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_NUMBER-{0}', github.run_number) }}
137137
- ${{ matrix.test.gh-runner && matrix.test.instance || format('SHA-{0}', github.sha) }}
138+
- ${{ matrix.test.gh-runner && matrix.test.instance || format('JOB-{0}', 'create-runners') }}
138139
- ${{ matrix.test.instance }}
139140
timeout-minutes: 120
140141
needs: create-runners

.github/workflows/nightly.yml

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,19 @@ jobs:
3939
with:
4040
djl-version: ${{ needs.build.outputs.djl_version }}
4141
tag-suffix: ${{ inputs.mode == 'nightly' && format('{0}-{1}', 'nightly', github.sha) || github.sha }}
42+
43+
# Calls the optimization integration workflow once the build job has finished,
# forwarding the built DJL version and the image tag suffix for this run.
optimization-integration-test:
  uses: ./.github/workflows/optimization_integration.yml
  needs:
    - build
  # Make every secret of the calling workflow available to the called workflow.
  secrets: inherit
  with:
    # 'nightly-<sha>' when the workflow runs in nightly mode, otherwise the bare commit SHA.
    tag-suffix: ${{ inputs.mode == 'nightly' && format('{0}-{1}', 'nightly', github.sha) || github.sha }}
    djl-version: ${{ needs.build.outputs.djl_version }}
50+
51+
4252
determine_images_to_publish:
4353
if: always()
44-
needs: [ integration-test ]
54+
needs: [ integration-test, optimization-integration-test ]
4555
runs-on: ubuntu-latest
4656
outputs:
4757
images: ${{ steps.generate-images.outputs.images }}
@@ -60,9 +70,10 @@ jobs:
6070
if [[ "${{ needs.integration-test.outputs.failure_aarch64 }}" == "0" ]]; then
6171
images+=("aarch64")
6272
fi
63-
if [[ "${{ needs.integration-test.outputs.failure_lmi }}" == "0" ]]; then
73+
if [[ "${{ needs.integration-test.outputs.failure_lmi }}" == "0" &&
74+
"${{ needs.optimization-integration-test.outputs.failure_lmi }}" == "0" ]]; then
6475
images+=("lmi")
65-
fi
76+
fi
6677
if [[ "${{ needs.integration-test.outputs.failure_trtllm }}" == "0" ]]; then
6778
images+=("tensorrt-llm")
6879
fi
.github/workflows/optimization_integration.yml

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# Optimization (sharding) integration tests.
# The workflow can be started manually or called from another workflow; both
# entry points accept the same three inputs.
name: Optimization Integration tests

on:
  # Manual trigger.
  workflow_dispatch:
    inputs:
      djl-version:
        description: 'The released version of DJL.'
        # Required here, optional when the workflow is called (see workflow_call).
        required: true
        default: ''
      tag-suffix:
        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
        type: string
        required: false
        default: 'nightly'
      image-repo:
        description: 'The repository to fetch images from'
        type: string
        required: false
        default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'

  # Reusable-workflow entry point.
  workflow_call:
    inputs:
      djl-version:
        description: 'The released version of DJL.'
        type: string
        required: false
        default: ''
      tag-suffix:
        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
        type: string
        required: false
        default: 'nightly'
      image-repo:
        description: 'The repository to fetch images from'
        type: string
        required: false
        default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
    outputs:
      failure_lmi:
        # Falls back to '0' when the neo-test job did not set the output.
        value: ${{ jobs.neo-test.outputs.failure_lmi || '0' }}

# Permissions granted to the workflow token.
permissions:
  id-token: write
  contents: read
44+
45+
jobs:
  # Starts the self-hosted GPU instances (two G6, one P4D) and exposes their
  # instance ids so that stop-runners can shut them down afterwards.
  create-optimization-runners:
    runs-on: [self-hosted, scheduler]
    outputs:
      gpu_instance_id_1: ${{ steps.create_g6.outputs.action_g6_instance_id }}
      gpu_instance_id_2: ${{ steps.create_g6_2.outputs.action_g6_instance_id }}
      gpu_instance_id_3: ${{ steps.create_p4d.outputs.action_lmic_p4d_instance_id }}
    steps:
      - name: Create first G6 instance
        id: create_g6
        env:
          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
        run: |
          # pipefail: without it a failed curl is masked by jq and an empty
          # registration token would be handed to start_instance.sh.
          set -eo pipefail
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$(curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq -r '.token')
          ./start_instance.sh action_g6 "$token" djl-serving
      - name: Create second G6 instance
        id: create_g6_2
        env:
          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
        run: |
          set -eo pipefail
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$(curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq -r '.token')
          ./start_instance.sh action_g6 "$token" djl-serving
      - name: Create new P4D instance
        id: create_p4d
        env:
          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
        run: |
          set -eo pipefail
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$(curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq -r '.token')
          ./start_instance.sh action_lmic_p4d "$token" djl-serving

  # Runs each matrix entry: optimizes (shards) a model inside the LMI container,
  # serves the compiled artifacts locally and, where enabled, serves them again
  # after moving the fast-model-loader artifacts to S3.
  neo-test:
    # For self-hosted entries the runner must carry every label below, which
    # ties the job to runners created for this run by create-optimization-runners.
    runs-on:
      - ${{ matrix.test.gh-runner && matrix.test.instance || 'self-hosted' }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_ID-{0}', github.run_id) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_NUMBER-{0}', github.run_number) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('SHA-{0}', github.sha) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('JOB-{0}', 'create-optimization-runners') }}
      - ${{ matrix.test.instance }}
    timeout-minutes: 120
    needs: create-optimization-runners
    strategy:
      fail-fast: false
      matrix:
        test:
          - test: MultinodeSharding
            instance: g6
            test_handler: vllm_neo
            test_model_config: llama-3.1-8b-multi-node-sharding
            test_serve_config: llama-3.1-8b
            failure-prefix: lmi
          - test: BasicSharding-g6
            instance: g6
            test_handler: vllm_neo
            test_model_config: tiny-llama-fml
            test_serve_config: tiny-llama-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
          - test: BasicSharding-p4d
            instance: p4d
            test_handler: vllm_neo
            test_model_config: tiny-llama-fml
            test_serve_config: tiny-llama-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
          - test: LoraSharding-g6
            instance: g6
            test_handler: vllm_neo
            test_model_config: tiny-llama-lora-fml
            test_serve_config: tiny-llama-lora-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
          - test: LoraSharding-p4d
            instance: p4d
            test_handler: vllm_neo
            test_model_config: tiny-llama-lora-fml
            test_serve_config: tiny-llama-lora-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
    outputs:
      # Only written by the "On Failure" step; empty when every step succeeded.
      failure_lmi: ${{ steps.test-failure.outputs.failure_lmi }}
    steps:
      - name: Show environment
        run: |
          nvidia-smi -L
      - name: Clean env
        run: |
          sudo rm -rf tests/integration/models
          yes | docker system prune -a --volumes
          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
          echo "wait dpkg lock..."
          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
      - uses: actions/checkout@v4
      - name: Set up Python3
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'
      - name: Install pip dependencies
        run: pip3 install requests numpy pillow huggingface_hub
      - name: Install s5cmd
        working-directory: serving/docker
        run: sudo scripts/install_s5cmd.sh x64
      - name: ECR Auth
        working-directory: tests/integration
        env:
          IMAGE_REPO: ${{ inputs.image-repo }}
        run: |
          # The 4th dot-separated field of the repo is used as the region;
          # the login is skipped when the repo has no such field.
          ECR_REGION=$(echo "$IMAGE_REPO" | awk -F. '{print $4}')
          if [[ -n "$ECR_REGION" ]]; then
            aws ecr get-login-password --region "$ECR_REGION" | docker login --username AWS --password-stdin "$IMAGE_REPO"
          fi
          mkdir -p logs
      - name: "Compute Image Uri"
        id: compute-image-uri
        env:
          TEST_DJL_VERSION: ${{ inputs.djl-version }}
          IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
          IMAGE_REPO: ${{ inputs.image-repo }}
          CONTAINER: "lmi"
        run: |
          DJL_VERSION=${TEST_DJL_VERSION:-"0.32.0"}
          DJL_VERSION=$(echo $DJL_VERSION | xargs) # trim whitespace

          # OVERRIDE_TEST_CONTAINER is not set by this workflow; it is honoured
          # only if the runner environment provides it.
          if [ -n "$OVERRIDE_TEST_CONTAINER" ]; then
            TEST_IMAGE_URI=$OVERRIDE_TEST_CONTAINER
            echo "Warning: An override container has been specified - this container may not work for all tests, ensure you are only running tests compatible with the container" >&2
          else
            if [ -z "$IMAGE_REPO" ]; then
              echo "Error: You must set the docker image repo via IMAGE_REPO environment variable. Ex: deepjavalibrary/djl-serving" >&2
              exit 1
            fi
            CONTAINER_TAG="${DJL_VERSION}-${CONTAINER}"
            if [ -n "$IMAGE_TAG_SUFFIX" ]; then
              CONTAINER_TAG="${CONTAINER_TAG}-${IMAGE_TAG_SUFFIX}"
            fi
            TEST_IMAGE_URI="${IMAGE_REPO}:${CONTAINER_TAG}"
          fi
          echo "Computed image URI: $TEST_IMAGE_URI"
          echo "TEST_IMAGE_URI=$TEST_IMAGE_URI" >> "$GITHUB_OUTPUT"

      - name: "Model Optimization Step"
        working-directory: tests/integration
        run: |
          echo ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }}
          # Prepare
          sudo rm -rf models
          python3 llm/prepare.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_model_config }}
          ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models lmi sm_neo_context

      - name: "Local Serving Test"
        working-directory: tests/integration
        run: |
          # test inference
          ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models/compiled lmi ${{ contains(matrix.test.test_model_config, 'multi-node') && 'multi_node' || '' }} serve
          python3 llm/client.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_serve_config }}
          # clean up
          docker rm -f $(docker ps -aq) || true

      - name: "Fast Model Loading S3 test"
        # The matrix value is a YAML boolean. Comparing it with the string
        # 'true' is always false (both sides are coerced to numbers), so the
        # value is tested directly; entries without the key evaluate to false.
        if: ${{ matrix.test.include_fast_model_loading_s3_test }}
        env:
          RUN_NUMBER: ${{ github.run_number }}
        working-directory: tests/integration
        run: |
          s3_uri="s3://djl-scratch-001-gamma-us-west-2/github-workflows/$RUN_NUMBER/${{ matrix.test.test_model_config }}-${{ matrix.test.instance }}-tp2"
          aws s3 sync "$PWD/models/compiled" "$s3_uri"
          # Drop the local fast-model-loader directories now that they are in S3.
          sudo find "$PWD/models/compiled/" -maxdepth 1 -type d -name "sagemaker-fast-model-loader-*" -exec sudo rm -rf {} +
          echo "SM_FAST_MODEL_LOADER_S3_URI=$s3_uri" >> "$PWD/docker_env"
          # test inference (same image as the previous steps; this workflow
          # defines no DJL_CONTAINER_REPO / DJLSERVING_DOCKER_TAG variables)
          ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models/compiled lmi serve
          python3 llm/client.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_serve_config }}
          # clean up
          docker rm -f $(docker ps -aq) || true
          sudo rm -rf $PWD/models
      - name: On Failure
        id: test-failure
        if: ${{ failure() }}
        working-directory: tests/integration
        run: |
          # Record the failure first so that an error in the cleanup below
          # cannot prevent it from being reported to the caller.
          failure_prefix="${{ matrix.test.failure-prefix }}"
          echo "failure_${failure_prefix}=1" >> "$GITHUB_OUTPUT"
          for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
          sudo rm -rf outputs models
          # -f: the file may be absent when the job failed early.
          rm -f awscurl
          ./remove_container.sh
      - name: Upload test logs
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: test-${{ matrix.test.test }}-logs
          path: tests/integration/all_logs/

  # Always runs, so the instances are stopped even when earlier jobs fail.
  stop-runners:
    if: always()
    runs-on: [self-hosted, scheduler]
    needs: [create-optimization-runners, neo-test]
    steps:
      - name: Stop all instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          # Try every instance even if stopping one fails, and skip ids that
          # were never recorded (e.g. the create job failed part-way).
          status=0
          for instance_id in \
            "${{ needs.create-optimization-runners.outputs.gpu_instance_id_1 }}" \
            "${{ needs.create-optimization-runners.outputs.gpu_instance_id_2 }}" \
            "${{ needs.create-optimization-runners.outputs.gpu_instance_id_3 }}"; do
            if [[ -z "$instance_id" ]]; then
              echo "No instance id recorded for this slot; skipping"
              continue
            fi
            ./stop_instance.sh "$instance_id" || status=1
          done
          exit "$status"

tests/integration/llm/prepare.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,12 @@
11141114
"llama-3.1-8b": {
11151115
"option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
11161116
},
1117+
"llama-3.1-8b-multi-node-sharding": {
1118+
"option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
1119+
"option.tensor_parallel_degree": "2",
1120+
"option.pipeline_parallel_degree": "2",
1121+
"option.load_format": "sagemaker_fast_model_loader",
1122+
},
11171123
"llama-3.1-8b-awq-options": {
11181124
"option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
11191125
"option.tensor_parallel_degree": "4",

0 commit comments

Comments
 (0)