# Integration tests for the model-optimization (Neo) flow of the LMI container.
# Can be started manually (workflow_dispatch) or invoked from another workflow
# (workflow_call); both triggers accept the same three inputs.
name: Optimization Integration tests

on:
  workflow_dispatch:
    inputs:
      # The "Compute Image Uri" step falls back to 0.32.0 when this is empty.
      djl-version:
        description: 'The released version of DJL.'
        required: true
        type: string
        default: ''
      tag-suffix:
        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
        required: false
        type: string
        default: 'nightly'
      image-repo:
        description: 'The repository to fetch images from'
        required: false
        type: string
        default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
  workflow_call:
    inputs:
      djl-version:
        description: 'The released version of DJL.'
        required: false
        type: string
        default: ''
      tag-suffix:
        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
        required: false
        type: string
        default: 'nightly'
      image-repo:
        description: 'The repository to fetch images from'
        required: false
        type: string
        default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
    outputs:
      # Set by the neo-test job's "On Failure" step; defaults to '0' when that
      # step never wrote the output.
      failure_lmi:
        description: "'1' when a neo-test matrix leg with failure-prefix 'lmi' failed, otherwise '0'"
        value: ${{ jobs.neo-test.outputs.failure_lmi || '0' }}

# Token scopes granted to every job in this workflow.
permissions:
  # Read-only access to repository contents (needed by actions/checkout).
  contents: read
  # Allows jobs to request an OIDC token.
  # NOTE(review): presumably consumed for cloud credentials; no step in this
  # file requests it explicitly - confirm it is still required.
  id-token: write

jobs:
  # Starts the GPU instances (two "action_g6", one "action_lmic_p4d") that the
  # neo-test job runs on. Each step fetches a fresh runner registration token
  # from the GitHub API and hands it to start_instance.sh on the scheduler host.
  # The instance ids are exported so stop-runners can shut the machines down.
  create-optimization-runners:
    runs-on: [self-hosted, scheduler]
    steps:
      - name: Create new G6 instance
        id: create_g6
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g6 $token djl-serving
      - name: Create new G6 instance
        id: create_g6_2
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g6 $token djl-serving
      - name: Create new P4D instance
        id: create_p4d
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq '.token' | tr -d '"' )
          ./start_instance.sh action_lmic_p4d $token djl-serving
    # NOTE(review): the *_instance_id step outputs are presumably written to
    # GITHUB_OUTPUT by start_instance.sh - confirm against that script.
    outputs:
      gpu_instance_id_1: ${{ steps.create_g6.outputs.action_g6_instance_id }}
      gpu_instance_id_2: ${{ steps.create_g6_2.outputs.action_g6_instance_id }}
      gpu_instance_id_3: ${{ steps.create_p4d.outputs.action_lmic_p4d_instance_id }}

  # Runs one optimization + serving test per matrix entry on the instances
  # created above.
  neo-test:
    # Every label falls back to its right-hand value unless the matrix entry
    # sets `gh-runner`; no entry below does, so jobs are routed to self-hosted
    # runners tagged with this run's id/number/sha and the instance type.
    # NOTE(review): these labels are presumably attached by start_instance.sh.
    runs-on:
      - ${{ matrix.test.gh-runner && matrix.test.instance || 'self-hosted' }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_ID-{0}', github.run_id) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_NUMBER-{0}', github.run_number) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('SHA-{0}', github.sha) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('JOB-{0}', 'create-optimization-runners') }}
      - ${{ matrix.test.instance }}
    timeout-minutes: 120
    needs: create-optimization-runners
    strategy:
      # Let the remaining matrix legs finish even if one fails.
      fail-fast: false
      matrix:
        # Per entry:
        #   test                - display name, also used in the artifact name
        #   instance            - runner label (g6 / p4d)
        #   test_handler        - handler argument for llm/prepare.py and llm/client.py
        #   test_model_config   - model argument for llm/prepare.py
        #   test_serve_config   - model argument for llm/client.py
        #   include_fast_model_loading_s3_test - boolean; enables the S3 step
        #   failure-prefix      - suffix of the failure_<prefix> job output
        test:
          - test: MultinodeSharding
            instance: g6
            test_handler: vllm_neo
            test_model_config: llama-3.1-8b-multi-node-sharding
            test_serve_config: llama-3.1-8b
            failure-prefix: lmi
          - test: BasicSharding-g6
            instance: g6
            test_handler: vllm_neo
            test_model_config: tiny-llama-fml
            test_serve_config: tiny-llama-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
          - test: BasicSharding-p4d
            instance: p4d
            test_handler: vllm_neo
            test_model_config: tiny-llama-fml
            test_serve_config: tiny-llama-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
          - test: LoraSharding-g6
            instance: g6
            test_handler: vllm_neo
            test_model_config: tiny-llama-lora-fml
            test_serve_config: tiny-llama-lora-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
          - test: LoraSharding-p4d
            instance: p4d
            test_handler: vllm_neo
            test_model_config: tiny-llama-lora-fml
            test_serve_config: tiny-llama-lora-fml
            include_fast_model_loading_s3_test: true
            failure-prefix: lmi
    outputs:
      failure_lmi: ${{ steps.test-failure.outputs.failure_lmi }}
    steps:
      - name: Show environment
        run: |
          nvidia-smi -L
      - name: Clean env
        run: |
          sudo rm -rf tests/integration/models
          yes | docker system prune -a --volumes
          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
          echo "wait dpkg lock..."
          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
      - uses: actions/checkout@v4
      - name: Set up Python3
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is never parsed as a float.
          python-version: '3.10.x'
      - name: Install pip dependencies
        run: pip3 install requests numpy pillow huggingface_hub
      - name: Install s5cmd
        working-directory: serving/docker
        run: sudo scripts/install_s5cmd.sh x64
      - name: ECR Auth
        working-directory: tests/integration
        env:
          TEST_DJL_VERSION: ${{ inputs.djl-version }}
          IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
          IMAGE_REPO: ${{ inputs.image-repo }}
        run: |
          # The region is the 4th dot-separated field of an ECR registry host;
          # it is empty for non-ECR repos, in which case no login is attempted.
          ECR_REGION=$(echo "$IMAGE_REPO" | awk -F. '{print $4}')
          if [[ -n "$ECR_REGION" ]]; then
            aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin "$IMAGE_REPO"
          fi
          mkdir -p logs
      - name: "Compute Image Uri"
        id: compute-image-uri
        env:
          TEST_DJL_VERSION: ${{ inputs.djl-version }}
          IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
          IMAGE_REPO: ${{ inputs.image-repo }}
          CONTAINER: "lmi"
        run: |
          DJL_VERSION=${TEST_DJL_VERSION:-"0.32.0"}
          DJL_VERSION=$(echo $DJL_VERSION | xargs) # trim whitespace

          # OVERRIDE_TEST_CONTAINER is not set by this workflow; it only takes
          # effect when the runner environment exports it.
          if [ -n "$OVERRIDE_TEST_CONTAINER" ]; then
            TEST_IMAGE_URI=$OVERRIDE_TEST_CONTAINER
            echo "Warning: An override container has been specified - this container may not work for all tests, ensure you are only running tests compatible with the container" >&2
          else
            if [ -z "$IMAGE_REPO" ]; then
              echo "Error: You must set the docker image repo via IMAGE_REPO environment variable. Ex: deepjavalibrary/djl-serving" >&2
              exit 1
            fi
            CONTAINER_TAG="${DJL_VERSION}-${CONTAINER}"
            if [ -n "$IMAGE_TAG_SUFFIX" ]; then
              CONTAINER_TAG="${CONTAINER_TAG}-${IMAGE_TAG_SUFFIX}"
            fi
            TEST_IMAGE_URI="${IMAGE_REPO}:${CONTAINER_TAG}"
          fi
          echo "Computed image URI: $TEST_IMAGE_URI"
          echo "TEST_IMAGE_URI=$TEST_IMAGE_URI" >> $GITHUB_OUTPUT

      - name: "Model Optimization Step"
        working-directory: tests/integration
        run: |
          echo ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI}}
          # Prepare
          sudo rm -rf models
          python3 llm/prepare.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_model_config }}
          ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models lmi sm_neo_context

      - name: "Local Serving Test"
        working-directory: tests/integration
        run: |
          # test inference
          ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models/compiled lmi ${{ contains(matrix.test.test_model_config, 'multi-node') && 'multi_node' || '' }} serve
          python3 llm/client.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_serve_config }}
          # clean up
          docker rm -f $(docker ps -aq) || true

      - name: "Fast Model Loading S3 test"
        # The matrix value is a YAML boolean. Comparing it with the string
        # 'true' is always false under GitHub's loose-equality coercion, so
        # compare against the boolean literal instead.
        if: ${{ matrix.test.include_fast_model_loading_s3_test == true }}
        env:
          RUN_NUMBER: ${{ github.run_number }}
        working-directory: tests/integration
        run: |
          S3_URI="s3://djl-scratch-001-gamma-us-west-2/github-workflows/$RUN_NUMBER/${{ matrix.test.test_model_config }}-${{ matrix.test.instance }}-tp2"
          aws s3 sync $PWD/models/compiled "$S3_URI"
          # Drop the local fast-model-loader artifacts after uploading them, so
          # the container can only find them through the S3 URI.
          sudo find "$PWD/models/compiled/" -maxdepth 1 -type d -name "sagemaker-fast-model-loader-*" -exec sudo rm -rf {} +
          echo "SM_FAST_MODEL_LOADER_S3_URI=$S3_URI" >> $PWD/docker_env
          # test inference (same image as the optimization and local serving steps)
          ./launch_container.sh ${{ steps.compute-image-uri.outputs.TEST_IMAGE_URI }} $PWD/models/compiled lmi serve
          python3 llm/client.py ${{ matrix.test.test_handler }} ${{ matrix.test.test_serve_config }}
          # clean up
          docker rm -f $(docker ps -aq) || true
          sudo rm -rf $PWD/models
      - name: On Failure
        id: test-failure
        if: ${{ failure() }}
        working-directory: tests/integration
        run: |
          # Record the failure first: the shell runs with -e, so a failing
          # cleanup command below must not prevent the output from being set.
          failure_prefix="${{ matrix.test.failure-prefix }}"
          echo "failure_${failure_prefix}=1" >> "$GITHUB_OUTPUT"
          for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
          # Best-effort cleanup; none of these may exist depending on which step failed.
          sudo rm -rf outputs models
          rm -f awscurl
          ./remove_container.sh || true
      # NOTE(review): "ECR Auth" creates tests/integration/logs, while this step
      # uploads tests/integration/all_logs/ - presumably written by the test
      # scripts; confirm the path.
      - name: Upload test logs
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: test-${{ matrix.test.test }}-logs
          path: tests/integration/all_logs/

  # Always runs, even when earlier jobs failed or were cancelled, so the GPU
  # instances started by create-optimization-runners are shut down.
  stop-runners:
    if: always()
    runs-on: [self-hosted, scheduler]
    needs: [create-optimization-runners, neo-test]
    steps:
      - name: Stop all instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          # Attempt every stop even if one fails, and skip ids that were never
          # produced (e.g. the create job failed part-way through).
          status=0
          for instance_id in \
            "${{ needs.create-optimization-runners.outputs.gpu_instance_id_1 }}" \
            "${{ needs.create-optimization-runners.outputs.gpu_instance_id_2 }}" \
            "${{ needs.create-optimization-runners.outputs.gpu_instance_id_3 }}"; do
            if [ -z "$instance_id" ]; then
              continue
            fi
            ./stop_instance.sh "$instance_id" || status=1
          done
          exit $status