Skip to content

Commit 6a76ee1

Browse files
authored
[ci] update docker build of temp images to properly build release images (#2739)
1 parent 461c3ca commit 6a76ee1

File tree

8 files changed, with 131 additions and 120 deletions.

.github/workflows/docker-nightly-publish.yml

Lines changed: 25 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -114,55 +114,48 @@ jobs:
114114
with:
115115
path: ~/.gradle/caches
116116
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
117-
- name: Extract DJL and DJL Serving versions from TOML
117+
- name: Set env vars for container build and tag
118118
id: get-versions
119119
run: |
120120
DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)
121121
SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)
122122
echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV
123-
echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV
124123
echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_OUTPUT
125-
- name: Build release candidate docker image
126-
if: ${{ inputs.mode == 'release' }}
127-
working-directory: serving/docker
124+
echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV
125+
mode=${{ inputs.mode }}
126+
if [[ "$mode" != "release" ]]; then
127+
export NIGHTLY="-nightly"
128+
echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV
129+
./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot
130+
export BUILD_VERSION_ARG_DJL="${DJL_VERSION}-SNAPSHOT"
131+
export BUILD_VERSION_ARG_SERVING="${SERVING_VERSION}-SNAPSHOT"
132+
else
133+
rm -rf serving/docker/distributions
134+
export BUILD_VERSION_ARG_DJL="${DJL_VERSION}-SNAPSHOT"
135+
export BUILD_VERSION_ARG_SERVING="${SERVING_VERSION}-SNAPSHOT"
136+
fi
137+
echo "BUILD_VERSION_ARG_DJL=$BUILD_VERSION_ARG_DJL" >> $GITHUB_ENV
138+
echo "BUILD_VERSION_ARG_SERVING=$BUILD_VERSION_ARG_SERVING" >> $GITHUB_ENV
139+
- name: Build docker image
128140
run: |
129-
export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}"
130141
export RELEASE_VERSION="${{ env.SERVING_VERSION }}-"
131-
docker compose build --no-cache \
132-
--build-arg djl_version=${{ env.DJL_VERSION }} \
133-
--build-arg djl_serving_version=${{ env.SERVING_VERSION }} \
134-
${{ matrix.arch }}
135-
- name: Build temp docker image
136-
if: ${{ inputs.mode == '' || inputs.mode == 'temp' || inputs.mode == 'nightly' }}
137-
run: |
138-
./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot
139142
cd serving/docker
140-
export NIGHTLY="-nightly"
141-
echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV
142143
docker compose build --no-cache \
143-
--build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
144-
--build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
144+
--build-arg djl_version=${{ env.BUILD_VERSION_ARG_DJL }} \
145+
--build-arg djl_serving_version=${{ env.BUILD_VERSION_ARG_SERVING }} \
145146
${{ matrix.arch }}
146147
- name: Tag and push temp image to ECR repo
147148
working-directory: serving/docker
148149
run: |
149150
ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
150151
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
151-
mode=${{ inputs.mode }}
152-
if [[ "${{ inputs.mode }}" == "release" ]]; then
153-
mode=${{ env.DJL_VERSION }}
154-
fi
155-
if [[ -z "${{ inputs.mode }}" ]]; then
156-
mode="nightly"
157-
fi
158-
tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-$mode-${GITHUB_RUN_ID}"
159-
tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-$mode-${GITHUB_SHA}"
152+
baseDockerTag="${{ env.AWS_ECR_REPO }}:${{ env.SERVING_VERSION}}-${{ matrix.arch }}${{ env.NIGHTLY }}"
153+
tempRunIdTag="${baseDockerTag}-${GITHUB_RUN_ID}"
154+
tempCommitTag="${baseDockerTag}-${GITHUB_SHA}"
160155
161-
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempRunIdTag
162-
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempCommitTag
163-
if [[ "$mode" == "nightly" ]]; then
164-
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} ${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-nightly
165-
fi
156+
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ env.SERVING_VERSION}}-${{ matrix.arch }}${{ env.NIGHTLY }} $baseDockerTag
157+
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ env.SERVING_VERSION}}-${{ matrix.arch }}${{ env.NIGHTLY }} $tempRunIdTag
158+
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ env.SERVING_VERSION}}-${{ matrix.arch }}${{ env.NIGHTLY }} $tempCommitTag
166159
time docker push --all-tags ${{ env.AWS_ECR_REPO }}
167160
168161
stop-runners:

.github/workflows/docker_publish.yml

Lines changed: 39 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -48,26 +48,50 @@ env:
4848
ECR_REPO_REGION: "us-east-1"
4949

5050
jobs:
51-
create-aarch64-runner:
51+
create-runners:
5252
runs-on: [ self-hosted, scheduler ]
5353
steps:
54-
- name: Create new Graviton instance
55-
id: create_aarch64
54+
- name: Create new CPU instance
55+
id: create_cpu_1
5656
run: |
5757
cd /home/ubuntu/djl_benchmark_script/scripts
5858
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
5959
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
6060
--fail \
6161
| jq '.token' | tr -d '"' )
62-
./start_instance.sh action_graviton $token djl-serving
62+
./start_instance.sh action_cpu $token djl-serving
63+
- name: Create new CPU instance
64+
id: create_cpu_2
65+
run: |
66+
cd /home/ubuntu/djl_benchmark_script/scripts
67+
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
68+
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
69+
--fail \
70+
| jq '.token' | tr -d '"' )
71+
./start_instance.sh action_cpu $token djl-serving
72+
- name: Create new CPU instance
73+
id: create_cpu_3
74+
run: |
75+
cd /home/ubuntu/djl_benchmark_script/scripts
76+
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
77+
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
78+
--fail \
79+
| jq '.token' | tr -d '"' )
80+
./start_instance.sh action_cpu $token djl-serving
6381
outputs:
64-
aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
65-
82+
cpu_instance_id1: ${{ steps.create_cpu_1.outputs.action_cpu_instance_id }}
83+
cpu_instance_id2: ${{ steps.create_cpu_2.outputs.action_cpu_instance_id }}
84+
cpu_instance_id3: ${{ steps.create_cpu_3.outputs.action_cpu_instance_id }}
6685

6786
docker-sync:
68-
runs-on: ${{ contains( '[tensorrt-llm]', matrix.arch) && needs.create-aarch64-runner.outputs.aarch64_instance_id || 'ubuntu-latest' }}
87+
runs-on:
88+
- self-hosted
89+
- cpu
90+
- RUN_ID-${{ github.run_id }}
91+
- RUN_NUMBER-${{ github.run_number }}
92+
- SHA-${{ github.sha }}
6993
timeout-minutes: 60
70-
needs: [create-aarch64-runner]
94+
needs: [create-runners]
7195
strategy:
7296
fail-fast: false
7397
matrix:
@@ -112,23 +136,21 @@ jobs:
112136
working-directory: serving/docker
113137
run: |
114138
./scripts/push_image_from_ECR.sh $DJL_VERSION $AWS_STAGING_ECR_REPO ${{ inputs.mode }} ${{ matrix.arch }} ${{ inputs.commit_sha }}
115-
- name: Retag image for release latest
116-
if: ${{ inputs.mode == 'release' }}
117-
working-directory: serving/docker
118-
run: |
119-
docker tag deepjavalibrary/djl-serving:${DJL_VERSION} deepjavalibrary/djl-serving:latest
120-
docker push deepjavalibrary/djl-serving:latest
121139
- name: Clean docker env
122140
working-directory: serving/docker
123141
run: |
124142
yes | docker system prune -a --volumes
125-
stop-aarch64-runner:
143+
stop-runners:
126144
if: always()
127145
runs-on: [ self-hosted, scheduler ]
128-
needs: [docker-sync, create-aarch64-runner]
146+
needs: [docker-sync, create-runners]
129147
steps:
130148
- name: Stop all instances
131149
run: |
132150
cd /home/ubuntu/djl_benchmark_script/scripts
133-
instance_id=${{ needs.create-aarch64-runner.outputs.aarch64_instance_id }}
151+
instance_id=${{ needs.create-runners.outputs.cpu_instance_id1 }}
152+
./stop_instance.sh $instance_id
153+
instance_id=${{ needs.create-runners.outputs.cpu_instance_id2 }}
154+
./stop_instance.sh $instance_id
155+
instance_id=${{ needs.create-runners.outputs.cpu_instance_id3 }}
134156
./stop_instance.sh $instance_id

.github/workflows/integration.yml

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -5,25 +5,35 @@ on:
55
inputs:
66
djl-version:
77
description: 'The released version of DJL.'
8-
required: false
8+
required: true
99
default: ''
1010
tag-suffix:
1111
description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
1212
required: false
1313
type: string
1414
default: 'nightly'
15+
image-repo:
16+
description: 'The repository to fetch images from'
17+
required: false
18+
type: string
19+
default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
1520
workflow_call:
1621
inputs:
1722
djl-version:
1823
description: 'The released version of DJL.'
1924
required: false
2025
type: string
21-
default: 'nightly'
26+
default: ''
2227
tag-suffix:
2328
description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
2429
required: false
2530
type: string
26-
default: ''
31+
default: 'nightly'
32+
image-repo:
33+
description: 'The repository to fetch images from'
34+
required: false
35+
type: string
36+
default: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
2737
outputs:
2838
failure_cpu:
2939
value: ${{ jobs.test.outputs.failure_cpu || '0' }}
@@ -42,9 +52,6 @@ permissions:
4252
id-token: write
4353
contents: read
4454

45-
env:
46-
AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
47-
4855
jobs:
4956
create-runners:
5057
runs-on: [self-hosted, scheduler]
@@ -244,7 +251,6 @@ jobs:
244251
chmod +x awscurl
245252
mkdir outputs
246253
- name: Configure AWS Credentials
247-
if: matrix.test.instance == 'ubuntu-latest'
248254
uses: aws-actions/configure-aws-credentials@v4
249255
with:
250256
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
@@ -253,11 +259,13 @@ jobs:
253259
working-directory: tests/integration
254260
env:
255261
TEST_DJL_VERSION: ${{ inputs.djl-version }}
256-
OVERRIDE_IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
257-
IMAGE_REPO: ${{ env.AWS_ECR_REPO }}
262+
IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
263+
IMAGE_REPO: ${{ inputs.image-repo }}
258264
run: |
259-
ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
260-
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
265+
ECR_REGION=$(echo "$IMAGE_REPO" | awk -F. '{print $4}')
266+
if [[ -n "$ECR_REGION" ]]; then
267+
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin "$IMAGE_REPO"
268+
fi
261269
python -m pytest -s -k ${{ matrix.test.test }} tests.py
262270
- name: Cleanup
263271
working-directory: tests/integration
@@ -314,16 +322,16 @@ jobs:
314322
aws-region: us-east-1
315323
- name: Download models and dockers
316324
run: |
317-
if [ "${{ github.event.inputs.djl-version }}" == "temp" ]; then
318-
DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-temp-${GITHUB_SHA}"
319-
elif [ -n "${{ inputs.tag-suffix }}" ]; then
320-
DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-${{ inputs.tag-suffix }}"
325+
if [ -n "${{ inputs.tag-suffix }}" ]; then
326+
DOCKER_IMAGE_URI="${{ inputs.image-repo }}:${{ inputs.djl-version }}-pytorch-inf2-${{ inputs.tag-suffix }}"
321327
else
322-
DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-nightly"
328+
DOCKER_IMAGE_URI="${{ inputs.image-repo }}:${{ inputs.djl-version }}-pytorch-inf2"
323329
fi
324330
echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >>$GITHUB_ENV
325-
ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
326-
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
331+
ECR_REGION=$(echo "${{ inputs.image-repo }}" | awk -F. '{print $4}')
332+
if [[ -n "$ECR_REGION" ]]; then
333+
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{ inputs.image-repo }}
334+
fi
327335
echo $DOCKER_IMAGE_URI
328336
docker pull $DOCKER_IMAGE_URI
329337
- name: Run djl_python unit/integration tests on container

.github/workflows/nightly.yml

Lines changed: 3 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -32,29 +32,13 @@ jobs:
3232
secrets: inherit
3333
with:
3434
mode: ${{ inputs.mode || 'nightly' }}
35-
get_image_tag_suffix:
36-
outputs:
37-
test_image_tag_suffix: ${{ steps.get_image_tag_suffix.test_image_tag_suffix }}
38-
needs: [build]
39-
runs-on: ubuntu-latest
40-
steps:
41-
- name: get_image_tag_suffix
42-
id: get_image_tag_suffix
43-
run: |
44-
if [[ "${{ inputs.mode || 'nightly' }}" == "nightly" ]]; then
45-
echo "test_image_tag_suffix=nightly" >> $GITHUB_OUTPUT
46-
elif [[ "${{ inputs.mode }}" == "release" ]]; then
47-
echo "test_image_tag_suffix=${{ needs.build.outputs.djl_version }}-${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT
48-
else
49-
echo "Invalid mode specified"
50-
exit 1
51-
fi
5235
integration-test:
53-
needs: [get_image_tag_suffix]
36+
needs: [build]
5437
uses: ./.github/workflows/integration.yml
5538
secrets: inherit
5639
with:
57-
tag-suffix: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }}
40+
djl-version: ${{ needs.build.outputs.djl_version }}
41+
tag-suffix: ${{ inputs.mode == 'nightly' && format('{0}-{1}', 'nightly', github.sha) || github.sha }}
5842
determine_images_to_publish:
5943
if: always()
6044
needs: [ integration-test ]

serving/docker/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ services:
55
context: .
66
target: base
77
dockerfile: Dockerfile
8-
image: "deepjavalibrary/djl-serving:${BASE_RELEASE_VERSION:-cpu}${NIGHTLY}"
8+
image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}cpu${NIGHTLY}"
99
cpu-full:
1010
build:
1111
context: .

serving/docker/partition/partition.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -199,11 +199,9 @@ def run_partition(self) -> str:
199199
self.cleanup()
200200
return ''.join(partition_stdout)
201201
else:
202-
logging.error(
203-
f"Partitioning was not successful.")
202+
logging.error(f"Partitioning was not successful.")
204203
raise Exception(
205-
f"Partitioning exited with return code: {proc.returncode}."
206-
)
204+
f"Partitioning exited with return code: {proc.returncode}.")
207205

208206
def run_quantization(self):
209207
quant_method = self.properties['option.quantize']

serving/docker/scripts/push_image_from_ECR.sh

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -24,17 +24,22 @@ from_repo=$AWS_TMP_ECR_REPO
2424

2525
set -x
2626

27-
if [[ "$mode" == "release" ]]; then
28-
if [[ "$image" == "cpu" ]]; then
29-
tag=$version
30-
else
31-
tag="$version-$image"
32-
fi
33-
fi
27+
base_tag="$version-$image"
3428

3529
if [[ "$mode" == "nightly" ]]; then
36-
tag="$image-nightly"
30+
base_tag="$base_tag-nightly"
31+
fi
32+
docker pull "$from_repo:$base_tag-$commit_sha"
33+
docker tag "$from_repo:$base_tag-$commit_sha" "$to_repo:$base_tag"
34+
docker push "$to_repo:$base_tag"
35+
if [[ "$image" == "cpu" ]]; then
36+
if [[ "$mode" == "release" ]]; then
37+
docker tag "$from_repo:$base_tag-$commit_sha" "$to_repo:$version"
38+
docker push "$to_repo:$version"
39+
docker tag "$from_repo:$base_tag-$commit_sha" "$to_repo:latest"
40+
docker push "$to_repo:latest"
41+
elif [[ "$mode" == "nightly" ]]; then
42+
docker tag "$from_repo:$base_tag-$commit_sha" "$to_repo:$version-nightly"
43+
docker push "$to_repo:$version-nightly"
44+
fi
3745
fi
38-
docker pull $from_repo:$image-$mode-$commit_sha
39-
docker tag $from_repo:$image-$mode-$commit_sha $to_repo:$tag
40-
docker push $to_repo:$tag

0 commit comments

Comments
 (0)