Skip to content

Commit 35dbb5b

Browse files
authored
TF2.19 ARM64 currency (#4979)
1 parent f5834f0 commit 35dbb5b

File tree

4 files changed

+281
-7
lines changed

4 files changed

+281
-7
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
framework: &FRAMEWORK tensorflow
5+
version: &VERSION 2.19.0
6+
short_version: &SHORT_VERSION 2.19
7+
arch_type: arm64
8+
#autopatch_build: "True"
9+
10+
repository_info:
11+
inference_repository: &INFERENCE_REPOSITORY
12+
image_type: &INFERENCE_IMAGE_TYPE inference
13+
root: !join [ *FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
14+
repository_name: &REPOSITORY_NAME !join [pr, "-", *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE, "-", arm64]
15+
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
16+
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE, "-", arm64 ]
17+
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/,
18+
*RELEASE_REPOSITORY_NAME ]
19+
20+
context:
21+
inference_context: &INFERENCE_CONTEXT
22+
start_cuda_compat:
23+
source: docker/build_artifacts/start_cuda_compat.sh
24+
target: start_cuda_compat.sh
25+
dockerd_entrypoint:
26+
source: docker/build_artifacts/dockerd_entrypoint.sh
27+
target: dockerd_entrypoint.sh
28+
init:
29+
source: docker/build_artifacts/__init__.py
30+
target: __init__.py
31+
dockerd-entrypoint:
32+
source: docker/build_artifacts/dockerd-entrypoint.py
33+
target: dockerd-entrypoint.py
34+
deep_learning_container:
35+
source: ../../src/deep_learning_container.py
36+
target: deep_learning_container.py
37+
sagemaker_package_name:
38+
source: docker/build_artifacts/sagemaker
39+
target: sagemaker
40+
41+
images:
42+
BuildSageMakerTensorflowArm64CPUInferencePy3DockerImage:
43+
<<: *INFERENCE_REPOSITORY
44+
build: &TENSORFLOW_CPU_INFERENCE_PY3 false
45+
image_size_baseline: 4899
46+
device_type: &DEVICE_TYPE cpu
47+
python_version: &DOCKER_PYTHON_VERSION py3
48+
tag_python_version: &TAG_PYTHON_VERSION py312
49+
os_version: &OS_VERSION ubuntu22.04
50+
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
51+
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
52+
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile.arm64., *DEVICE_TYPE ]
53+
target: sagemaker
54+
# build_tag_override: "beta:2.18.0-cpu-py312-ubuntu22.04-sagemaker"
55+
context:
56+
<<: *INFERENCE_CONTEXT
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
buildspec_pointer: buildspec-arm64-2-18-sm.yml
1+
buildspec_pointer: buildspec-arm64-2-19-sm.yml
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
########################################################
2+
# _____ ____ ____ ___
3+
# | ____/ ___|___ \ |_ _|_ __ ___ __ _ __ _ ___
4+
# | _|| | __) | | || '_ ` _ \ / _` |/ _` |/ _ \
5+
# | |__| |___ / __/ | || | | | | | (_| | (_| | __/
6+
# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___|
7+
# |___/
8+
# ____ _
9+
# | _ \ ___ ___(_)_ __ ___
10+
# | |_) / _ \/ __| | '_ \ / _ \
11+
# | _ < __/ (__| | |_) | __/
12+
# |_| \_\___|\___|_| .__/ \___|
13+
# |_|
14+
########################################################
15+
16+
FROM arm64v8/ubuntu:22.04 AS ec2
17+
18+
ENV DEBIAN_FRONTEND=noninteractive \
19+
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib"
20+
21+
LABEL maintainer="Amazon AI"
22+
LABEL dlc_major_version="1"
23+
24+
ARG PYTHON=python3.12
25+
ARG PYTHON_PIP=python3-pip
26+
ARG PIP=pip3
27+
ARG PYTHON_VERSION=3.12.10
28+
ARG TFS_API_VERSION=2.19.0
29+
ARG TFS_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/tensorflow_serving/r2.19_aws/cpu_arm/tensorflow_model_server
30+
ARG TFS_SHORT_VERSION=2.19
31+
32+
# ENV variable to be passed to SageMaker stage
33+
ENV PIP=${PIP}
34+
ENV PYTHON=${PYTHON}
35+
36+
# See http://bugs.python.org/issue19846
37+
ENV LANG=C.UTF-8
38+
# Python won’t try to write .pyc or .pyo files on the import of source modules
39+
ENV PYTHONDONTWRITEBYTECODE=1
40+
ENV PYTHONUNBUFFERED=1
41+
ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH'
42+
ENV MODEL_BASE_PATH=/models
43+
# The only required piece is the model name in order to differentiate endpoints
44+
ENV MODEL_NAME=model
45+
ENV DEBIAN_FRONTEND=noninteractive
46+
47+
RUN apt-get update \
48+
&& apt-get -y upgrade \
49+
&& apt-get -y install --no-install-recommends \
50+
curl \
51+
gnupg2 \
52+
ca-certificates \
53+
emacs \
54+
git \
55+
unzip \
56+
wget \
57+
vim \
58+
libbz2-dev \
59+
liblzma-dev \
60+
libffi-dev \
61+
build-essential \
62+
zlib1g-dev \
63+
openssl \
64+
libreadline-dev \
65+
libncursesw5-dev \
66+
libssl-dev \
67+
libsqlite3-dev \
68+
tk-dev \
69+
libgdbm-dev \
70+
libc6-dev \
71+
&& apt-get autoremove -y \
72+
&& apt-get clean \
73+
&& rm -rf /var/lib/apt/lists/*
74+
75+
# Install python3.12
76+
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
77+
&& tar -xvf Python-$PYTHON_VERSION.tgz \
78+
&& cd Python-$PYTHON_VERSION \
79+
&& ./configure && make && make install \
80+
&& rm -rf ../Python-$PYTHON_VERSION*
81+
82+
# Install libssl1.1 from Ubuntu 20.04 to satisfy CUDA dependencies
83+
#RUN wget http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb\
84+
# && dpkg -i libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
85+
# && rm libssl1.1_1.1.1f-1ubuntu2_arm64.deb
86+
87+
RUN ${PIP} --no-cache-dir install --upgrade \
88+
pip \
89+
setuptools
90+
91+
# cython, falcon, gunicorn, grpc
92+
RUN ${PIP} install --no-cache-dir \
93+
"awscli<2" \
94+
boto3 \
95+
"cython<3.0" \
96+
gevent \
97+
requests \
98+
grpcio \
99+
"protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3" \
100+
packaging \
101+
# using --no-dependencies to avoid installing tensorflow binary
102+
&& ${PIP} install --no-dependencies --no-cache-dir \
103+
tensorflow-serving-api==${TFS_API_VERSION}
104+
105+
# Some TF tools expect a "python" binary
106+
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \
107+
&& ln -s $(which ${PIP}) /usr/bin/pip
108+
109+
RUN cd /tmp/ \
110+
&& rm -rf tmp*
111+
112+
RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \
113+
&& chmod 555 /usr/bin/tensorflow_model_server
114+
115+
# Expose ports
116+
# gRPC and REST
117+
EXPOSE 8500 8501
118+
119+
# Set where models should be stored in the container
120+
RUN mkdir -p ${MODEL_BASE_PATH}
121+
122+
ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
123+
124+
RUN chmod +x /usr/local/bin/deep_learning_container.py
125+
126+
COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
127+
128+
RUN chmod +x /usr/local/bin/bash_telemetry.sh
129+
130+
RUN echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc
131+
132+
# Create a script that runs the model server so we can use environment variables
133+
# while also passing in arguments from the docker command line
134+
RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \
135+
 && echo 'bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true' >> /usr/bin/tf_serving_entrypoint.sh \
136+
&& echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \
137+
&& chmod +x /usr/bin/tf_serving_entrypoint.sh
138+
139+
RUN HOME_DIR=/root \
140+
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
141+
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
142+
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
143+
&& chmod +x /usr/local/bin/testOSSCompliance \
144+
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
145+
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
146+
&& cp ${HOME_DIR}/oss_compliance/build_from_source_packages/BUILD_FROM_SOURCE_PACKAGES_LICENCES_AARCH64_IMAGES ${HOME_DIR} \
147+
&& rm -rf ${HOME_DIR}/oss_compliance*
148+
149+
RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-${TFS_SHORT_VERSION}/license.txt -o /license.txt
150+
151+
CMD ["/usr/bin/tf_serving_entrypoint.sh"]
152+
153+
#################################################################
154+
# ____ __ __ _
155+
# / ___| __ _ __ _ ___| \/ | __ _| | _____ _ __
156+
# \___ \ / _` |/ _` |/ _ \ |\/| |/ _` | |/ / _ \ '__|
157+
# ___) | (_| | (_| | __/ | | | (_| | < __/ |
158+
# |____/ \__,_|\__, |\___|_| |_|\__,_|_|\_\___|_|
159+
# |___/
160+
# ___ ____ _
161+
# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___
162+
# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \
163+
# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/
164+
# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___|
165+
# |___/ |_|
166+
#################################################################
167+
168+
FROM ec2 AS sagemaker
169+
170+
LABEL maintainer="Amazon AI"
171+
LABEL dlc_major_version="1"
172+
173+
# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT
174+
# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html
175+
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
176+
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
177+
178+
ARG TFS_SHORT_VERSION=2.19
179+
ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}"
180+
ENV PATH="$PATH:/sagemaker"
181+
182+
# nginx + njs
183+
RUN curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \
184+
&& echo 'deb http://nginx.org/packages/ubuntu/ jammy nginx' >> /etc/apt/sources.list \
185+
&& apt-get update \
186+
&& apt-get -y install --no-install-recommends \
187+
nginx \
188+
nginx-module-njs \
189+
&& apt-get clean \
190+
&& rm -rf /var/lib/apt/lists/*
191+
192+
# pin gunicorn>=23.0.0 to address vul 72809
193+
RUN ${PIP} install --no-cache-dir \
194+
falcon==3.1.0 \
195+
"gunicorn>=23.0.0"
196+
197+
COPY ./sagemaker /sagemaker
198+
199+
# Expose ports
200+
# gRPC and REST
201+
EXPOSE 8500 8501
202+
203+
RUN HOME_DIR=/root \
204+
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
205+
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
206+
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
207+
&& chmod +x /usr/local/bin/testOSSCompliance \
208+
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
209+
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
210+
&& rm -rf ${HOME_DIR}/oss_compliance*
211+
212+
RUN rm -rf /tmp/*
213+
214+
# Removing cuobjdump and nvdisasm for CVE-2025-23280 CVE-2025-23248
# NOTE(review): /usr/local/cuda is not expected to exist in this CPU-only arm64 image,
# so these are defensive no-ops kept for parity with the GPU Dockerfiles — confirm intent
215+
RUN rm -rf /usr/local/cuda/bin/cuobjdump*
216+
RUN rm -rf /usr/local/cuda/bin/nvdisasm*
217+
218+
CMD ["/usr/bin/tf_serving_entrypoint.sh"]

test/dlc_tests/ec2/test_telemetry.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_telemetry_instance_tag_failure_arm64_gpu(
7777
@pytest.mark.model("N/A")
7878
@pytest.mark.processor("cpu")
7979
@pytest.mark.integration("telemetry")
80-
@pytest.mark.parametrize("ec2_instance_type", ["c6g.4xlarge"], indirect=True)
80+
@pytest.mark.parametrize("ec2_instance_type", ["c6g.8xlarge"], indirect=True)
8181
@pytest.mark.parametrize(
8282
"ec2_instance_ami", [test_utils.AL2023_BASE_DLAMI_ARM64_US_WEST_2], indirect=True
8383
)
@@ -95,7 +95,7 @@ def test_telemetry_instance_tag_failure_graviton_cpu(
9595
@pytest.mark.model("N/A")
9696
@pytest.mark.processor("cpu")
9797
@pytest.mark.integration("telemetry")
98-
@pytest.mark.parametrize("ec2_instance_type", ["c6g.4xlarge"], indirect=True)
98+
@pytest.mark.parametrize("ec2_instance_type", ["c6g.8xlarge"], indirect=True)
9999
@pytest.mark.parametrize(
100100
"ec2_instance_ami", [test_utils.AL2023_BASE_DLAMI_ARM64_US_WEST_2], indirect=True
101101
)
@@ -203,7 +203,7 @@ def test_telemetry_instance_tag_success_arm64_gpu(
203203
@pytest.mark.model("N/A")
204204
@pytest.mark.processor("cpu")
205205
@pytest.mark.integration("telemetry")
206-
@pytest.mark.parametrize("ec2_instance_type", ["c6g.4xlarge"], indirect=True)
206+
@pytest.mark.parametrize("ec2_instance_type", ["c6g.8xlarge"], indirect=True)
207207
@pytest.mark.parametrize(
208208
"ec2_instance_ami", [test_utils.AL2023_BASE_DLAMI_ARM64_US_WEST_2], indirect=True
209209
)
@@ -220,7 +220,7 @@ def test_telemetry_instance_tag_success_graviton_cpu(
220220
@pytest.mark.model("N/A")
221221
@pytest.mark.processor("cpu")
222222
@pytest.mark.integration("telemetry")
223-
@pytest.mark.parametrize("ec2_instance_type", ["c6g.4xlarge"], indirect=True)
223+
@pytest.mark.parametrize("ec2_instance_type", ["c6g.8xlarge"], indirect=True)
224224
@pytest.mark.parametrize(
225225
"ec2_instance_ami", [test_utils.AL2023_BASE_DLAMI_ARM64_US_WEST_2], indirect=True
226226
)
@@ -325,7 +325,7 @@ def test_telemetry_s3_query_bucket_success_arm64_gpu(
325325
@pytest.mark.model("N/A")
326326
@pytest.mark.processor("cpu")
327327
@pytest.mark.integration("telemetry")
328-
@pytest.mark.parametrize("ec2_instance_type", ["c6g.4xlarge"], indirect=True)
328+
@pytest.mark.parametrize("ec2_instance_type", ["c6g.8xlarge"], indirect=True)
329329
@pytest.mark.parametrize(
330330
"ec2_instance_ami", [test_utils.AL2023_BASE_DLAMI_ARM64_US_WEST_2], indirect=True
331331
)
@@ -341,7 +341,7 @@ def test_telemetry_s3_query_bucket_success_graviton_cpu(
341341
@pytest.mark.model("N/A")
342342
@pytest.mark.processor("cpu")
343343
@pytest.mark.integration("telemetry")
344-
@pytest.mark.parametrize("ec2_instance_type", ["c6g.4xlarge"], indirect=True)
344+
@pytest.mark.parametrize("ec2_instance_type", ["c6g.8xlarge"], indirect=True)
345345
@pytest.mark.parametrize(
346346
"ec2_instance_ami", [test_utils.AL2023_BASE_DLAMI_ARM64_US_WEST_2], indirect=True
347347
)

0 commit comments

Comments
 (0)