Skip to content

Commit 210fa3e

Browse files
Make model-engine FIPS compliant by updating base chainguard image (#724)
* Update Dockerfile * Update circleci config to login to chainguard * fix typo in circleci config * Add code to debug circleci errors * Debug missing chainguard token * Debug failing oidc token swap * Update config * Retry OIDC token swap with updated chainguard identity * Update audience for token exchange request * Simplify chainguard authentication with chainctl * Specify audience cgr.dev in auth login * Update system packages in Dockerfile * Update Dockerfile packages for chainguard compatibility * update Dockerfile * Revert circleci python version to 3.10.14 * Update hardcoded model-engine image tag used in integration tests * Fix CircleCI config trying to use hardcoded model-engine image tag for batch jobs pod * Mount service_config_circleci.yaml in batch job pods * Fix broken helm template * Add missing infra config and service template config to batch job pods * remove redundant config for batch job pods * enable SHA256 checksums for Celery S3 backend to avoid MD5 decoding issues when creating endpoints * Fix failing md5 monkey patch * bump sqlalchemy to 2.0.21 to address md5 FIPS compliance * Fix black linting errors * wrap Dockerfile layers between root and nonroot user * Remove the federal/ directory since Dockerfile is now FIPS compliant and doesn't require monkey patching * set celery_enable_sha256 to true in all configs for FIPS compliance * make changes backwards compatible by having separate Dockerfiles * formatting
1 parent cbccd65 commit 210fa3e

File tree

10 files changed

+99
-39
lines changed

10 files changed

+99
-39
lines changed

.circleci/config.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ jobs:
105105
executor: ubuntu-large
106106
steps:
107107
- checkout
108+
- chainguard_login
108109
- run:
109110
name: Build Docker Image
110111
command: |
@@ -116,6 +117,7 @@ jobs:
116117
- aws-cli/setup:
117118
role-arn: ${CIRCLECI_ROLE_ARN}
118119
aws-region: AWS_REGION
120+
- chainguard_login
119121
- run:
120122
name: Build Docker Image
121123
command: |
@@ -156,7 +158,11 @@ jobs:
156158
- run:
157159
name: Pre-load model-engine image to minikube
158160
command: |
161+
# Load the base image for gateway/init containers
159162
minikube --logtostderr -v 1 image load model-engine:$CIRCLE_SHA1
163+
# Tag and load with ECR prefix for batch job containers
164+
docker tag model-engine:$CIRCLE_SHA1 $CIRCLECI_AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com/model-engine:$CIRCLE_SHA1
165+
minikube --logtostderr -v 1 image load $CIRCLECI_AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com/model-engine:$CIRCLE_SHA1
160166
- run:
161167
name: Pre-load integration test images to minikube
162168
command: |
@@ -209,6 +215,20 @@ executors:
209215
resource_class: 2xlarge
210216

211217
commands:
218+
chainguard_login:
219+
description: Authenticate to Chainguard Registry via OIDC
220+
steps:
221+
- run:
222+
name: Install chainctl
223+
command: |
224+
curl -o chainctl "https://dl.enforce.dev/chainctl/latest/chainctl_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/aarch64/arm64/')"
225+
sudo install -o $UID -g $(id -g) -m 0755 chainctl /usr/local/bin/
226+
- run:
227+
name: Login to Chainguard Registry
228+
command: |
229+
chainctl auth login --identity-token "${CIRCLE_OIDC_TOKEN}" --identity "${CHAINGUARD_IDENTITY_ID}" --audience cgr.dev
230+
CHAINGUARD_TOKEN=$(chainctl auth token --audience cgr.dev)
231+
echo "${CHAINGUARD_TOKEN}" | docker login cgr.dev -u "oauth2accesstoken" --password-stdin
212232
environment_setup:
213233
description: Basic Environment setup
214234
steps:

charts/model-engine/templates/service_template_config_map.yaml

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,12 +1025,26 @@ data:
10251025
{{- toYaml . | nindent 12 }}
10261026
{{- end }}
10271027
serviceAccountName: {{ $launch_name }}
1028-
{{- if $require_aws_config }}
10291028
volumes:
1029+
{{- if $require_aws_config }}
10301030
- name: config-volume
10311031
configMap:
10321032
name: {{ $aws_config_map_name }}
1033-
{{- end }}
1033+
{{- end }}
1034+
{{- if $config_values }}
1035+
- name: service-config-volume
1036+
configMap:
1037+
name: {{ $launch_name }}-service-config
1038+
items:
1039+
- key: launch_service_config
1040+
path: service_config.yaml
1041+
- name: infra-service-config-volume
1042+
configMap:
1043+
name: {{ $launch_name }}-service-config
1044+
items:
1045+
- key: infra_service_config
1046+
path: config.yaml
1047+
{{- end }}
10341048
containers:
10351049
- name: main
10361050
image: {{ $gateway_repository }}:${GIT_TAG}
@@ -1077,12 +1091,18 @@ data:
10771091
cpu: 4
10781092
memory: 32Gi
10791093
ephemeral-storage: 30Gi
1080-
{{- if $require_aws_config }}
10811094
volumeMounts:
1095+
{{- if $require_aws_config }}
10821096
- name: config-volume
10831097
mountPath: /opt/.aws/config
10841098
subPath: config
1085-
{{- end }}
1099+
{{- end }}
1100+
{{- if $config_values }}
1101+
- name: service-config-volume
1102+
mountPath: /workspace/model-engine/service_configs
1103+
- name: infra-service-config-volume
1104+
mountPath: /workspace/model-engine/model_engine_server/core/configs
1105+
{{- end }}
10861106
{{- range $device := tuple "cpu" "gpu" }}
10871107
docker-image-batch-job-{{- $device }}.yaml: |-
10881108
apiVersion: batch/v1
@@ -1134,6 +1154,14 @@ data:
11341154
configMap:
11351155
name: {{ $aws_config_map_name }}
11361156
{{- end }}
1157+
{{- if $config_values }}
1158+
- name: service-config-volume
1159+
configMap:
1160+
name: {{ $launch_name }}-service-config
1161+
items:
1162+
- key: launch_service_config
1163+
path: service_config.yaml
1164+
{{- end }}
11371165
- name: workdir
11381166
emptyDir: {}
11391167
- name: dshm
@@ -1178,6 +1206,10 @@ data:
11781206
mountPath: /opt/.aws/config
11791207
subPath: config
11801208
{{- end }}
1209+
{{- if $config_values }}
1210+
- name: service-config-volume
1211+
mountPath: /workspace/model-engine/service_configs
1212+
{{- end }}
11811213
- name: workdir
11821214
mountPath: ${MOUNT_PATH}
11831215
- mountPath: /dev/shm
@@ -1212,6 +1244,10 @@ data:
12121244
mountPath: /opt/.aws/config
12131245
subPath: config
12141246
{{- end }}
1247+
{{- if $config_values }}
1248+
- name: service-config-volume
1249+
mountPath: /workspace/model-engine/service_configs
1250+
{{- end }}
12151251
- name: workdir
12161252
mountPath: ${MOUNT_PATH}
12171253
{{- end }}

federal/sitecustomize.py

Lines changed: 0 additions & 17 deletions
This file was deleted.

integration_tests/rest_api_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def my_model(**keyword_args):
7878
"flavor": {
7979
"flavor": "streaming_enhanced_runnable_image",
8080
"repository": "model-engine",
81-
"tag": "830c81ecba2a147022e504917c6ce18b00c2af44",
81+
"tag": os.environ.get("GIT_TAG"),
8282
"command": [
8383
"dumb-init",
8484
"--",
@@ -269,7 +269,7 @@ def my_model(**keyword_args):
269269
CREATE_DOCKER_IMAGE_BATCH_JOB_BUNDLE_REQUEST: Dict[str, Any] = {
270270
"name": format_name("di_batch_job_bundle_1"),
271271
"image_repository": "model-engine",
272-
"image_tag": "830c81ecba2a147022e504917c6ce18b00c2af44",
272+
"image_tag": os.environ.get("GIT_TAG"),
273273
"command": ["jq", ".", "/launch_mount_location/file"],
274274
"env": {"ENV1": "VAL1"},
275275
"mount_location": "/launch_mount_location/file",
@@ -289,7 +289,7 @@ def my_model(**keyword_args):
289289
CREATE_FINE_TUNE_DI_BATCH_JOB_BUNDLE_REQUEST: Dict[str, Any] = {
290290
"name": format_name("fine_tune_di_batch_job_bundle_1"),
291291
"image_repository": "model-engine",
292-
"image_tag": "830c81ecba2a147022e504917c6ce18b00c2af44",
292+
"image_tag": os.environ.get("GIT_TAG"),
293293
"command": ["cat", "/launch_mount_location/file"],
294294
"env": {"ENV1": "VAL1"},
295295
"mount_location": "/launch_mount_location/file",

federal/Dockerfile.chainguard renamed to model-engine/Dockerfile.fips

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
# federal/Dockerfile.chainguard
2-
FROM cgr.dev/scale.com/python-fips:3.10.15-dev
1+
FROM cgr.dev/scale.com/python-fips:3.10.19-dev
32
WORKDIR /workspace
43
USER root
54

6-
RUN apk update && apk add htop \
5+
RUN apk update && apk add \
6+
htop \
77
dumb-init \
88
libssh \
99
openssh-client \
@@ -13,7 +13,16 @@ RUN apk update && apk add htop \
1313
procps \
1414
libcurl-openssl4 \
1515
vim \
16-
kubectl
16+
kubectl \
17+
jq \
18+
gcc \
19+
glibc-dev \
20+
python-3.10-dev \
21+
libffi-dev \
22+
openssl-dev \
23+
build-base \
24+
postgresql-dev \
25+
libpq-16
1726

1827
RUN curl -Lo /bin/aws-iam-authenticator https://github.com/kubernetes-sigs/aws-iam-authenticator/releases/download/v0.5.9/aws-iam-authenticator_0.5.9_linux_amd64
1928
RUN chmod +x /bin/aws-iam-authenticator
@@ -23,8 +32,6 @@ RUN chmod -R 777 /workspace
2332

2433
RUN pip install awscli==1.34.28 --no-cache-dir
2534

26-
COPY federal/sitecustomize.py /usr/lib/python3.10/site-packages/sitecustomize.py
27-
2835
WORKDIR /workspace/model-engine/
2936
COPY model-engine/requirements-test.txt /workspace/model-engine/requirements-test.txt
3037
COPY model-engine/requirements.txt /workspace/model-engine/requirements.txt
@@ -39,9 +46,9 @@ RUN pip install -e .
3946
COPY integration_tests /workspace/integration_tests
4047

4148
WORKDIR /workspace
42-
ENV PYTHONPATH /workspace
43-
ENV WORKSPACE /workspace
49+
ENV PYTHONPATH=/workspace
50+
ENV WORKSPACE=/workspace
4451

4552
USER nonroot
4653

47-
EXPOSE 5000
54+
EXPOSE 5000

model-engine/model_engine_server/common/settings.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,10 @@ def generate_destination(user_id: str, endpoint_name: str, endpoint_type: str) -
6161

6262

6363
def _generate_deployment_name_parts(user_id: str, endpoint_name: str) -> List[str]:
64-
user_endpoint_hash = hashlib.md5((user_id + endpoint_name).encode("utf-8")).hexdigest()
64+
# Use MD5 for deployment name hashing (non-security purpose) - FIPS compliant
65+
user_endpoint_hash = hashlib.new(
66+
"md5", (user_id + endpoint_name).encode("utf-8"), usedforsecurity=False
67+
).hexdigest()
6568
return [
6669
DEPLOYMENT_PREFIX,
6770
user_id[:24],

model-engine/model_engine_server/core/celery/celery_autoscaler.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ class CeleryAutoscalerParams:
6969

7070

7171
def _hash_any_to_int(data: Any):
72-
return int(hashlib.md5(str(data).encode()).hexdigest(), 16) # nosemgrep
72+
# Use MD5 for hashing (non-security purpose) - FIPS compliant with usedforsecurity=False
73+
return int(
74+
hashlib.new("md5", str(data).encode(), usedforsecurity=False).hexdigest(), 16
75+
) # nosemgrep
7376

7477

7578
async def list_deployments(core_api, apps_api) -> Dict[Tuple[str, str], CeleryAutoscalerParams]:

model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,10 @@ def _get_inject_bundle_image_params(
599599
bundle_id = model_bundle.id
600600
service_image_str = "-".join([base_image_params.image_tag, GIT_TAG, bundle_id])
601601
# nosemgrep
602-
service_image_hash = hashlib.md5(str(service_image_str).encode("utf-8")).hexdigest()
602+
# Use MD5 for image tag hashing (non-security purpose, required for Docker compatibility)
603+
service_image_hash = hashlib.new(
604+
"md5", str(service_image_str).encode("utf-8"), usedforsecurity=False
605+
).hexdigest()
603606
service_image_tag = f"inject-bundle-image-{service_image_hash}"
604607
ecr_repo = base_image_params.repo
605608

@@ -812,7 +815,12 @@ def _get_restricted_env_vars(env_vars: Dict[str, str]) -> Set[str]:
812815
def _get_requirements_hash(requirements: List[str]) -> str:
813816
"""Identifying hash for endpoint's Python requirements."""
814817
# nosemgrep
815-
return hashlib.md5("\n".join(sorted(requirements)).encode("utf-8")).hexdigest()[:6]
818+
# Use MD5 for requirements hashing (non-security purpose)
819+
return hashlib.new(
820+
"md5",
821+
"\n".join(sorted(requirements)).encode("utf-8"),
822+
usedforsecurity=False,
823+
).hexdigest()[:6]
816824

817825
@staticmethod
818826
def _get_image_tag(base_image_tag: str, git_tag: str, requirements_hash: str) -> str:

model-engine/requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ rich~=12.6
4646
sentencepiece==0.1.99
4747
sh~=1.13
4848
smart-open~=5.2
49-
sqlalchemy[asyncio]~=2.0.4
49+
sqlalchemy[asyncio]~=2.0.21
5050
sse-starlette==1.6.1
5151
sseclient-py==1.7.2
5252
starlette[full]>=0.36.2 # not used directly, but needs to be pinned for Microsoft security scan

model-engine/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ sniffio==1.3.0
458458
# via
459459
# anyio
460460
# httpx
461-
sqlalchemy[asyncio]==2.0.4
461+
sqlalchemy[asyncio]==2.0.21
462462
# via
463463
# -r model-engine/requirements.in
464464
# alembic

0 commit comments

Comments
 (0)