Skip to content

Commit 7089659

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents 67be92f + b22fe89 commit 7089659

40 files changed

+4241
-96
lines changed

.tekton/odh-training-cpu-torch29-py312-rhel9-pull-request.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ metadata:
99
pipelinesascode.tekton.dev/cancel-in-progress: "true"
1010
pipelinesascode.tekton.dev/max-keep-runs: "3"
1111
pipelinesascode.tekton.dev/on-cel-expression: event == "pull_request" && target_branch == "main" &&
12-
("images/universal/training/cpu-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/cpu-torch290-py312/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-cpu-torch29-py312-rhel9-pull-request.yaml".pathChanged())
12+
("images/universal/training/cpu-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/utils/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-cpu-torch29-py312-rhel9-pull-request.yaml".pathChanged() || "images/universal/training/utils/start-notebook.sh".pathChanged() || "images/universal/training/utils/process.sh".pathChanged())
1313
creationTimestamp: null
1414
labels:
1515
appstudio.openshift.io/application: odh-universal-image
@@ -24,16 +24,16 @@ spec:
2424
- name: revision
2525
value: '{{revision}}'
2626
- name: output-image
27-
value: quay.io/opendatahub/odh-training-th03-cuda128-torch28-py312-rhel9:on-pr-cpu-{{revision}}
27+
value: quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:on-pr-cpu-{{revision}}
2828
- name: image-expires-after
2929
value: 5d
3030
- name: build-platforms
3131
value:
3232
- linux-extra-fast/amd64
3333
- name: dockerfile
34-
value: /images/universal/training/cpu-torch290-py312/Dockerfile
34+
value: cpu-torch290-py312/Dockerfile
3535
- name: path-context
36-
value: images/universal/training/cpu-torch290-py312
36+
value: images/universal/training
3737
pipelineSpec:
3838
description: |
3939
This pipeline is ideal for building multi-arch container images from a Containerfile while maintaining trust after pipeline customization.

.tekton/odh-training-cpu-torch29-py312-rhel9-push.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ metadata:
88
pipelinesascode.tekton.dev/cancel-in-progress: "false"
99
pipelinesascode.tekton.dev/max-keep-runs: "3"
1010
pipelinesascode.tekton.dev/on-cel-expression: event == "push" && target_branch == "main" &&
11-
("images/universal/training/cpu-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/cpu-torch290-py312/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-cpu-torch29-py312-rhel9-push.yaml".pathChanged())
11+
("images/universal/training/cpu-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/utils/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-cpu-torch29-py312-rhel9-push.yaml".pathChanged() || "images/universal/training/utils/start-notebook.sh".pathChanged() || "images/universal/training/utils/process.sh".pathChanged())
1212
creationTimestamp: null
1313
labels:
1414
appstudio.openshift.io/application: odh-universal-image
@@ -23,14 +23,14 @@ spec:
2323
- name: revision
2424
value: '{{revision}}'
2525
- name: output-image
26-
value: quay.io/opendatahub/odh-training-th03-cuda128-torch28-py312-rhel9:latest-cpu
26+
value: quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:latest-cpu
2727
- name: build-platforms
2828
value:
2929
- linux-extra-fast/amd64
3030
- name: dockerfile
31-
value: /images/universal/training/cpu-torch290-py312/Dockerfile
31+
value: cpu-torch290-py312/Dockerfile
3232
- name: path-context
33-
value: images/universal/training/cpu-torch290-py312
33+
value: images/universal/training
3434
pipelineSpec:
3535
description: |
3636
This pipeline is ideal for building multi-arch container images from a Containerfile while maintaining trust after pipeline customization.

.tekton/odh-training-rocm64-torch29-py312-rhel9-pull-request.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ metadata:
99
pipelinesascode.tekton.dev/cancel-in-progress: "true"
1010
pipelinesascode.tekton.dev/max-keep-runs: "3"
1111
pipelinesascode.tekton.dev/on-cel-expression: event == "pull_request" && target_branch == "main" &&
12-
("images/universal/training/rocm64-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/rocm64-torch290-py312/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-rocm64-torch290-py312-rhel9-pull-request.yaml".pathChanged())
12+
("images/universal/training/rocm64-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/utils/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-rocm64-torch29-py312-rhel9-pull-request.yaml".pathChanged() || "images/universal/training/utils/start-notebook.sh".pathChanged() || "images/universal/training/utils/process.sh".pathChanged())
1313
creationTimestamp: null
1414
labels:
1515
appstudio.openshift.io/application: odh-universal-image
@@ -27,18 +27,18 @@ spec:
2727
- name: revision
2828
value: '{{revision}}'
2929
- name: output-image
30-
value: quay.io/opendatahub/odh-training-th03-cuda128-torch28-py312-rhel9:rocm-on-pr-{{revision}}
30+
value: quay.io/opendatahub/odh-training-rocm64-torch29-py312-rhel9:rocm-on-pr-{{revision}}
3131
- name: image-expires-after
3232
value: 5d
3333
- name: build-platforms
3434
value:
3535
- linux-d160-m8xlarge/amd64
3636
- name: build-args-file
37-
value: images/universal/training/rocm64-torch290-py312/argfile.konflux.conf
37+
# NOTE(review): the Konflux buildah task appears to resolve build-args-file from the repository root, not from path-context - confirm against the task definition.
value: images/universal/training/rocm64-torch290-py312/argfile.konflux.conf
3838
- name: dockerfile
39-
value: Dockerfile
39+
value: rocm64-torch290-py312/Dockerfile
4040
- name: path-context
41-
value: images/universal/training/rocm64-torch290-py312
41+
value: images/universal/training
4242
pipelineSpec:
4343
timeouts:
4444
pipeline: 100h

.tekton/odh-training-rocm64-torch29-py312-rhel9-push.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ metadata:
88
pipelinesascode.tekton.dev/cancel-in-progress: "false"
99
pipelinesascode.tekton.dev/max-keep-runs: "3"
1010
pipelinesascode.tekton.dev/on-cel-expression: event == "push" && target_branch == "main" &&
11-
("images/universal/training/rocm64-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/rocm64-torch290-py312/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-rocm64-torch290-py312-rhel9-push.yaml".pathChanged())
11+
("images/universal/training/rocm64-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/utils/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-rocm64-torch29-py312-rhel9-push.yaml".pathChanged() || "images/universal/training/utils/start-notebook.sh".pathChanged() || "images/universal/training/utils/process.sh".pathChanged())
1212
creationTimestamp: null
1313
labels:
1414
appstudio.openshift.io/application: odh-universal-image
@@ -26,16 +26,16 @@ spec:
2626
- name: revision
2727
value: '{{revision}}'
2828
- name: output-image
29-
value: quay.io/opendatahub/odh-training-th03-cuda128-torch28-py312-rhel9:rocm-latest
29+
value: quay.io/opendatahub/odh-training-rocm64-torch29-py312-rhel9:rocm-latest
3030
- name: build-platforms
3131
value:
3232
- linux-d160-m8xlarge/amd64
3333
- name: dockerfile
34-
value: Dockerfile
34+
value: rocm64-torch290-py312/Dockerfile
3535
- name: path-context
36-
value: images/universal/training/rocm64-torch290-py312
36+
value: images/universal/training
3737
- name: build-args-file
38-
value: images/universal/training/rocm64-torch290-py312/argfile.konflux.conf
38+
# NOTE(review): the Konflux buildah task appears to resolve build-args-file from the repository root, not from path-context - confirm against the task definition.
value: images/universal/training/rocm64-torch290-py312/argfile.konflux.conf
3939
pipelineSpec:
4040
timeouts:
4141
pipeline: 100h

.tekton/odh-training-th04-cuda128-torch29-py312-rhel9-pull-request.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ metadata:
99
pipelinesascode.tekton.dev/cancel-in-progress: "true"
1010
pipelinesascode.tekton.dev/max-keep-runs: "3"
1111
pipelinesascode.tekton.dev/on-cel-expression: event == "pull_request" && target_branch == "main" &&
12-
("images/universal/training/th04-cuda128-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/th04-cuda128-torch290-py312/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-th04-cuda128-torch29-py312-rhel9-pull-request.yaml".pathChanged())
12+
("images/universal/training/th04-cuda128-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/utils/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-th04-cuda128-torch29-py312-rhel9-pull-request.yaml".pathChanged() || "images/universal/training/utils/start-notebook.sh".pathChanged() || "images/universal/training/utils/process.sh".pathChanged())
1313
creationTimestamp: null
1414
labels:
1515
appstudio.openshift.io/application: odh-universal-image
@@ -27,16 +27,16 @@ spec:
2727
- name: revision
2828
value: '{{revision}}'
2929
- name: output-image
30-
value: quay.io/redhat-user-workloads/open-data-hub-tenant/odh-training-th04-cuda128-torch29-py312-rhel9:on-pr-{{revision}}
30+
value: quay.io/opendatahub/odh-training-th04-cuda128-torch29-py312-rhel9:on-pr-{{revision}}
3131
- name: image-expires-after
3232
value: 5d
3333
- name: build-platforms
3434
value:
35-
- linux-extra-fast/amd64
35+
- linux-d160-m8xlarge/amd64
3636
- name: dockerfile
37-
value: Dockerfile
37+
value: th04-cuda128-torch290-py312/Dockerfile
3838
- name: path-context
39-
value: images/universal/training/th04-cuda128-torch290-py312
39+
value: images/universal/training
4040
pipelineSpec:
4141
description: |
4242
This pipeline is ideal for building multi-arch container images from a Containerfile while maintaining trust after pipeline customization.

.tekton/odh-training-th04-cuda128-torch29-py312-rhel9-push.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ metadata:
88
pipelinesascode.tekton.dev/cancel-in-progress: "false"
99
pipelinesascode.tekton.dev/max-keep-runs: "3"
1010
pipelinesascode.tekton.dev/on-cel-expression: event == "push" && target_branch == "main" &&
11-
("images/universal/training/th04-cuda128-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/th04-cuda128-torch290-py312/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-th04-cuda128-torch29-py312-rhel9-push.yaml".pathChanged())
11+
("images/universal/training/th04-cuda128-torch290-py312/Dockerfile".pathChanged() || "images/universal/training/utils/entrypoint-universal.sh".pathChanged() || ".tekton/odh-training-th04-cuda128-torch29-py312-rhel9-push.yaml".pathChanged() || "images/universal/training/utils/start-notebook.sh".pathChanged() || "images/universal/training/utils/process.sh".pathChanged())
1212
creationTimestamp: null
1313
labels:
1414
appstudio.openshift.io/application: odh-universal-image
@@ -26,14 +26,14 @@ spec:
2626
- name: revision
2727
value: '{{revision}}'
2828
- name: output-image
29-
value: quay.io/redhat-user-workloads/open-data-hub-tenant/odh-training-th04-cuda128-torch29-py312-rhel9:latest
29+
value: quay.io/opendatahub/odh-training-th04-cuda128-torch29-py312-rhel9:latest
3030
- name: build-platforms
3131
value:
32-
- linux-extra-fast/amd64
32+
- linux-d160-m8xlarge/amd64
3333
- name: dockerfile
34-
value: Dockerfile
34+
value: th04-cuda128-torch290-py312/Dockerfile
3535
- name: path-context
36-
value: images/universal/training/th04-cuda128-torch290-py312
36+
value: images/universal/training
3737
pipelineSpec:
3838
description: |
3939
This pipeline is ideal for building multi-arch container images from a Containerfile while maintaining trust after pipeline customization.

images/universal/training/cpu-torch290-py312/Dockerfile

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
################################################################################
99
# Build Arguments
1010
################################################################################
11-
ARG BASE_IMAGE=quay.io/opendatahub/odh-workbench-jupyter-minimal-cpu-py312-ubi9:2025b-v1.39
11+
# Align base with notebooks minimal CPU
12+
ARG BASE_IMAGE=quay.io/opendatahub/odh-base-image-cpu-py312-c9s:latest
1213
ARG PYTHON_VERSION=3.12
14+
ARG SRC_DIR=cpu-torch290-py312
1315

1416
################################################################################
1517
# Builder Stage - Install uv for dependency resolution
@@ -34,7 +36,8 @@ LABEL name="cpu:py312-torch290" \
3436
io.k8s.description="CPU image: Jupyter workbench by default; runtime when command provided."
3537

3638
# Copy license file
37-
COPY LICENSE.md /licenses/cpu-license.md
39+
ARG SRC_DIR
40+
COPY ${SRC_DIR}/LICENSE.md /licenses/cpu-license.md
3841

3942
USER 0
4043
WORKDIR /opt/app-root/bin
@@ -44,9 +47,36 @@ WORKDIR /opt/app-root/bin
4447
################################################################################
4548
FROM base AS system-deps
4649

50+
ARG SRC_DIR
4751
USER 0
4852
WORKDIR /opt/app-root/bin
4953

54+
# Core OS packages (minimal, keep from notebooks)
55+
RUN dnf install -y --setopt=install_weak_deps=False \
56+
perl \
57+
mesa-libGL \
58+
skopeo && \
59+
dnf clean all && rm -rf /var/cache/dnf/*
60+
61+
# Install the oc client (matches notebooks)
62+
RUN /bin/bash <<'EOF'
63+
set -Eeuxo pipefail
64+
# -f/--fail: exit non-zero on an HTTP error so a bad download stops the build here instead of at tar
curl -fL https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/clients/ocp/stable/openshift-client-linux.tar.gz \
65+
-o /tmp/openshift-client-linux.tar.gz
66+
tar -xzvf /tmp/openshift-client-linux.tar.gz oc
67+
rm -f /tmp/openshift-client-linux.tar.gz
68+
EOF
69+
70+
# Notebook utils and PDF deps (local copy)
71+
COPY utils/ /opt/app-root/bin/utils/
72+
RUN chmod -R 0755 /opt/app-root/bin/utils && \
73+
/opt/app-root/bin/utils/install_pdf_deps.sh
74+
75+
# Copy notebook entry script and entrypoint
76+
COPY utils/start-notebook.sh /opt/app-root/bin/start-notebook.sh
77+
COPY utils/entrypoint-universal.sh /usr/local/bin/entrypoint-universal.sh
78+
RUN chmod 0755 /opt/app-root/bin/start-notebook.sh /usr/local/bin/entrypoint-universal.sh
79+
5080
# Install build toolchain (from UBI repos)
5181
# - gcc, gcc-c++, make: C/C++ compilation tools
5282
# - python3-devel: Python headers for building native extensions
@@ -65,14 +95,18 @@ RUN dnf install -y --setopt=install_weak_deps=False \
6595
################################################################################
6696
FROM system-deps AS python-deps
6797

98+
ARG SRC_DIR
6899
USER 0
69100
WORKDIR /tmp/deps
70101

102+
# Re-declare PYTHON_VERSION so the build arg is in scope for this stage
103+
ARG PYTHON_VERSION
104+
71105
# Copy uv from builder stage (FIPS: uv only used during build, not in runtime)
72106
COPY --from=builder /opt/app-root/bin/uv /usr/local/bin/uv
73107

74108
# Copy dependency files
75-
COPY --chown=1001:0 pyproject.toml pylock.toml ./
109+
COPY --chown=1001:0 ${SRC_DIR}/pyproject.toml ${SRC_DIR}/pylock.toml ./
76110

77111
# Switch to user 1001 for pip installations
78112
USER 1001
@@ -88,6 +122,23 @@ ENV UV_NO_CACHE=
88122
RUN pip install --retries 5 --timeout 300 --no-cache-dir \
89123
"git+https://github.com/opendatahub-io/kubeflow-sdk@main"
90124

125+
# Apply notebook customizations (match notebooks minimal)
126+
RUN /bin/bash <<'EOF'
127+
set -Eeuo pipefail
128+
# disable announcements
129+
jupyter labextension disable "@jupyterlab/apputils-extension:announcements" || true
130+
# rename kernel launcher to current python version
131+
sed -i -e "s/Python.*/$(python --version | cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json
132+
# copy jupyter config
133+
mkdir -p /opt/app-root/etc/jupyter
134+
cp /opt/app-root/bin/utils/jupyter_server_config.py /opt/app-root/etc/jupyter
135+
# apply addons
136+
/opt/app-root/bin/utils/addons/apply.sh
137+
# usercustomize / protobuf patch
138+
cp /opt/app-root/bin/utils/usercustomize.pth /opt/app-root/lib/python${PYTHON_VERSION}/site-packages/
139+
cp /opt/app-root/bin/utils/monkey_patch_protobuf_6x.py /opt/app-root/lib/python${PYTHON_VERSION}/site-packages/
140+
EOF
141+
91142
# Fix permissions for OpenShift
92143
ARG PYTHON_VERSION
93144
USER 0
@@ -111,17 +162,22 @@ WORKDIR /opt/app-root/src
111162

112163
# Copy Python site-packages and CLI entry points from python-deps stage
113164
ARG PYTHON_VERSION
165+
ARG SRC_DIR
114166
COPY --from=python-deps /opt/app-root/lib/python${PYTHON_VERSION}/site-packages /opt/app-root/lib/python${PYTHON_VERSION}/site-packages
115167
COPY --from=python-deps /opt/app-root/bin /opt/app-root/bin
168+
# Copy Jupyter shared assets (lab static files, etc.)
169+
COPY --from=python-deps /opt/app-root/share/jupyter /opt/app-root/share/jupyter
170+
# Copy Jupyter etc configs (server extensions, settings)
171+
COPY --from=python-deps /opt/app-root/etc/jupyter /opt/app-root/etc/jupyter
116172

117173
# FIPS-friendly: Remove uv from final image
118174
RUN rm -f /opt/app-root/bin/uv
119175

120176
# Copy license file
121-
COPY LICENSE.md /licenses/cpu-license.md
177+
COPY ${SRC_DIR}/LICENSE.md /licenses/cpu-license.md
122178

123179
# Copy entrypoint
124-
COPY --chmod=0755 entrypoint-universal.sh /usr/local/bin/entrypoint-universal.sh
180+
COPY --chmod=0755 utils/entrypoint-universal.sh /usr/local/bin/entrypoint-universal.sh
125181

126182
# Fix permissions for OpenShift (final stage)
127183
RUN fix-permissions /opt/app-root -P \

images/universal/training/cpu-torch290-py312/pylock.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,6 +1343,12 @@ version = "0.1.1"
13431343
sdist = { url = "https://files.pythonhosted.org/packages/c7/77/19ddf2683bcf03ef73ff6f5d68e51ef6d6ff8c5ba4024f2330009dcc71ff/odh_jupyter_trash_cleanup-0.1.1.tar.gz", upload-time = 2025-09-18T14:04:56Z, size = 27480, hashes = { sha256 = "d97d3a6c70fcf4e47041dc89445505ba6b6964c9319254917f30393b656e6b5c" } }
13441344
wheels = [{ url = "https://files.pythonhosted.org/packages/5e/c2/37eaa2c4efd7c1a6403aaa59822f0df2730b0f43b3fc274f4bd14998bd12/odh_jupyter_trash_cleanup-0.1.1-py3-none-any.whl", upload-time = 2025-09-18T14:04:55Z, size = 27553, hashes = { sha256 = "19b409372d5781937b42e047bf23503f4d602461765a9b145b0a6974cd090623" } }]
13451345

1346+
[[packages]]
1347+
name = "olot"
1348+
version = "0.1.13"
1349+
sdist = { url = "https://files.pythonhosted.org/packages/9e/cc/e18944e3c6af81d690e5c3bc24068459cd3b84f38f196e11941a0235f701/olot-0.1.13.tar.gz", upload-time = 2025-11-26T15:30:50Z, size = 25432, hashes = { sha256 = "a553c0e1798156ec13257689d483993363303de8573cc91465c48edb1c66b70f" } }
1350+
wheels = [{ url = "https://files.pythonhosted.org/packages/46/dc/799d7027883a6cf70802f380f5715aa41870ea07b5017bd2916799909751/olot-0.1.13-py3-none-any.whl", upload-time = 2025-11-26T15:30:48Z, size = 32481, hashes = { sha256 = "fa5dff7f5766c62e49fc49983546b6e8050268b4733f9dd2fb305a402f6e939a" } }]
1351+
13461352
[[packages]]
13471353
name = "packaging"
13481354
version = "25.0"

images/universal/training/cpu-torch290-py312/pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ dependencies = [
177177
# Training Frameworks
178178
"training_hub==0.4.0",
179179

180+
# Misc
181+
"olot",
182+
180183
# Note: GPU-specific packages are NOT included:
181184
# - flash-attn (requires CUDA/ROCm)
182185
# - bitsandbytes (GPU-focused)

0 commit comments

Comments
 (0)