diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index f2b6fce9..26341fbb 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -29,3 +29,9 @@ updates:
     schedule:
       interval: "weekly"
       day: "monday"
+
+  - package-ecosystem: "docker"
+    directory: "/"
+    # Check for updates once a week
+    schedule:
+      interval: "weekly"
diff --git a/deployments/container/Dockerfile.distroless b/deployments/container/Dockerfile.distroless
index a987285a..761d793f 100644
--- a/deployments/container/Dockerfile.distroless
+++ b/deployments/container/Dockerfile.distroless
@@ -16,13 +16,19 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir --prefix=/build/deps -r requirements.txt
 # upgrade urllib3 to address CVE.
 # Also see https://github.com/kubernetes-client/python/issues/2477#issuecomment-3628140179
-RUN pip install --upgrade --no-cache-dir --no-warn-conflicts --prefix=/build/deps urllib3==2.6.1
+RUN pip install --upgrade --no-cache-dir --no-warn-conflicts --prefix=/build/deps urllib3==2.6.3
+
 RUN git clone --branch ${GPU_ADMIN_TOOLS_VERSION} https://github.com/NVIDIA/gpu-admin-tools.git
 
-# Stage 2: Distroless runtime
-# Note: using dev version until gpu-operator is updated not to use
-# hardcoded command.
-FROM nvcr.io/nvidia/distroless/python:3.13-v3.1.1-dev
+# Stage 2: build rm utility
+FROM alpine:3.21 AS stg2
+RUN apk add --no-cache musl-dev gcc
+WORKDIR /build
+COPY rmsrc/rm.c rm.c
+RUN gcc -static -Os rm.c -o rm
+
+# Stage 3: Distroless runtime
+FROM nvcr.io/nvidia/distroless/python:3.13-v4.0.0
 
 # Copy Python dependencies from builder
 COPY --from=builder /build/deps /usr/local
@@ -31,12 +37,12 @@ COPY --from=builder /build/deps /usr/local
 COPY main.py /app/
 COPY gpu_operator_eviction.py /app/
 
-# TODO: eliminate this when dev version is removed
-COPY scripts/k8s-cc-manager /usr/bin/
-
 # Copy gpu-admin-tools from source tree
 COPY --from=builder /build/gpu-admin-tools /app/gpu-admin-tools
 
+# Copy rm for the preStop hook
+COPY --from=stg2 /build/rm /bin/rm
+
 WORKDIR /app
 
 # Set PYTHONPATH to find installed packages
@@ -44,11 +50,8 @@ ENV PYTHONPATH=/usr/local/lib/python3.13/site-packages
 # Run as non-root (distroless default)
 USER 0:0
 
-SHELL ["/busybox/sh", "-c"]
-RUN ln -s /busybox/sh /bin/sh
-
 # clean up left over dist-info
-RUN rm -rf /usr/local/lib/python3.13/site-packages/urllib3-2.3.0.dist-info
+RUN ["/bin/rm", "-rf", "/usr/local/lib/python3.13/site-packages/urllib3-2.3.0.dist-info"]
 
 ARG VERSION="N/A"
 ARG GIT_COMMIT="unknown"
diff --git a/rmsrc/rm.c b/rmsrc/rm.c
new file mode 100644
index 00000000..b0b3d176
--- /dev/null
+++ b/rmsrc/rm.c
@@ -0,0 +1,121 @@
+/*
+ * Implements the rm command
+ *
+ * Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _XOPEN_SOURCE 500
+#include <errno.h>
+#include <ftw.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* Upper bound on the directory descriptors nftw() may hold open at once. */
+#define MAX_OPEN_DIRS 64
+
+/* Set by -f: operands that do not exist are skipped silently. */
+static int force = 0;
+
+/*
+ * Decide whether the removal failure just recorded in errno counts.
+ *
+ * Call it immediately after the failing call, before errno can change.
+ * Under -f a missing path (ENOENT, or ENOTDIR for a non-directory path
+ * component) is ignored and 0 is returned. Any other failure is reported
+ * on stderr and 1 is returned.
+ */
+static int report_failure(const char *path)
+{
+    if (force && (errno == ENOENT || errno == ENOTDIR))
+        return 0;
+    perror(path);
+    return 1;
+}
+
+/*
+ * nftw() callback: remove a single entry of the tree being walked.
+ *
+ * main() walks with FTW_DEPTH, so a directory arrives after its contents.
+ * Returns 0 to keep walking, or 1 to stop; nftw() passes that 1 back to
+ * main(), which keeps it distinct from the -1 nftw() returns for its own
+ * errors.
+ */
+static int unlink_cb(const char *path, const struct stat *sb,
+                     int typeflag, struct FTW *ftwbuf)
+{
+    int ret;
+
+    (void)sb;
+    (void)ftwbuf;
+
+    /* FTW_DNR is a directory nftw() could not read; it still needs rmdir(). */
+    if (typeflag == FTW_DP || typeflag == FTW_DNR)
+        ret = rmdir(path);
+    else
+        ret = unlink(path);
+
+    if (ret != 0)
+        return report_failure(path);
+    return 0;
+}
+
+/*
+ * Usage: rm [-r] [-f] file...
+ *
+ * Tries every operand even after one fails. Exits 0 when each operand was
+ * removed or skipped under -f, and 1 otherwise or on an unknown option.
+ */
+int main(int argc, char **argv)
+{
+    int recursive = 0;
+    int status = EXIT_SUCCESS;
+    int opt;
+
+    while ((opt = getopt(argc, argv, "rf")) != -1) {
+        switch (opt) {
+        case 'r':
+            recursive = 1;
+            break;
+        case 'f':
+            force = 1;
+            break;
+        default:
+            fprintf(stderr, "usage: rm [-r] [-f] file...\n");
+            return EXIT_FAILURE;
+        }
+    }
+
+    /* Nothing to remove. */
+    if (optind >= argc)
+        return EXIT_SUCCESS;
+
+    for (int i = optind; i < argc; i++) {
+        if (recursive) {
+            int rc = nftw(argv[i], unlink_cb, MAX_OPEN_DIRS,
+                          FTW_DEPTH | FTW_PHYS);
+
+            /* -1 means nftw() itself failed and nothing was reported yet. */
+            if (rc == -1)
+                rc = report_failure(argv[i]);
+            if (rc != 0)
+                status = EXIT_FAILURE;
+        } else if (unlink(argv[i]) != 0 && report_failure(argv[i])) {
+            status = EXIT_FAILURE;
+        }
+    }
+
+    return status;
+}
diff --git a/scripts/k8s-cc-manager b/scripts/k8s-cc-manager
deleted file mode 100755
index 9fab009d..00000000
--- a/scripts/k8s-cc-manager
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-
-# Mimics the entrypoint used by existing gpu-operator manifest
-# To be deprecated when the manifest is adjusted
-# Run /app/main.py
-python3 /app/main.py