diff --git a/.tekton/vllm-cuda-pull-request.yaml b/.tekton/vllm-cuda-pull-request.yaml new file mode 100644 index 000000000000..17c5367f1d9e --- /dev/null +++ b/.tekton/vllm-cuda-pull-request.yaml @@ -0,0 +1,695 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + annotations: + build.appstudio.openshift.io/repo: https://github.com/red-hat-data-services/vllm?rev={{revision}} + build.appstudio.redhat.com/commit_sha: '{{revision}}' + build.appstudio.redhat.com/target_branch: '{{target_branch}}' + build.appstudio.redhat.com/pull_request_number: '{{pull_request_number}}' + pipelinesascode.tekton.dev/max-keep-runs: "3" + pipelinesascode.tekton.dev/on-comment: "^/build-from-rhelai-wheels" + pipelinesascode.tekton.dev/cancel-in-progress: "true" + pipelinesascode.tekton.dev/on-cel-expression: | + false + creationTimestamp: null + labels: + appstudio.openshift.io/application: external-rhoai-v2-20 + appstudio.openshift.io/component: vllm-cuda-v2-20 + pipelines.appstudio.openshift.io/type: build + name: vllm-cuda-wheels-on-pull-request + namespace: rhoai-tenant +spec: + params: + - name: image-expires-after + value: 5d + - name: git-url + value: '{{source_url}}' + - name: revision + value: '{{revision}}' + - name: output-image + value: quay.io/modh/vllm:on-pr-cuda-{{revision}} + - name: dockerfile + value: Dockerfile.ubi + - name: path-context + value: . + - name: build-args-file + value: argfiles/argfile.ubi + - name: additional-build-secret + value: rhel-ai-private-index-auth + taskRunSpecs: + - pipelineTaskName: ecosystem-cert-preflight-checks + computeResources: + requests: + cpu: '8' + memory: 16Gi + limits: + cpu: '16' + memory: 32Gi + - pipelineTaskName: clair-scan + computeResources: + requests: + cpu: '8' + memory: 16Gi + limits: + cpu: '16' + memory: 32Gi + pipelineSpec: + description: | + This pipeline is ideal for building multi-arch container images from a Containerfile while maintaining trust after pipeline customization. + + _Uses `buildah` to create a multi-platform container image leveraging [trusted artifacts](https://konflux-ci.dev/architecture/ADR/0036-trusted-artifacts.html). It also optionally creates a source image and runs some build-time tests. This pipeline requires that the [multi platform controller](https://github.com/konflux-ci/multi-platform-controller) is deployed and configured on your Konflux instance. Information is shared between tasks using OCI artifacts instead of PVCs. EC will pass the [`trusted_task.trusted`](https://enterprisecontract.dev/docs/ec-policies/release_policy.html#trusted_task__trusted) policy as long as all data used to build the artifact is generated from trusted tasks. + This pipeline is pushed as a Tekton bundle to [quay.io](https://quay.io/repository/konflux-ci/tekton-catalog/pipeline-docker-build-multi-platform-oci-ta?tab=tags)_ + finally: + - name: show-sbom + params: + - name: IMAGE_URL + value: $(tasks.build-image-index.results.IMAGE_URL) + taskRef: + params: + - name: name + value: show-sbom + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-show-sbom:0.1@sha256:945a7c9066d3e0a95d3fddb7e8a6992e4d632a2a75d8f3a9bd2ff2fef0ec9aa0 + - name: kind + value: task + resolver: bundles + - name: send-slack-notification + params: + - name: message + value: "$(tasks.rhoai-init.results.slack-message-failure-text)" + - name: secret-name + value: rhoai-slack-webhook-secret + - name: key-name + value: rhoai-slack-webhook-key + taskRef: + params: + - name: name + value: slack-webhook-notification + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-slack-webhook-notification:0.1@sha256:dc17b70633363d78414b8c06dc1660d25742935f106a6116995638e1210c2730 + - name: kind + value: task + resolver: bundles + when: + - input: $(tasks.status) + operator: in + values: + - "Failed" + params: + - description: Source Repository URL + name: git-url + type: string + - default: "" + description: Revision of the Source Repository + name: revision + type: string + - description: Fully Qualified Output Image + name: output-image + type: string + - default: . + description: Path to the source code of an application's component from where + to build image. + name: path-context + type: string + - default: Dockerfile + description: Path to the Dockerfile inside the context specified by parameter + path-context + name: dockerfile + type: string + - default: "false" + description: Force rebuild image + name: rebuild + type: string + - default: "false" + description: Skip checks against built image + name: skip-checks + type: string + - default: "false" + description: Execute the build with network isolation + name: hermetic + type: string + - default: "" + description: Build dependencies to be prefetched by Cachi2 + name: prefetch-input + type: string + - default: "" + description: Image tag expiration time, time values could be something like + 1h, 2d, 3w for hours, days, and weeks, respectively. + name: image-expires-after + - default: "false" + description: Build a source image. + name: build-source-image + type: string + - default: "true" + description: Add built image into an OCI image index + name: build-image-index + type: string + - default: [max_jobs=48] + description: Array of --build-arg values ("arg=value" strings) for buildah + name: build-args + type: array + - default: "" + description: Path to a file with build arguments for buildah, see https://www.mankier.com/1/buildah-build#--build-arg-file + name: build-args-file + type: string + - description: Kubernetes secret to mount into build, see https://www.redhat.com/en/blog/sensitive-data-containers + name: additional-build-secret + type: string + - default: + - linux-extra-fast/amd64 + description: List of platforms to build the container images on. The available + set of values is determined by the configuration of the multi-platform-controller. + name: build-platforms + type: array + results: + - description: "" + name: IMAGE_URL + value: $(tasks.build-image-index.results.IMAGE_URL) + - description: "" + name: IMAGE_DIGEST + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - description: "" + name: CHAINS-GIT_URL + value: $(tasks.clone-repository.results.url) + - description: "" + name: CHAINS-GIT_COMMIT + value: $(tasks.clone-repository.results.commit) + tasks: + - name: rhoai-init + params: + - name: pipelinerun-name + value: "$(context.pipelineRun.name)" + taskSpec: + results: + - description: Notification text to be posted to slack + name: slack-message-failure-text + steps: + - image: quay.io/rhoai-konflux/alpine:latest + name: rhoai-init + env: + - name: slack_message + valueFrom: + secretKeyRef: + name: rhoai-slack-message + key: slack-component-failure-notification + script: | + pipelinerun_name=$(params.pipelinerun-name) + echo "pipelinerun_name = $pipelinerun_name" + + target_branch={{target_branch}} + echo "target_branch = $target_branch" + + application_name=${target_branch/rhoai-/} + echo "application-name = $application_name" + + application_name=rhoai-v${application_name/./-} + echo "application-name = $application_name" + + component_name=${pipelinerun_name/-on-*/} + echo "component-name = $component_name" + + KONFLUX_SERVER="https://console.redhat.com" + build_url="${KONFLUX_SERVER}/application-pipeline/workspaces/rhoai/applications/${application_name}/pipelineruns/${pipelinerun_name}/logs" + + build_time="$(date +%Y-%m-%dT%H:%M:%S)" + + slack_message=${slack_message/__BUILD__URL__/$build_url} + slack_message=${slack_message/__PIPELINERUN__NAME__/$pipelinerun_name} + slack_message=${slack_message/__BUILD__TIME__/$build_time} + + echo -en "${slack_message}" > "$(results.slack-message-failure-text.path)" + - name: init + params: + - name: image-url + value: $(params.output-image) + - name: rebuild + value: $(params.rebuild) + - name: skip-checks + value: $(params.skip-checks) + taskRef: + params: + - name: name + value: init + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-init@sha256:4c6712db9419461b8c8a39523c012cb0dc061fb58563bb9170b3777d74f54659 + - name: kind + value: task + resolver: bundles + runAfter: + - rhoai-init + - name: clone-repository + params: + - name: url + value: $(params.git-url) + - name: revision + value: $(params.revision) + - name: ociStorage + value: $(params.output-image).git + - name: ociArtifactExpiresAfter + value: $(params.image-expires-after) + - name: fetchTags + value: "true" + - name: depth + value: "2147483647" + runAfter: + - init + taskRef: + params: + - name: name + value: git-clone-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-git-clone-oci-ta:0.1@sha256:f72fcca6732516339d55ac5f01660e287968e64e857a40a8608db27e298b5126 + - name: kind + value: task + resolver: bundles + when: + - input: $(tasks.init.results.build) + operator: in + values: + - "true" + workspaces: + - name: basic-auth + workspace: git-auth + - name: prefetch-dependencies + params: + - name: input + value: $(params.prefetch-input) + - name: SOURCE_ARTIFACT + value: $(tasks.clone-repository.results.SOURCE_ARTIFACT) + - name: ociStorage + value: $(params.output-image).prefetch + - name: ociArtifactExpiresAfter + value: $(params.image-expires-after) + runAfter: + - clone-repository + taskRef: + params: + - name: name + value: prefetch-dependencies-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta@sha256:6e3739fa3624783e72ea26885ca8eab2df7098b081485e89241ad3d518a5746d + - name: kind + value: task + resolver: bundles + workspaces: + - name: git-basic-auth + workspace: git-auth + - name: netrc + workspace: netrc + - matrix: + params: + - name: PLATFORM + value: + - $(params.build-platforms) + name: build-images + params: + - name: ADDITIONAL_SECRET + value: $(params.additional-build-secret) + - name: IMAGE + value: $(params.output-image) + - name: DOCKERFILE + value: $(params.dockerfile) + - name: CONTEXT + value: $(params.path-context) + - name: HERMETIC + value: $(params.hermetic) + - name: PREFETCH_INPUT + value: $(params.prefetch-input) + - name: IMAGE_EXPIRES_AFTER + value: $(params.image-expires-after) + - name: COMMIT_SHA + value: $(tasks.clone-repository.results.commit) + - name: BUILD_ARGS + value: + - $(params.build-args[*]) + - name: BUILD_ARGS_FILE + value: $(params.build-args-file) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + - name: CACHI2_ARTIFACT + value: $(tasks.prefetch-dependencies.results.CACHI2_ARTIFACT) + - name: IMAGE_APPEND_PLATFORM + value: "true" + runAfter: + - prefetch-dependencies + taskRef: + params: + - name: name + value: buildah-remote-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.3@sha256:3070ee1a75e9a5a0a082008e1f9b3d2df7a9508ca107678b2613dc201eb2e279 + - name: kind + value: task + resolver: bundles + when: + - input: $(tasks.init.results.build) + operator: in + values: + - "true" + - name: build-image-index + params: + - name: IMAGE + value: $(params.output-image) + - name: COMMIT_SHA + value: $(tasks.clone-repository.results.commit) + - name: IMAGE_EXPIRES_AFTER + value: $(params.image-expires-after) + - name: ALWAYS_BUILD_INDEX + value: $(params.build-image-index) + - name: IMAGES + value: + - $(tasks.build-images.results.IMAGE_REF[*]) + runAfter: + - build-images + taskRef: + params: + - name: name + value: build-image-index + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.1@sha256:09344e6bda708f48ef759bbe84bce99515549f4cfdcbe89e417f695c19463260 + - name: kind + value: task + resolver: bundles + when: + - input: $(tasks.init.results.build) + operator: in + values: + - "true" + - name: build-source-image + params: + - name: BINARY_IMAGE + value: $(params.output-image) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + - name: CACHI2_ARTIFACT + value: $(tasks.prefetch-dependencies.results.CACHI2_ARTIFACT) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: source-build-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.1@sha256:18241f95266a5e4316449f25a600f0f035d32a81c72ecd609a7e886de1843163 + - name: kind + value: task + resolver: bundles + when: + - input: $(tasks.init.results.build) + operator: in + values: + - "true" + - input: $(params.build-source-image) + operator: in + values: + - "true" + - name: sast-unicode-check + params: + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + - name: CACHI2_ARTIFACT + value: $(tasks.prefetch-dependencies.results.CACHI2_ARTIFACT) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: sast-unicode-check-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.1@sha256:424f2f659c02998dc3a43e1ce869e3148982c59adb74f953f8fa91ff1c9ab86e + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + workspaces: [] + - name: deprecated-base-image-check + params: + - name: IMAGE_URL + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: IMAGE_DIGEST + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: deprecated-image-check + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.4@sha256:241f87f75a6e4303fbd64b32ba1715d76fe3805c48a6c21829e6a564bcc3a576 + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - name: clair-scan + params: + - name: image-digest + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: clair-scan + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-clair-scan:0.2@sha256:4584647138af3efe5f1c523d0f56103c3b9647325634d17f04e2198a2c3c0c26 + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - name: ecosystem-cert-preflight-checks + params: + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: ecosystem-cert-preflight-checks + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-ecosystem-cert-preflight-checks:0.1@sha256:df8a25a3431a70544172ed4844f9d0c6229d39130633960729f825a031a7dea9 + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - name: sast-snyk-check + params: + - name: image-digest + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + - name: CACHI2_ARTIFACT + value: $(tasks.prefetch-dependencies.results.CACHI2_ARTIFACT) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: sast-snyk-check-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.3@sha256:b15a199b4a732fea1126b06bee28f878cf2d221e6d0f8e780af8230395fb4b19 + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - name: clamav-scan + params: + - name: image-digest + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: clamav-scan + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.2@sha256:525ad6081d7d38082db057482bd9ecc59c38954656b1a4e33a28de9c19e71006 + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - name: sast-coverity-check + params: + - name: image-digest + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + - name: CACHI2_ARTIFACT + value: $(tasks.prefetch-dependencies.results.CACHI2_ARTIFACT) + runAfter: + - coverity-availability-check + taskRef: + params: + - name: name + value: sast-coverity-check-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.1@sha256:87b966c4b2017aa38174180505409b2c5cc7c1c140d9879411dec34a37cfa8be + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - input: $(tasks.coverity-availability-check.results.STATUS) + operator: in + values: + - success + - name: coverity-availability-check + params: + - name: image-digest + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + - name: CACHI2_ARTIFACT + value: $(tasks.prefetch-dependencies.results.CACHI2_ARTIFACT) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: coverity-availability-check-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check-oci-ta:0.1@sha256:b4e6d38f0717aa53f3dadee105ba559c2fd76b500a4d21d20fc8b828042ae955 + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - name: sast-shell-check + params: + - name: image-digest + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + - name: CACHI2_ARTIFACT + value: $(tasks.prefetch-dependencies.results.CACHI2_ARTIFACT) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: sast-shell-check-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:9b0138a597445f3887697da69c9b8b91368f0b72b98e9304fa209b43523bd6fb + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + - name: apply-tags + params: + - name: IMAGE + value: $(tasks.build-image-index.results.IMAGE_URL) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: apply-tags + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-apply-tags@sha256:fa7aa88ffe01eeeaa07c8720b27e50e27f6f136ef33595efaa16a0eb4598ea02 + - name: kind + value: task + resolver: bundles + - name: push-dockerfile + params: + - name: IMAGE + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: IMAGE_DIGEST + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + - name: DOCKERFILE + value: $(params.dockerfile) + - name: CONTEXT + value: $(params.path-context) + - name: SOURCE_ARTIFACT + value: $(tasks.prefetch-dependencies.results.SOURCE_ARTIFACT) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: push-dockerfile-oci-ta + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta@sha256:fcd9016f1cd5d1085b5e823cdf04a4e77ce80f67d0990af7853e70755aa25d54 + - name: kind + value: task + resolver: bundles + - name: rpms-signature-scan + params: + - name: image-url + value: $(tasks.build-image-index.results.IMAGE_URL) + - name: image-digest + value: $(tasks.build-image-index.results.IMAGE_DIGEST) + runAfter: + - build-image-index + taskRef: + params: + - name: name + value: rpms-signature-scan + - name: bundle + value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:39cd56ffa26ff5edfd5bf9b61e902cae35a345c078cd9dcbc0737d30f3ce5ef1 + - name: kind + value: task + resolver: bundles + when: + - input: $(params.skip-checks) + operator: in + values: + - "false" + workspaces: + - name: git-auth + optional: true + - name: netrc + optional: true + taskRunTemplate: {} + workspaces: + - name: git-auth + secret: + secretName: '{{ git_auth_secret }}' +status: {} diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 7b040cbbba19..2cfacca14c7d 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -2,11 +2,8 @@ ARG BASE_UBI_IMAGE_TAG=9.5-1742914212 ARG PYTHON_VERSION=3.12 -ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX" -ARG vllm_fa_cmake_gpu_arches='80-real;90-real' - ## Base Layer ################################################################## -FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base +FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base ARG PYTHON_VERSION ENV PYTHON_VERSION=${PYTHON_VERSION} RUN microdnf -y update && microdnf install -y --nodocs \ @@ -19,13 +16,14 @@ ENV LANG=C.UTF-8 \ LC_ALL=C.UTF-8 # Some utils for dev purposes - tar required for kubectl cp + RUN microdnf install -y --nodocs \ - which procps findutils tar vim git\ + which procps findutils tar vim git \ && microdnf clean all ## Python Installer ############################################################ -FROM base as python-install +FROM base AS python-install ARG PYTHON_VERSION ENV VIRTUAL_ENV=/opt/vllm @@ -33,11 +31,12 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH" ENV PYTHON_VERSION=${PYTHON_VERSION} RUN microdnf install -y --nodocs \ python${PYTHON_VERSION}-devel && \ - python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all + python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \ + microdnf clean all ## CUDA Base ################################################################### -FROM python-install as cuda-base +FROM python-install AS cuda-base RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \ https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo @@ -51,7 +50,6 @@ RUN microdnf install -y --nodocs \ ln -s ${CUDA_HOME}/lib64/stubs/libcuda.so /usr/lib64/ - ## Python cuda base ################################################################# FROM cuda-base AS python-cuda-base @@ -59,80 +57,23 @@ ENV VIRTUAL_ENV=/opt/vllm ENV PATH="$VIRTUAL_ENV/bin:$PATH" # install cuda and common dependencies -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cache/uv \ +RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ uv pip install \ -r requirements-cuda.txt -## Development ################################################################# -FROM python-cuda-base AS dev - -# install build and runtime dependencies -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ - --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ - --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \ - --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \ - --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \ - uv pip install \ - -r requirements-cuda.txt \ - -r requirements-dev.txt - -## Builder ##################################################################### -FROM dev AS build - -# install build dependencies -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \ - uv pip install -r requirements-build.txt - -# install compiler cache to speed up compilation leveraging local or remote caching -# git is required for the cutlass kernels -RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y --nodocs git ccache && microdnf clean all - -COPY . . - -ARG TORCH_CUDA_ARCH_LIST -ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST -ARG vllm_fa_cmake_gpu_arches -ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches} - -# max jobs used by Ninja to build extensions -ARG max_jobs=2 -ENV MAX_JOBS=${max_jobs} -# number of threads used by nvcc -ARG nvcc_threads=8 -ENV NVCC_THREADS=$nvcc_threads -# make sure punica kernels are built (for LoRA) -ENV VLLM_INSTALL_PUNICA_KERNELS=1 - -# Make sure the cuda environment is in the PATH -ENV PATH=/usr/local/cuda/bin:$PATH - -ENV CCACHE_DIR=/root/.cache/ccache -RUN --mount=type=cache,target=/root/.cache/ccache \ - --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,src=.git,target=/workspace/.git \ - env CFLAGS="-march=haswell" \ - CXXFLAGS="$CFLAGS $CXXFLAGS" \ - CMAKE_BUILD_TYPE=Release \ - python3 setup.py bdist_wheel --dist-dir=dist #################### libsodium Build IMAGE #################### -FROM base as libsodium-builder +FROM base AS libsodium-builder RUN microdnf install -y --nodocs gcc gzip \ && microdnf clean all WORKDIR /usr/src/libsodium -ARG LIBSODIUM_VERSION=1.0.20 +ARG LIBSODIUM_VERSION RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \ && tar -xzvf libsodium*.tar.gz \ && rm -f libsodium*.tar.gz \ @@ -156,25 +97,29 @@ ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nv ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/nvtx/lib:${LD_LIBRARY_PATH}" # Triton needs a CC compiler + RUN microdnf install -y --nodocs gcc \ rsync \ && microdnf clean all -# install vllm wheel first, so that torch etc will be installed -RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \ - --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cache/uv \ - uv pip install "$(echo dist/*.whl)[tensorizer]" --verbose # Install libsodium for Tensorizer encryption RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \ - cd /usr/src/libsodium \ - && make install + make -C /usr/src/libsodium install -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cache/uv \ - uv pip install \ - "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.0.post2/flashinfer_python-0.2.0.post2+cu124torch2.5-cp312-cp312-linux_x86_64.whl" +COPY LICENSE /licenses/vllm.md +COPY examples/*.jinja /app/data/template/ + +# install vllm by running the payload script and then install flashinfer + +ARG WHEEL_RELEASE +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,src=payload,target=/workspace/payload \ + --mount=type=secret,id=rhel-ai-private-index-auth/BOT_PAT \ + source ${VIRTUAL_ENV}/bin/activate && \ + env BOT_PAT=$(cat /run/secrets/rhel-ai-private-index-auth/BOT_PAT) \ + WHEEL_RELEASE=${WHEEL_RELEASE} \ + ./payload/run.sh ENV HF_HUB_OFFLINE=1 \ HOME=/home/vllm \ @@ -199,10 +144,7 @@ ENV HF_HUB_OFFLINE=1 \ RUN umask 002 && \ useradd --uid 2000 --gid 0 vllm && \ mkdir -p /home/vllm && \ - chmod g+rwx /home/vllm /usr/src /workspace - -COPY LICENSE /licenses/vllm.md -COPY examples/*.jinja /app/data/template/ + chmod g+rwx /home/vllm USER 2000 WORKDIR /home/vllm @@ -210,14 +152,22 @@ WORKDIR /home/vllm ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] -FROM vllm-openai as vllm-grpc-adapter +## TGIS Adapter layer ##################################################################### +FROM vllm-openai AS vllm-grpc-adapter USER root -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \ - HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.6.3 +ARG WHEEL_RELEASE +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,src=payload,target=/workspace/payload \ + --mount=type=secret,id=rhel-ai-private-index-auth/BOT_PAT \ + cd /workspace && \ + source ${VIRTUAL_ENV}/bin/activate && \ + env HOME=/root \ + BOT_PAT=$(cat /run/secrets/rhel-ai-private-index-auth/BOT_PAT) \ + WHEEL_RELEASE=${WHEEL_RELEASE} \ + ./payload/run.sh + ENV GRPC_PORT=8033 \ PORT=8000 \ diff --git a/argfiles/argfile.ubi b/argfiles/argfile.ubi new file mode 100644 index 000000000000..3be1e47c388c --- /dev/null +++ b/argfiles/argfile.ubi @@ -0,0 +1,5 @@ +BASE_UBI_IMAGE_TAG=9.5-1739420147 +PYTHON_VERSION=3.11 +LIBSODIUM_VERSION=1.0.20 +WHEEL_RELEASE=2.20.55+vllm-cuda-ubi9-x86_64 +# can view releases at https://gitlab.com/redhat/rhel-ai/rhoai/pipeline/-/releases diff --git a/payload/run.sh b/payload/run.sh new file mode 100755 index 000000000000..e4e88f21ba2e --- /dev/null +++ b/payload/run.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Script assumes python venv is already properly configured +# required env vars: +# $BOT_PAT +# $WHEEL_RELEASE +# $WHEEL_BASEURL +set -eux + +cat < ${HOME}/.netrc +machine gitlab.com +login rhel-ai-wheels-prefetch-token-rhoai +password $BOT_PAT +EOF + +trap "rm -rf ${HOME}/.netrc release release.tar.gz" EXIT + +# WHEEL_RELEASE="2.20.55+vllm-cuda-ubi9-x86_64" + +# Gitlab project ID, etc should be static +WHEEL_RELEASE_ARTIFACTS="https://gitlab.com/api/v4/projects/68045055/packages/generic/vllm-wheels/${WHEEL_RELEASE}/wheels-${WHEEL_RELEASE}.tar.gz" + + +# NOTE - ensure that flashinfer is included in wheel thing + +curl --netrc -o release.tar.gz ${WHEEL_RELEASE_ARTIFACTS} +tar zxvf release.tar.gz +./release/install_wheels.sh +