-ARG CUDA_VERSION=12.8.1
+ARG CUDA_VERSION=12.9.1
 ARG PYTHON_VERSION=3.12

 # BUILD_BASE_IMAGE: used to set up Python and build the xformers and vllm wheels. It can be replaced with a different base image from the local machine,
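The CUDA_VERSION bump above propagates into the base image tags used later in the file; as a rough sketch of how such a global ARG is typically consumed (the nvidia/cuda image name and tag layout here are assumptions, not shown in this diff):

    ARG CUDA_VERSION=12.9.1
    # global ARG interpolated into the FROM tag (hypothetical base image)
    FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base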
@@ -124,7 +124,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
     git clone https://github.com/facebookresearch/xformers.git

     pushd xformers
-    git checkout v0.0.32.post2
+    git checkout v0.0.33.post1
     git submodule update --init --recursive
     python3 setup.py bdist_wheel --dist-dir=../xformers-dist --verbose
     popd
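A quick way to confirm the new pin took effect is to inspect the wheel produced in xformers-dist and the importable version; a hypothetical sanity check, not part of this Dockerfile:

    ls xformers-dist/    # expect an xformers-0.0.33.post1*.whl
    python3 -c 'import xformers; print(xformers.__version__)'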
@@ -256,7 +256,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy

-# Install build and runtime dependencies, this is needed for flashinfer install
+# Install build and runtime dependencies
 COPY requirements/build.txt requirements/build.txt
 COPY use_existing_torch.py use_existing_torch.py
 RUN python3 use_existing_torch.py
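For context, UV_LINK_MODE=copy (set above) is the environment equivalent of uv's --link-mode flag: hardlinks fail when the cache mount and the install target live on different filesystems, so copy mode sidesteps that. A sketch of the per-command form, assuming the same cache mount:

    RUN --mount=type=cache,target=/root/.cache/uv \
        uv pip install --system --link-mode=copy -r requirements/build.txt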
@@ -294,33 +294,9 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system /wheels/xformers/*.whl --verbose

-# Build FlashInfer from source
-ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
-ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
-
-# TODO(elainewy): remove this once vllm commit is updated, and install flashinfer from pip
-# see https://github.com/pytorch/pytorch/pull/165274#issuecomment-3408531784
-ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
-ARG FLASHINFER_GIT_REF="v0.2.14.post1"
-
-RUN --mount=type=cache,target=/root/.cache/uv \
-    git clone --depth 1 --recursive --shallow-submodules \
-        --branch ${FLASHINFER_GIT_REF} \
-        ${FLASHINFER_GIT_REPO} flashinfer \
-    && echo "Building FlashInfer with AOT for arches: ${torch_cuda_arch_list}" \
-    && cd flashinfer \
-    && python3 -m flashinfer.aot \
-    && python3 -m build --no-isolation --wheel --outdir ../wheels/flashinfer \
-    && cd .. \
-    && rm -rf flashinfer
-
-# Install FlashInfer
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system wheels/flashinfer/*.whl --verbose
-
 # Logging to confirm the torch versions
-RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
-RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
+RUN pip freeze | grep -E 'torch|xformers|vllm'
+RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm' > build_summary.txt
 # ################## VLLM INSTALLED IMAGE ####################


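Per the removed TODO, the source build was a stopgap until the pinned vllm commit allows installing FlashInfer from pip; the eventual replacement would presumably look like the sketch below (package name taken from the export stage, version pin assumed from the removed git ref):

    RUN --mount=type=cache,target=/root/.cache/uv \
        uv pip install --system flashinfer-python==0.2.14.post1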
@@ -331,4 +307,3 @@ FROM scratch as export-wheels
 COPY --from=base /workspace/xformers-dist /wheels/xformers
 COPY --from=build /workspace/vllm-dist /wheels/vllm
 COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
-COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python
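Since export-wheels is a scratch stage, its contents are retrieved with a local build output rather than by running a container; a typical invocation (target name from the diff, destination path assumed):

    docker buildx build --target export-wheels \
        --output type=local,dest=./wheels .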