From d3f32fa0d235a0e6f249fc707098eae4d9506df8 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 16 Jul 2025 09:02:26 +0800 Subject: [PATCH 001/160] modify build --- .github/scripts/build.sh | 64 +++++++++++++++-------------- .github/scripts/env.sh | 12 +++--- .github/workflows/_linux_build.yml | 65 +++++++++++++----------------- 3 files changed, 67 insertions(+), 74 deletions(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index 9dcd170aa1..f10f095934 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -1,18 +1,18 @@ #!/bin/bash # Usage: # ./build.sh --WORKSPACE= \ -# --PYTORCH_REPO= --PYTORCH_VERSION= \ +# --PYTORCH_REPO= --PYTORCH_COMMIT= \ # --TORCH_XPU_OPS_REPO= \ -# --TORCH_XPU_OPS_VERSION= +# --TORCH_XPU_OPS_COMMIT= set -xe export GIT_PAGER=cat # Init params WORKSPACE=$(realpath ${WORKSPACE:-"/tmp"}) PYTORCH_REPO=${PYTORCH_REPO:-"https://github.com/pytorch/pytorch.git"} -PYTORCH_VERSION=${PYTORCH_VERSION:-"main"} +PYTORCH_COMMIT=${PYTORCH_COMMIT:-"main"} TORCH_XPU_OPS_REPO=${TORCH_XPU_OPS_REPO:-"https://github.com/intel/torch-xpu-ops.git"} -TORCH_XPU_OPS_VERSION=${TORCH_XPU_OPS_VERSION:-"main"} +TORCH_XPU_OPS_COMMIT=${TORCH_XPU_OPS_COMMIT:-"main"} for var; do eval "export $(echo ${var@Q} |sed "s/^'-*//g;s/=/='/")" done @@ -21,20 +21,20 @@ done rm -rf ${WORKSPACE}/pytorch git clone ${PYTORCH_REPO} ${WORKSPACE}/pytorch cd ${WORKSPACE}/pytorch -git checkout ${PYTORCH_VERSION} +git checkout ${PYTORCH_COMMIT} git remote -v && git branch && git show -s git rev-parse HEAD > ${WORKSPACE}/pytorch.commit # Set torch-xpu-ops -if [ "${TORCH_XPU_OPS_VERSION,,}" == "pinned" ];then +if [ "${TORCH_XPU_OPS_COMMIT,,}" == "pinned" ];then TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - TORCH_XPU_OPS_VERSION="$(cat ${WORKSPACE}/pytorch/third_party/xpu.txt)" + TORCH_XPU_OPS_COMMIT="$(cat ${WORKSPACE}/pytorch/third_party/xpu.txt)" fi -if [ "${TORCH_XPU_OPS_VERSION,,}" != "cicd" ];then +if [ "${TORCH_XPU_OPS_COMMIT,,}" != "cicd" ];then rm -rf ${WORKSPACE}/torch-xpu-ops git clone ${TORCH_XPU_OPS_REPO} ${WORKSPACE}/torch-xpu-ops cd ${WORKSPACE}/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_VERSION} + git checkout ${TORCH_XPU_OPS_COMMIT} fi cd ${WORKSPACE}/torch-xpu-ops git remote -v && git branch && git show -s @@ -51,28 +51,30 @@ python -m pip install -r requirements.txt python -m pip install mkl-static mkl-include export USE_STATIC_MKL=1 export USE_XCCL=1 -export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ - intel-cmplr-lib-rt==2025.1.1 | \ - intel-cmplr-lib-ur==2025.1.1 | \ - intel-cmplr-lic-rt==2025.1.1 | \ - intel-sycl-rt==2025.1.1 | \ - oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - onemkl-sycl-blas==2025.1.0 | \ - onemkl-sycl-dft==2025.1.0 | \ - onemkl-sycl-lapack==2025.1.0 | \ - onemkl-sycl-rng==2025.1.0 | \ - onemkl-sycl-sparse==2025.1.0 | \ - dpcpp-cpp-rt==2025.1.1 | \ - intel-opencl-rt==2025.1.1 | \ - mkl==2025.1.0 | \ - intel-openmp==2025.1.1 | \ - tbb==2022.1.0 | \ - tcmlib==1.3.0 | \ - umf==0.10.0 | \ - intel-pti==0.12.3 -" +if [ "${XPU_ONEAPI_PATH}" != "" ];then + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ + intel-cmplr-lib-rt==2025.1.1 | \ + intel-cmplr-lib-ur==2025.1.1 | \ + intel-cmplr-lic-rt==2025.1.1 | \ + intel-sycl-rt==2025.1.1 | \ + oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ 
+ oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + onemkl-sycl-blas==2025.1.0 | \ + onemkl-sycl-dft==2025.1.0 | \ + onemkl-sycl-lapack==2025.1.0 | \ + onemkl-sycl-rng==2025.1.0 | \ + onemkl-sycl-sparse==2025.1.0 | \ + dpcpp-cpp-rt==2025.1.1 | \ + intel-opencl-rt==2025.1.1 | \ + mkl==2025.1.0 | \ + intel-openmp==2025.1.1 | \ + tbb==2022.1.0 | \ + tcmlib==1.3.0 | \ + umf==0.10.0 | \ + intel-pti==0.12.3 + " +fi # Build sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh index 831864d6d4..d0f7cfd338 100644 --- a/.github/scripts/env.sh +++ b/.github/scripts/env.sh @@ -1,9 +1,11 @@ #!/bin/bash -source /opt/intel/oneapi/compiler/latest/env/vars.sh -source /opt/intel/oneapi/pti/latest/env/vars.sh -source /opt/intel/oneapi/umf/latest/env/vars.sh -source /opt/intel/oneapi/ccl/latest/env/vars.sh -source /opt/intel/oneapi/mpi/latest/env/vars.sh +XPU_ONEAPI_PATH="${XPU_ONEAPI_PATH:-"/opt/intel/oneapi"}" + +source ${XPU_ONEAPI_PATH}/compiler/latest/env/vars.sh +source ${XPU_ONEAPI_PATH}/pti/latest/env/vars.sh +source ${XPU_ONEAPI_PATH}/umf/latest/env/vars.sh +source ${XPU_ONEAPI_PATH}/ccl/latest/env/vars.sh +source ${XPU_ONEAPI_PATH}/mpi/latest/env/vars.sh icpx --version sycl-ls diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 60dd2c49b6..89f29ac34f 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -4,34 +4,26 @@ on: workflow_call: inputs: pytorch: - required: true type: string default: 'main' - description: Pytorch branch/commit - keep_torch_xpu_ops: - required: false - type: string - default: 'false' - description: Keep torch-xpu-ops pin. 
`true` means use pined commit - driver: - required: false + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: type: string - default: 'lts' - description: Driver lts/rolling + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin python: - required: false type: string default: '3.10' description: Python version + oneapi: + type: string + default: 'host' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed runner: required: true type: string - default: 'linux.idc.xpu' + default: 'pvc_rolling' description: Runner label - outputs: - torch_commit_id: - description: The commit id of the torch build - value: ${{ jobs.build.outputs.TORCH_COMMIT_ID }} permissions: read-all @@ -44,18 +36,13 @@ jobs: - ${{ github.workspace }}:${{ github.workspace }} env: PATH: /opt/xpu-build/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - commit_issue: 1280 GH_TOKEN: ${{ github.token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} - outputs: - TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }} timeout-minutes: 300 steps: - name: Setup based env run: | # Cleanup workspace - rm -rf ${{ github.workspace }}/* + rm -rf ./* # Install gh dnf install 'dnf-command(config-manager)' dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo @@ -73,35 +60,37 @@ jobs: - name: Build Pytorch XPU run: | set -xe -o pipefail - if [ "${{ inputs.driver }}" == "lts" ]; then - export TORCH_XPU_ARCH_LIST='pvc' - fi if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - PYTORCH_VERSION="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" + PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" else PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - PYTORCH_VERSION="${{ inputs.pytorch }}" + PYTORCH_COMMIT="${{ inputs.pytorch }}" fi - if [[ "${{ inputs.keep_torch_xpu_ops }}" == *"https://"* ]];then - TORCH_XPU_OPS_REPO="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/@.*//')" - TORCH_XPU_OPS_VERSION="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/.*@//')" - elif [ "${{ inputs.keep_torch_xpu_ops }}" == "true" ];then - TORCH_XPU_OPS_VERSION="pinned" + if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" else - TORCH_XPU_OPS_VERSION="cicd" + TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" + TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi # oneAPI DLE + if [ "${{ inputs.oneapi }}" != "host" ];then + rm -rf ~/intel ~/.intel /opt/intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" + fi source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh # gcc 11 source /opt/rh/gcc-toolset-11/enable ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ --WORKSPACE="${{ github.workspace }}" \ --PYTORCH_REPO="${PYTORCH_REPO}" \ - --PYTORCH_VERSION="${PYTORCH_VERSION}" \ + --PYTORCH_COMMIT="${PYTORCH_COMMIT}" \ --TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \ - --TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \ - 2>&1 |tee ${{ 
github.workspace }}/pytorch_build_${PYTORCH_VERSION//\//-}.log + --TORCH_XPU_OPS_COMMIT="${TORCH_XPU_OPS_COMMIT}" \ + 2>&1 |tee ${{ github.workspace }}/pytorch_build_${PYTORCH_COMMIT//\//-}.log - name: Torch Config run: | python -c "import torch; print(torch.__config__.show())" @@ -128,4 +117,4 @@ jobs: if: always() run: | chmod 777 . -R - rm -rf pytorch torch-xpu-ops pytorch_*.log torch*.whl + rm -rf ./* From bdc58d75c4854b480755e583523f04955c59eb46 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 16 Jul 2025 09:03:36 +0800 Subject: [PATCH 002/160] modify ut --- .github/workflows/_linux_ut.yml | 39 ++++++++++++++------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index d43fe6b809..82ef33df1a 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -7,17 +7,26 @@ on: required: false type: string default: 'main' - description: Pytorch branch/commit - keep_torch_xpu_ops: + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: required: false type: string - default: 'false' - description: Keep torch-xpu-ops pin. `true` means use pined commit + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin triton: required: false type: string - default: '' - description: Triton commit. Use pytorch pined commit by default + default: 'pinned' + description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + required: false + type: string + default: '3.10' + description: Python version ut: required: true type: string @@ -28,21 +37,11 @@ on: type: string default: '' description: List disabled tests, such as disable_ut or disable_distributed - python: - required: false - type: string - default: '3.10' - description: Python version runner: required: true type: string default: 'linux.idc.xpu' description: Runner label - driver: - required: false - type: string - default: 'lts' - description: Driver lts/rolling permissions: read-all @@ -53,9 +52,6 @@ jobs: timeout-minutes: 300 env: GH_TOKEN: ${{ github.token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} - ut_skip_issue: 1624 steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 @@ -263,7 +259,7 @@ jobs: eval $test_cmd 2>${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test_error.log | \ tee ${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test.log - name: Run Torch XPU Profile UT - if: ${{ inputs.driver == 'rolling' && contains(inputs.ut, 'xpu_profiling') }} + if: ${{ contains(inputs.ut, 'xpu_profiling') }} run: | source activate xpu_op_${ZE_AFFINITY_MASK} mkdir -p ${{ github.workspace }}/ut_log/profile_test @@ -379,9 +375,6 @@ jobs: timeout-minutes: 60 env: GH_TOKEN: ${{ github.token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} - ut_skip_issue: 1624 steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 From b07b490c5ec579959524150975f07e6b719c9f0e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 16 Jul 2025 09:03:46 +0800 Subject: [PATCH 003/160] modify e2e --- .github/workflows/nightly_ondemand.yml | 247 
+++++++++++-------------- 1 file changed, 112 insertions(+), 135 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 25c3af0245..48b6521829 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -3,31 +3,42 @@ name: Nightly-OnDemand Tests on: schedule: # GMT+8 21:00 every workday - - cron: '0 13 * * 0-4' - # GMT+8 0:00 Saturday - - cron: '0 16 * * 5' + - cron: '10 13 * * 0-4' # build from source + - cron: '30 13 * * 0-4' # nightly wheel + # GMT+8 00:00 Saturday + - cron: '10 16 * * 5' # build from source + - cron: '30 16 * * 5' # nightly wheel workflow_dispatch: inputs: pytorch: required: false type: string default: 'main' - description: Pytorch branch/commit - keep_torch_xpu_ops: + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: required: false type: string - default: 'false' - description: Keep torch-xpu-ops pin. `true` means use pined commit - ut: + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + triton: required: false type: string - default: 'torch_xpu' - description: UT scope. `op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,torch_xpu,xpu_profiling`. Delimiter is comma - triton: + default: 'pinned' + description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: required: false type: string - default: '' - description: Triton commit. Use pytorch pined commit by default + default: '3.10' + description: Python version + ut: + required: false + type: string + default: 'op_regression' + description: UT scope. `op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed`. Delimiter is comma suite: required: true type: string @@ -53,75 +64,120 @@ on: type: string default: '' description: Model. 
Will only run this one mode if set - python: - required: false - type: string - default: '3.10' - description: Python version permissions: read-all -concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} - cancel-in-progress: ${{ github.event_name != 'schedule' }} - jobs: - Linux-Nightly-Ondemand-Build: + Conditions-Filter: + name: conditions-filter if: ${{ github.repository_owner == 'intel' }} - name: linux-nightly-ondemand + runs-on: ubuntu-latest + timeout-minutes: 3 + outputs: + test_type: ${{ steps.inputs-check.outputs.test_type }} + pytorch: ${{ steps.inputs-check.outputs.pytorch }} + torch_xpu_ops: ${{ steps.inputs-check.outputs.torch_xpu_ops }} + steps: + - name: Inputs check + id: inputs-check + run: | + if [ "${{ github.event_name }}" == "schedule" ];then + if [ "${{ github.event.schedule }}" == "10 13 * * 0-4" ];then + test_type="build-nightly" + pytorch="main" + torch_xpu_ops="main" + elif [ "${{ github.event.schedule }}" == "30 13 * * 0-4" ];then + test_type="wheel-nightly" + pytorch="nightly_wheel" + torch_xpu_ops="pinned" + elif [ "${{ github.event.schedule }}" == "10 16 * * 5" ];then + test_type="build-weekly" + pytorch="main" + torch_xpu_ops="main" + elif [ "${{ github.event.schedule }}" == "30 16 * * 5" ];then + test_type="wheel-weekly" + pytorch="nightly_wheel" + torch_xpu_ops="pinned" + else + test_type="unknown" + pytorch="main" + torch_xpu_ops="main" + fi + else + if [ "${{ inputs.pytorch }}" == "nightly_wheel" ] || [ "${{ inputs.pytorch }}" == "release_wheel" ];then + test_type="wheel-ondemand" + pytorch="${{ inputs.pytorch }}" + torch_xpu_ops="pinned" + else + test_type="build-ondemand" + pytorch="${{ inputs.pytorch }}" + torch_xpu_ops="${{ inputs.torch_xpu_ops }}" + fi + fi + echo "test_type=${test_type}" >> ${GITHUB_OUTPUT} + echo "pytorch=${pytorch}" >> ${GITHUB_OUTPUT} + echo "torch_xpu_ops=${torch_xpu_ops}" >> ${GITHUB_OUTPUT} + + Linux-Nightly-Ondemand-Build: + if: ${{ needs.Conditions-Filter.outputs.pytorch != 'nightly_wheel' && needs.Conditions-Filter.outputs.pytorch != 'release_wheel' }} + needs: [Conditions-Filter] + name: linux secrets: inherit uses: ./.github/workflows/_linux_build.yml with: - pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} + pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} + torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} + oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - runner: pvc_e2e + runner: pvc_rolling Linux-Nightly-Ondemand-UT-Tests: - if: ${{ github.event_name == 'schedule' || inputs.ut != '' }} - name: linux-nightly-ondemand - needs: Linux-Nightly-Ondemand-Build + if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'p') }} + name: linux + needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_ut.yml with: - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ 
needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }} + pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} + torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} + triton: ${{ github.event_name == 'schedule' && 'pinned' || inputs.triton }} + oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} + ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} runner: linux.idc.xpu Linux-Nightly-Ondemand-E2E-Tests: - runs-on: pvc_e2e - name: linux-nightly-ondemand / e2e_test - needs: Linux-Nightly-Ondemand-Build + runs-on: pvc_rolling + name: linux / e2e_test + needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] timeout-minutes: 3600 permissions: issues: write + container: + image: 'xpu:test' + volumes: + - ${{ github.workspace }}:${{ github.workspace }} + options: --device=/dev/mem --device=/dev/dri --privileged --shm-size=8g env: + AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} reference_issue: 1645 - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} + pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} + torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} + triton: ${{ github.event_name == 'schedule' && 'pinned' || inputs.triton }} + oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - run_type: ${{ (github.event_name == 'schedule' && (github.event.schedule == '0 16 * * 5' && 'weekly' || 'nightly')) || 'on-demand' }} - outputs: - TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }} - TORCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCH_COMMIT_ID }} - DRIVER_VERSION: ${{ steps.pinned.outputs.DRIVER_VERSION }} - KERNEL_VERSION: ${{ steps.pinned.outputs.KERNEL_VERSION }} - BUNDLE_VERSION: ${{ steps.pinned.outputs.BUNDLE_VERSION }} - OS_PRETTY_NAME: ${{ steps.pinned.outputs.OS_PRETTY_NAME }} - GCC_VERSION: ${{ steps.pinned.outputs.GCC_VERSION }} - TORCHBENCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCHBENCH_COMMIT_ID }} - TORCHVISION_COMMIT_ID: ${{ steps.pinned.outputs.TORCHVISION_COMMIT_ID }} - TORCHAUDIO_COMMIT_ID: ${{ steps.pinned.outputs.TORCHAUDIO_COMMIT_ID }} - TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }} - TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }} - TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }} - TIMEOUT_MODELS: ${{ steps.summary.outputs.TIMEOUT_MODELS }} steps: + - name: Cleanup workspace + run: | + rm -rf ./* || sudo rm -rf ./* - name: Checkout torch-xpu-ops uses: actions/checkout@v4 + with: + path: torch-xpu-ops + - name: Setup python ${{ env.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.python }} - name: Prepare Conda ENV run: | which conda && conda clean -ay @@ -357,88 +413,9 @@ jobs: name: Windows-nightly-ondemand uses: ./.github/workflows/_windows_ut.yml with: - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} + torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || 
inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} src_changed: false has_label: true runner: Windows_CI - - Tests-Failure-And-Report: - if: ${{ ! cancelled() }} - runs-on: [ self-hosted, Linux ] - permissions: - issues: write - env: - GH_TOKEN: ${{ github.token }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - needs: Linux-Nightly-Ondemand-E2E-Tests - steps: - - name: Report github issue for XPU OPS nightly - if: github.repository_owner == 'intel' - run: | - set -xe - # Test env - build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - repo="${{ github.repository }}" - TORCH_BRANCH_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCH_BRANCH_ID }}" - TORCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCH_COMMIT_ID }}" - DRIVER_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.DRIVER_VERSION }}" - KERNEL_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.KERNEL_VERSION }}" - BUNDLE_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.BUNDLE_VERSION }}" - OS_PRETTY_NAME="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.OS_PRETTY_NAME }}" - GCC_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.GCC_VERSION }}" - TORCHBENCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCHBENCH_COMMIT_ID }}" - TORCHVISION_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCHVISION_COMMIT_ID }}" - TORCHAUDIO_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCHAUDIO_COMMIT_ID }}" - TRANSFORMERS_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TRANSFORMERS_VERSION }}" - TIMM_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TIMM_COMMIT_ID }}" - TRITON_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TRITON_COMMIT_ID }}" - TIMEOUT_MODELS="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TIMEOUT_MODELS }}" - # Test status - if [ "${{ needs.Linux-Nightly-Ondemand-E2E-Tests.result }}" == "success" ];then - test_status=Success - elif [ "${{ needs.Linux-Nightly-Ondemand-E2E-Tests.result }}" == "failure" ];then - test_status=Failure - cc_comment="CC ${{ secrets.NIGHTLY_EMAIL_LIST }}" - else - test_status=None - exit 0 - fi - # Test Type - if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then - test_type="On-demand" - test_issue_id=426 - cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}" - elif [ "${{ github.event.schedule }}" == "0 16 * * 5" ];then - test_type="Weekly" - test_issue_id=432 - else - test_type="Nightly" - test_issue_id=432 - fi - # Test report - echo -e "**${test_status}** $test_type Test on $(date +'%F'), See: $build_url\n" > ${{ github.workspace }}/report.txt - printf "Torch-xpu-ops | PyTorch | Triton\n--- | --- | ---\n${GITHUB_WORKFLOW_SHA:0:7} on ${GITHUB_REF_NAME} | " >> ${{ github.workspace }}/report.txt - printf "[${TORCH_COMMIT_ID:0:7}](https://github.com/pytorch/pytorch/commit/${TORCH_COMMIT_ID:0:7}) on $TORCH_BRANCH_ID | " >> ${{ github.workspace }}/report.txt - echo -e "[${TRITON_COMMIT_ID:0:7}](https://github.com/intel/intel-xpu-backend-for-triton/commit/${TRITON_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt - printf "Transformers | Timm | Torchbench | Torchvision | Torchaudio\n--- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt - printf "[${TRANSFORMERS_VERSION:0:7}](https://github.com/huggingface/transformers/commit/${TRANSFORMERS_VERSION:0:7}) | " >> ${{ github.workspace }}/report.txt - printf 
"[${TIMM_COMMIT_ID:0:7}](https://github.com/huggingface/pytorch-image-models/commit/${TIMM_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TORCHBENCH_COMMIT_ID:0:7}](https://github.com/pytorch/benchmark/commit/${TORCHBENCH_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TORCHVISION_COMMIT_ID:0:7}](https://github.com/pytorch/vision/commit/${TORCHVISION_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - echo -e "[${TORCHAUDIO_COMMIT_ID:0:7}](https://github.com/pytorch/audio/commit/${TORCHAUDIO_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt - printf "Device | OS | GCC | Python | Driver(DKMS) | Kernel | Bundle(DPCPP)\n--- | --- | --- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt - echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION | $KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt - if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then - test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.model }}" != "" ];then - test_scope+="; model=${{ inputs.model }}" - fi - echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt - fi - echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt - echo "$cc_comment" >> ${{ github.workspace }}/report.txt - # Report - report_txt=$(cat ${{ github.workspace }}/report.txt) - gh --repo $repo issue comment $test_issue_id --body "$report_txt" From 7b4582bc48ea9222c1ef416ecdfefa362d46fc06 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 16 Jul 2025 17:06:50 +0800 Subject: [PATCH 004/160] update --- .../actions/inductor-xpu-e2e-test/action.yml | 53 ++--- .github/workflows/_linux_build.yml | 66 ++++-- .github/workflows/_linux_ut.yml | 47 +---- .github/workflows/nightly_ondemand.yml | 197 +++++++++--------- 4 files changed, 167 insertions(+), 196 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 1631f399f2..8f9b90780f 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -29,9 +29,6 @@ inputs: type: string default: 'all' description: which cards can be used in the test - hf_token: - required: false - description: HUGGING_FACE_HUB_TOKEN for torchbench test pytorch: required: false type: string @@ -50,23 +47,16 @@ runs: if: ${{ inputs.env_prepare }} shell: bash run: | - source activate e2e_ci if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git - cd audio && git checkout $TORCHAUDIO_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl - cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git - cd vision && git checkout $TORCHVISION_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl - fi - cd ../ && python -c "import torch, torchvision, torchaudio" - rm -rf benchmark && git clone https://github.com/pytorch/benchmark.git - cd benchmark && git checkout $TORCHBENCH_COMMIT_ID + python -c "import torch, torchvision, torchaudio" + cd ./pytorch + TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt) + git clone https://github.com/pytorch/benchmark.git xpu-benchmark 
+ cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID # remove deps which will reinstall torch pip install --no-deps accelerate - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch) + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) pip install -U transformers==4.44.2 sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt git status && git diff @@ -81,28 +71,17 @@ runs: pip install -U transformers==4.44.2 fi if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git - cd vision && git checkout $TORCHVISION_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl - fi # install timm without dependencies - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 # install timm dependencies without torch and torchvision - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch) + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) fi - pip install numpy==1.26.4 + pip list |grep -E 'intel|torch' - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - env: - HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} shell: bash run: | - source activate e2e_ci - cp .github/scripts/inductor_xpu_test.sh ../pytorch - cd ../pytorch - + cp ./.github/scripts/inductor_xpu_test.sh ./pytorch + cd ./pytorch # check param function contains() { contains_status="echo 'Start $2 ...'" @@ -164,18 +143,16 @@ runs: HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }} shell: bash run: | - cd ../pytorch + cd ./pytorch rm -f inductor_log/summary_accuracy.csv for var in $(find inductor_log/ -name "inductor_*_xpu_accuracy.csv") do sed -i "s/$/,$(basename $var)/" $var cat $var >> inductor_log/summary_accuracy.csv done - - source activate e2e_ci cd ${{ github.workspace }} - cp .github/scripts/inductor_summary.py ../pytorch - cd ../pytorch + cp ./.github/scripts/inductor_summary.py ./pytorch + cd ./pytorch pip install styleFrame scipy pandas set -xe dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g') diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 89f29ac34f..d22b63955a 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -3,6 +3,10 @@ name: Linux PyTorch XPU Build on: workflow_call: inputs: + test_type: + type: string + default: 'build-from-source' + description: Build from source or install nightly wheel pytorch: type: string default: 'main' @@ -11,14 +15,19 @@ on: type: string default: 'main' description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin - 
python: + triton: + required: false type: string - default: '3.10' - description: Python version + default: 'pinned' + description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' oneapi: type: string - default: 'host' + default: 'installed' description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + type: string + default: '3.10' + description: Python version runner: required: true type: string @@ -29,6 +38,7 @@ permissions: read-all jobs: build: + if: ${{ ! contains(inputs.test_type, 'wheel') }} runs-on: ${{ inputs.runner }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' @@ -75,7 +85,7 @@ jobs: TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi # oneAPI DLE - if [ "${{ inputs.oneapi }}" != "host" ];then + if [ "${{ inputs.oneapi }}" != "installed" ];then rm -rf ~/intel ~/.intel /opt/intel wget -q -O oneapi.sh "${{ inputs.oneapi }}" bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi @@ -90,29 +100,61 @@ jobs: --PYTORCH_COMMIT="${PYTORCH_COMMIT}" \ --TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \ --TORCH_XPU_OPS_COMMIT="${TORCH_XPU_OPS_COMMIT}" \ - 2>&1 |tee ${{ github.workspace }}/pytorch_build_${PYTORCH_COMMIT//\//-}.log + 2>&1 |tee ${{ github.workspace }}/build_pytorch_${PYTORCH_COMMIT//\//-}.log + - name: Build Triton + run: | + cd ./pytorch + pip install cmake ninja pybind11 + rm -rf pytorch_triton_xpu-*.whl + if [ "${{ inputs.triton }}" != "pinned" ];then + TRITON_COMMIT_ID="${{ inputs.triton }}" + else + TRITON_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)" + fi + TRITON_VERSION_NAME="$( + curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ + grep '__version__' |head -n 1 |awk -F "'" '{print $2}' + )" + python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} \ + 2>&1 |tee ${{ github.workspace }}/build_triton_${TRITON_COMMIT_ID}.log + pip install pytorch_triton_xpu-*.whl + cp pytorch_triton_xpu-*.whl ${{ github.workspace }} + - name: Build Torchvision and Torchaudio + run: | + cd ./pytorch + TORCHVISION_COMMIT_ID="$(cat .github/ci_commit_pins/vision.txt)" + TORCHAUDIO_COMMIT_ID="$(cat .github/ci_commit_pins/audio.txt)" + git clone --single-branch -b main https://github.com/pytorch/vision.git xpu-vision + cd xpu-vision && git checkout ${TORCHVISION_COMMIT_ID} + python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_vision_${TRITON_COMMIT_ID}.log + pip install dist/*.whl + cp dist/*.whl ${{ github.workspace }} + git clone --single-branch -b main https://github.com/pytorch/audio.git xpu-audio + cd xpu-audio && git checkout ${TORCHAUDIO_COMMIT_ID} + python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_audio_${TRITON_COMMIT_ID}.log + pip install dist/*.whl + cp dist/*.whl ${{ github.workspace }} - name: Torch Config run: | python -c "import torch; print(torch.__config__.show())" python -c "import torch; print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import triton; print(triton.__version__)" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" python pytorch/torch/utils/collect_env.py - - name: Identify Build version - id: build_version - run: | - echo "TORCH_COMMIT_ID=$(python -c 'import torch; 
print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" - name: Upload Torch XPU Wheel if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/torch*.whl + path: ${{ github.workspace }}/*.whl - name: Upload Build Log if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: name: Torch-XPU-Build-Log-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/pytorch_*.log + path: ${{ github.workspace }}/build_*.log - name: Cleanup if: always() run: | diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 82ef33df1a..a5d543fd9c 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -13,11 +13,6 @@ on: type: string default: 'main' description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin - triton: - required: false - type: string - default: 'pinned' - description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' oneapi: type: string default: 'installed' @@ -75,7 +70,7 @@ jobs: git clone https://github.com/pytorch/pytorch pytorch source activate xpu_op_${ZE_AFFINITY_MASK} if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install --force-reinstall ${{ github.workspace }}/torch*.whl + pip install --force-reinstall ${{ github.workspace }}/*.whl TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') cd ./pytorch git checkout ${TORCH_COMMIT_ID} @@ -103,25 +98,6 @@ jobs: cd third_party/torch-xpu-ops git checkout ${TORCH_XPU_OPS_COMMIT} fi - - name: Triton Installation - run: | - source activate xpu_op_${ZE_AFFINITY_MASK} - cd ../pytorch - if [ -z ${{ inputs.triton }} ]; then - TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" - else - TRITON_COMMIT_ID="${{ inputs.triton }}" - fi - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - TRITON_VERSION_NAME="$( - curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} - pip install pytorch_triton_xpu-*.whl - fi - name: Torch Config run: | source activate xpu_op_${ZE_AFFINITY_MASK} @@ -398,7 +374,7 @@ jobs: git clone https://github.com/pytorch/pytorch pytorch source activate xpu_op_${ZE_AFFINITY_MASK} if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install --force-reinstall ${{ github.workspace }}/torch*.whl + pip install --force-reinstall ${{ github.workspace }}/*.whl TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') cd ./pytorch git checkout ${TORCH_COMMIT_ID} @@ -426,25 +402,6 @@ jobs: cd third_party/torch-xpu-ops git checkout ${TORCH_XPU_OPS_COMMIT} fi - - name: Triton Installation - run: | - source activate xpu_op_${ZE_AFFINITY_MASK} - cd ../pytorch - if [ -z ${{ inputs.triton }} ]; then - TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" - else - TRITON_COMMIT_ID="${{ inputs.triton }}" - fi - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - TRITON_VERSION_NAME="$( - curl -sSL 
https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} - pip install pytorch_triton_xpu-*.whl - fi - name: Torch Config run: | source activate xpu_op_${ZE_AFFINITY_MASK} diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 48b6521829..edc86e7c7f 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -104,7 +104,7 @@ jobs: torch_xpu_ops="main" fi else - if [ "${{ inputs.pytorch }}" == "nightly_wheel" ] || [ "${{ inputs.pytorch }}" == "release_wheel" ];then + if [["${{ inputs.pytorch }}" == *"_wheel"]];then test_type="wheel-ondemand" pytorch="${{ inputs.pytorch }}" torch_xpu_ops="pinned" @@ -119,14 +119,15 @@ jobs: echo "torch_xpu_ops=${torch_xpu_ops}" >> ${GITHUB_OUTPUT} Linux-Nightly-Ondemand-Build: - if: ${{ needs.Conditions-Filter.outputs.pytorch != 'nightly_wheel' && needs.Conditions-Filter.outputs.pytorch != 'release_wheel' }} needs: [Conditions-Filter] name: linux secrets: inherit uses: ./.github/workflows/_linux_build.yml with: + test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} + triton: ${{ github.event_name == 'schedule' && 'pinned' || inputs.triton }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} runner: pvc_rolling @@ -137,9 +138,9 @@ jobs: needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_ut.yml with: + test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} - triton: ${{ github.event_name == 'schedule' && 'pinned' || inputs.triton }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} @@ -160,43 +161,51 @@ jobs: env: AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} reference_issue: 1645 + test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} - torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} - triton: ${{ github.event_name == 'schedule' && 'pinned' || inputs.triton }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} steps: - name: Cleanup workspace run: | - rm -rf ./* || sudo rm -rf ./* + rm -rf ~/.triton /tmp ./* || sudo rm -rf ~/.triton /tmp ./* + mkdir -m 777 /tmp || sudo mkdir -m 777 /tmp - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - with: - path: torch-xpu-ops - name: Setup python ${{ env.python }} uses: actions/setup-python@v5 with: python-version: ${{ env.python }} - name: Prepare Conda ENV run: | - which conda && conda clean -ay - conda remove --all -y -n e2e_ci || rm -rf $(dirname 
${CONDA_EXE})/../envs/e2e_ci - conda create -n e2e_ci python=${{ env.python }} cmake ninja -y - source activate e2e_ci + which python + pip list pip install pandas scipy psutil requests + - name: Install oneAPI DLE + if: ${{ env.oneapi != 'installed' }} + run: | + rm -rf ~/intel ~/.intel /opt/intel + wget -q -O oneapi.sh "${{ env.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} + if: ${{ ! contains(env.test_type, 'wheel') }} uses: actions/download-artifact@v4 with: - name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} + pattern: Torch-XPU-Wheel-* - name: Prepare Stock Pytorch run: | - pwd - cd ../ - rm -rf pytorch || sudo rm -rf pytorch - source activate e2e_ci - pip install --force-reinstall ${{ github.workspace }}/torch*.whl + if [ "${{ env.pytorch }}" == "release_wheel" ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu + elif [ "${{ env.pytorch }}" == "test_wheel" ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu + elif [ "${{ env.pytorch }}" == "nightly_wheel" ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + else + pip install --force-reinstall ${{ github.workspace }}/*.whl + fi TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') git clone https://github.com/pytorch/pytorch pytorch cd pytorch @@ -204,141 +213,128 @@ jobs: # apply extra PRs for stock pytorch python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py git status && git diff && git show -s - - name: Identify pinned versions - id: pinned + - name: Install deps run: | - source .github/scripts/env.sh - cd ../pytorch - if [ -z ${{ inputs.triton }} ]; then - echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - else - echo "TRITON_COMMIT_ID=${{ inputs.triton }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then + python -c "import torch, torchvision, torchaudio" + cd pytorch + TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt) + git clone https://github.com/pytorch/benchmark.git xpu-benchmark + cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID + # remove deps which will reinstall torch + pip install --no-deps accelerate + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) + pip install -U transformers==4.44.2 + sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt + git status && git diff + pip install -r requirements.txt + python install.py --continue_on_fail + # deps for torchrec_dlrm + pip install pyre_extensions + pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu + pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec fi - echo "TORCH_BRANCH_ID=${{ inputs.pytorch }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> 
"${GITHUB_ENV}" - echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - . /etc/os-release - echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo ${GITHUB_ENV} - - name: Triton Installation - run: | - source activate e2e_ci - cd ../pytorch - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - TRITON_VERSION_NAME="$( - curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} - pip install pytorch_triton_xpu-*.whl - - name: Show GITHUB_ENV + if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then + pip install -U transformers==4.44.2 + fi + if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) + fi + - name: Torch Config run: | - echo "$GITHUB_ENV" - rm -rf ../pytorch/inductor_log || sudo rm -rf ../pytorch/inductor_log - rm -rf /tmp/torchinductor_* || sudo rm -rf /tmp/torchinductor_* - rm -rf ~/.triton/cache || sudo rm -rf ~/.triton/cache + printenv + python -c "import torch; print(torch.__config__.show())" + python -c "import torch; print(torch.__config__.parallel_info())" + python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import triton; print(triton.__version__)" + python pytorch/torch/utils/collect_env.py + pip list |grep -E 'torch|intel' + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + source /opt/intel/oneapi/setvars.sh + sycl-ls # Nihglty launch - - name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} + - name: Nightly Huggingface Full Test + if: ${{ contains(env.test_type, 'nightly') }} uses: ./.github/actions/inductor-xpu-e2e-test with: - suite: huggingface env_prepare: true + suite: huggingface dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training - scenario: accuracy - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Nightly Torchbench BF16 Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} + scenario: accuracy,performance + - name: Nightly Torchbench BF16 Training Test + if: ${{ contains(env.test_type, 'nightly') }} uses: 
./.github/actions/inductor-xpu-e2e-test with: + env_prepare: true suite: torchbench dt: bfloat16 mode: training - scenario: accuracy - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Nightly Timm_models FP16 Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} + scenario: accuracy,performance + - name: Nightly Timm_models FP16 Training Test + if: ${{ contains(env.test_type, 'nightly') }} uses: ./.github/actions/inductor-xpu-e2e-test with: + env_prepare: true suite: timm_models dt: float16 mode: training - scenario: accuracy - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + scenario: accuracy,performance - name: Nightly PT2E Full Test - if: ${{ env.run_type == 'nightly' }} + if: ${{ contains(env.test_type, 'nightly') }} uses: ./.github/actions/pt2e with: dt: float32,int8 scenario: accuracy,performance - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} # Weekly launch - - name: Weekly Huggingface Full Test - if: ${{ env.run_type == 'weekly' }} + - name: Nightly Huggingface Full Test + if: ${{ contains(env.test_type, 'weekly') }} uses: ./.github/actions/inductor-xpu-e2e-test with: - suite: huggingface env_prepare: true + suite: huggingface dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Weekly Torchbench Full Test - if: ${{ env.run_type == 'weekly' }} + - name: Nightly Torchbench BF16 Training Test + if: ${{ contains(env.test_type, 'weekly') }} uses: ./.github/actions/inductor-xpu-e2e-test with: - suite: torchbench env_prepare: true + suite: torchbench dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Weekly Timm_models Full Test - if: ${{ env.run_type == 'weekly' }} + - name: Nightly Timm_models FP16 Training Test + if: ${{ contains(env.test_type, 'weekly') }} uses: ./.github/actions/inductor-xpu-e2e-test with: - suite: timm_models env_prepare: true + suite: timm_models dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Weekly PT2E Full Test - if: ${{ env.run_type == 'weekly' }} + - name: Nightly PT2E Full Test + if: ${{ contains(env.test_type, 'weekly') }} uses: ./.github/actions/pt2e with: - env_prepare: true dt: float32,int8 scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }} uses: ./.github/actions/inductor-xpu-e2e-test with: - suite: ${{ inputs.suite }} env_prepare: true + suite: ${{ inputs.suite }} dt: ${{ inputs.dt }} mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} uses: ./.github/actions/pt2e @@ -346,7 +342,6 @@ jobs: env_prepare: true dt: ${{ inputs.dt }} scenario: ${{ inputs.scenario }} - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - name: Download Reference Artifact id: reference_id @@ -354,10 +349,10 @@ jobs: set -xe source activate e2e_ci conda install gh --channel conda-forge -y - if [ "${{ env.run_type 
}}" == "on-demand" ];then + if [ "${{ env.pytorch }}" == "on-demand" ];then artifact_type="weekly" else - artifact_type="${{ env.run_type }}" + artifact_type="${{ env.pytorch }}" fi REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ --json body -q .body |grep "Inductor-${artifact_type}-LTS-XPU-E2E" |sed 's/.*: *//')" @@ -399,13 +394,13 @@ jobs: if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: - name: Inductor-${{ env.run_type }}-LTS-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} + name: Inductor-${{ env.pytorch }}-LTS-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} path: ${{ github.workspace }}/upload_files - name: Upload Reference Run ID - if: ${{ env.run_type != 'on-demand' }} + if: ${{ env.pytorch != 'on-demand' }} run: | gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body | \ - sed "s/Inductor-${{ env.run_type }}-LTS-XPU-E2E:.*/Inductor-${{ env.run_type }}-LTS-XPU-E2E: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt + sed "s/Inductor-${{ env.pytorch }}-LTS-XPU-E2E:.*/Inductor-${{ env.pytorch }}-LTS-XPU-E2E: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt Windows-Nightly-Ondemand-UT-Tests: From 3ae4b09be641601bec791dc5bcd1e87d2d53dc29 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 16 Jul 2025 17:12:16 +0800 Subject: [PATCH 005/160] update --- .github/workflows/nightly_ondemand.yml | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index edc86e7c7f..2c52d87dca 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -29,11 +29,6 @@ on: type: string default: 'installed' description: Installed oneAPI DLE on host by default, fill offline.sh url if needed - python: - required: false - type: string - default: '3.10' - description: Python version ut: required: false type: string @@ -129,7 +124,7 @@ jobs: torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} triton: ${{ github.event_name == 'schedule' && 'pinned' || inputs.triton }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} runner: pvc_rolling Linux-Nightly-Ondemand-UT-Tests: @@ -142,7 +137,7 @@ jobs: pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} runner: linux.idc.xpu @@ -166,7 +161,7 @@ jobs: test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} steps: - name: Cleanup workspace run: | @@ -410,7 +405,7 @@ jobs: with: torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' 
|| inputs.torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} src_changed: false has_label: true runner: Windows_CI From fe06ca3aad64f9617e8aa76b156775320d5ccc32 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 16 Jul 2025 18:01:09 +0800 Subject: [PATCH 006/160] update --- .github/workflows/_linux_build.yml | 10 +- .github/workflows/_linux_e2e.yml | 277 +++++++++++++++++++++++++ .github/workflows/_linux_ut.yml | 14 +- .github/workflows/nightly_ondemand.yml | 273 ++---------------------- 4 files changed, 303 insertions(+), 271 deletions(-) create mode 100644 .github/workflows/_linux_e2e.yml diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index d22b63955a..481c9527b2 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -3,6 +3,11 @@ name: Linux PyTorch XPU Build on: workflow_call: inputs: + runner: + required: true + type: string + default: 'pvc_rolling' + description: Runner label test_type: type: string default: 'build-from-source' @@ -28,11 +33,6 @@ on: type: string default: '3.10' description: Python version - runner: - required: true - type: string - default: 'pvc_rolling' - description: Runner label permissions: read-all diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml new file mode 100644 index 0000000000..fb6405cf48 --- /dev/null +++ b/.github/workflows/_linux_e2e.yml @@ -0,0 +1,277 @@ +name: Linux PyTorch XPU Build + +on: + workflow_call: + inputs: + runner: + required: true + type: string + default: 'pvc_rolling' + description: Runner label + test_type: + type: string + default: 'build-from-source' + description: Build from source or install nightly wheel + pytorch: + type: string + default: 'main' + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + type: string + default: '3.10' + description: Python version + suite: + required: true + type: string + default: 'huggingface' + description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma + dt: + required: true + type: string + default: 'float32' + description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma + mode: + required: true + type: string + default: 'inference' + description: Test mode. `inference,training`. Delimiter is comma + scenario: + required: true + type: string + default: 'accuracy' + description: Test scenario. `accuracy,performance`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. 
Will only run this one mode if set + +permissions: read-all + +jobs: + e2e: + runs-on: ${{ inputs.runner }} + timeout-minutes: 3600 + permissions: + issues: write + container: + image: 'xpu:test' + volumes: + - ${{ github.workspace }}:${{ github.workspace }} + options: --device=/dev/mem --device=/dev/dri --privileged --shm-size=8g + env: + AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" + GH_TOKEN: ${{ github.token }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + reference_issue: 1645 + steps: + - name: Cleanup workspace + run: | + rm -rf ~/.triton /tmp ./* || sudo rm -rf ~/.triton /tmp ./* + mkdir -m 777 /tmp || sudo mkdir -m 777 /tmp + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + - name: Setup python ${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Prepare Conda ENV + run: | + which python + pip list + pip install pandas scipy psutil requests + - name: Install oneAPI DLE + if: ${{ inputs.oneapi != 'installed' }} + run: | + rm -rf ~/intel ~/.intel /opt/intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} + - name: Download Pytorch wheel + if: ${{ ! contains(inputs.test_type, 'wheel') }} + uses: actions/download-artifact@v4 + with: + pattern: Torch-XPU-Wheel-* + - name: Prepare Stock Pytorch + run: | + if [ "${{ inputs.pytorch }}" == "release_wheel" ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu + elif [ "${{ inputs.pytorch }}" == "test_wheel" ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu + elif [ "${{ inputs.pytorch }}" == "nightly_wheel" ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + else + pip install --force-reinstall ${{ github.workspace }}/*.whl + fi + TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') + git clone https://github.com/pytorch/pytorch pytorch + cd pytorch + git checkout ${TORCH_COMMIT_ID} + # apply extra PRs for stock pytorch + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + git status && git diff && git show -s + - name: Torch Config + run: | + printenv + python -c "import torch; print(torch.__config__.show())" + python -c "import torch; print(torch.__config__.parallel_info())" + python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import triton; print(triton.__version__)" + python pytorch/torch/utils/collect_env.py + pip list |grep -E 'torch|intel' + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + source /opt/intel/oneapi/setvars.sh + sycl-ls + + # Nihglty launch + - name: Nightly Huggingface Full Test + if: ${{ contains(inputs.test_type, 'nightly') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: huggingface + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy,performance + - name: Nightly Torchbench BF16 Training Test + if: ${{ contains(inputs.test_type, 'nightly') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: torchbench + dt: bfloat16 + mode: training + scenario: accuracy,performance + - name: Nightly Timm_models FP16 Training Test + if: ${{ contains(inputs.test_type, 'nightly') }} + uses: 
./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: timm_models + dt: float16 + mode: training + scenario: accuracy,performance + - name: Nightly PT2E Full Test + if: ${{ contains(inputs.test_type, 'nightly') }} + uses: ./.github/actions/pt2e + with: + dt: float32,int8 + scenario: accuracy,performance + + # Weekly launch + - name: Nightly Huggingface Full Test + if: ${{ contains(inputs.test_type, 'weekly') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: huggingface + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy,performance + - name: Nightly Torchbench BF16 Training Test + if: ${{ contains(inputs.test_type, 'weekly') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: torchbench + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy,performance + - name: Nightly Timm_models FP16 Training Test + if: ${{ contains(inputs.test_type, 'weekly') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: timm_models + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy,performance + - name: Nightly PT2E Full Test + if: ${{ contains(inputs.test_type, 'weekly') }} + uses: ./.github/actions/pt2e + with: + dt: float32,int8 + scenario: accuracy,performance + + # On-demand launch + - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + if: ${{ contains(inputs.test_type, 'ondemand') && inputs.suite != 'pt2e' }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: ${{ inputs.suite }} + dt: ${{ inputs.dt }} + mode: ${{ inputs.mode }} + scenario: ${{ inputs.scenario }} + - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'pt2e') }} + uses: ./.github/actions/pt2e + with: + env_prepare: true + dt: ${{ inputs.dt }} + scenario: ${{ inputs.scenario }} + + - name: Download Reference Artifact + id: reference_id + run: | + set -xe + source activate e2e_ci + conda install gh --channel conda-forge -y + artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/')" + REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ + --json body -q .body |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo 'n/a')" + gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" && \ + rm -rf reference && mv Inductor-*-XPU-E2E-* reference || echo 'No reference' + - name: Summarize archieve files + id: summary + if: ${{ ! cancelled() }} + run: | + set -x -e -o pipefail + rm -rf ${{ github.workspace }}/upload_files + cp -r ${{ github.workspace }}/pytorch/inductor_log ${{ github.workspace }}/upload_files + mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ + find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days + tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . 
# backup logs + # Print summary + if [ "${{ inputs.suite }}" != 'pt2e' ];then + source activate e2e_ci + bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ + ${{ github.workspace }}/upload_files \ + ${{ github.workspace }}/reference \ + >> ${GITHUB_STEP_SUMMARY} + exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) + if [ ${exit_label} -ne 0 ];then + grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 + echo "There are ${exit_label} cases that need look into!!! Please check them" + exit ${exit_label} + fi + fi + pt2e_summary_csv="$(find ${{ github.workspace }}/upload_files/ -name "summary.csv")" + if [ -f "${pt2e_summary_csv}" ];then + cat ${pt2e_summary_csv} + failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l) + if [ ${failed_num} -ne 0 ];then + echo "[Warning] PT2E has failures!" + fi + fi + - name: Upload Inductor XPU E2E Data + if: ${{ ! cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} + path: ${{ github.workspace }}/upload_files + - name: Upload Reference Run ID + if: ${{ ! contains(inputs.test_type, 'ondemand') }} + run: | + gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body 2>&1 |tee new_body.txt 2>&1 + has_or_not="$(grep 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt |wc -l)" + if [ ${has_or_not} -ne 0 ];then + sed -i "s/Inductor-${{ inputs.test_type }}-LTS2:.*/Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}/" new_body.txt + else + echo "Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}" |tee -a new_body.txt + fi + gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index a5d543fd9c..1511d686f4 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -3,6 +3,15 @@ name: Linux UT Test on: workflow_call: inputs: + runner: + required: true + type: string + default: 'linux.idc.xpu' + description: Runner label + test_type: + type: string + default: 'build-from-source' + description: Build from source or install nightly wheel pytorch: required: false type: string @@ -32,11 +41,6 @@ on: type: string default: '' description: List disabled tests, such as disable_ut or disable_distributed - runner: - required: true - type: string - default: 'linux.idc.xpu' - description: Runner label permissions: read-all diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 2c52d87dca..b1a3754dcf 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -11,17 +11,14 @@ on: workflow_dispatch: inputs: pytorch: - required: false type: string default: 'main' description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' torch_xpu_ops: - required: false type: string default: 'main' description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin triton: - required: false type: string default: 'pinned' description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' @@ -30,32 +27,26 @@ on: default: 'installed' description: Installed oneAPI DLE on host by default, fill offline.sh url if needed ut: - required: false type: string default: 'op_regression' description: UT scope. 
`op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed`. Delimiter is comma suite: - required: true type: string default: 'huggingface' description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma dt: - required: true type: string default: 'float32' description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma mode: - required: true type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma scenario: - required: true type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma model: - required: false type: string default: '' description: Model. Will only run this one mode if set @@ -119,13 +110,13 @@ jobs: secrets: inherit uses: ./.github/workflows/_linux_build.yml with: + runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} triton: ${{ github.event_name == 'schedule' && 'pinned' || inputs.triton }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} - runner: pvc_rolling Linux-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'p') }} @@ -133,270 +124,30 @@ jobs: needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_ut.yml with: + runner: linux.idc.xpu test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} - runner: linux.idc.xpu Linux-Nightly-Ondemand-E2E-Tests: - runs-on: pvc_rolling - name: linux / e2e_test + if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} + name: linux needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - timeout-minutes: 3600 - permissions: - issues: write - container: - image: 'xpu:test' - volumes: - - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --privileged --shm-size=8g - env: - AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" - GH_TOKEN: ${{ github.token }} - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - reference_issue: 1645 + uses: ./.github/workflows/_linux_e2e.yml + with: + runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} - steps: - - name: Cleanup workspace - run: | - rm -rf ~/.triton /tmp ./* || sudo rm -rf ~/.triton /tmp ./* - mkdir -m 777 /tmp || sudo mkdir -m 777 /tmp - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Setup python ${{ env.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ env.python }} - - name: Prepare Conda ENV - run: | - which python - pip list - pip install pandas scipy psutil requests - - name: Install oneAPI DLE - if: 
${{ env.oneapi != 'installed' }} - run: | - rm -rf ~/intel ~/.intel /opt/intel - wget -q -O oneapi.sh "${{ env.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - - name: Download Pytorch wheel - if: ${{ ! contains(env.test_type, 'wheel') }} - uses: actions/download-artifact@v4 - with: - pattern: Torch-XPU-Wheel-* - - name: Prepare Stock Pytorch - run: | - if [ "${{ env.pytorch }}" == "release_wheel" ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ "${{ env.pytorch }}" == "test_wheel" ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ "${{ env.pytorch }}" == "nightly_wheel" ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - else - pip install --force-reinstall ${{ github.workspace }}/*.whl - fi - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - git clone https://github.com/pytorch/pytorch pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - # apply extra PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git diff && git show -s - - name: Install deps - run: | - if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then - python -c "import torch, torchvision, torchaudio" - cd pytorch - TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt) - git clone https://github.com/pytorch/benchmark.git xpu-benchmark - cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID - # remove deps which will reinstall torch - pip install --no-deps accelerate - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) - pip install -U transformers==4.44.2 - sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt - git status && git diff - pip install -r requirements.txt - python install.py --continue_on_fail - # deps for torchrec_dlrm - pip install pyre_extensions - pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu - pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec - fi - if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then - pip install -U transformers==4.44.2 - fi - if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) - fi - - name: Torch Config - run: | - printenv - python -c "import torch; print(torch.__config__.show())" - python -c "import torch; print(torch.__config__.parallel_info())" - python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py - pip list |grep -E 'torch|intel' - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - source /opt/intel/oneapi/setvars.sh - sycl-ls - - # Nihglty launch - - name: Nightly Huggingface Full Test - if: ${{ contains(env.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - env_prepare: true - suite: huggingface - dt: 
float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - - name: Nightly Torchbench BF16 Training Test - if: ${{ contains(env.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - env_prepare: true - suite: torchbench - dt: bfloat16 - mode: training - scenario: accuracy,performance - - name: Nightly Timm_models FP16 Training Test - if: ${{ contains(env.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - env_prepare: true - suite: timm_models - dt: float16 - mode: training - scenario: accuracy,performance - - name: Nightly PT2E Full Test - if: ${{ contains(env.test_type, 'nightly') }} - uses: ./.github/actions/pt2e - with: - dt: float32,int8 - scenario: accuracy,performance - - # Weekly launch - - name: Nightly Huggingface Full Test - if: ${{ contains(env.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - env_prepare: true - suite: huggingface - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - - name: Nightly Torchbench BF16 Training Test - if: ${{ contains(env.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - env_prepare: true - suite: torchbench - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - - name: Nightly Timm_models FP16 Training Test - if: ${{ contains(env.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - env_prepare: true - suite: timm_models - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - - name: Nightly PT2E Full Test - if: ${{ contains(env.test_type, 'weekly') }} - uses: ./.github/actions/pt2e - with: - dt: float32,int8 - scenario: accuracy,performance - - # On-demand launch - - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - env_prepare: true - suite: ${{ inputs.suite }} - dt: ${{ inputs.dt }} - mode: ${{ inputs.mode }} - scenario: ${{ inputs.scenario }} - - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} - uses: ./.github/actions/pt2e - with: - env_prepare: true - dt: ${{ inputs.dt }} - scenario: ${{ inputs.scenario }} - - - name: Download Reference Artifact - id: reference_id - run: | - set -xe - source activate e2e_ci - conda install gh --channel conda-forge -y - if [ "${{ env.pytorch }}" == "on-demand" ];then - artifact_type="weekly" - else - artifact_type="${{ env.pytorch }}" - fi - REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ - --json body -q .body |grep "Inductor-${artifact_type}-LTS-XPU-E2E" |sed 's/.*: *//')" - gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" - rm -rf reference && mv Inductor-*-XPU-E2E-* reference - - name: Summarize archieve files - id: summary - if: ${{ ! cancelled() }} - run: | - set -x -e -o pipefail - rm -rf ${{ github.workspace }}/upload_files - cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files - mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ - find . 
-type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days - tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs - # Print summary - if [ "${{ inputs.suite }}" != 'pt2e' ];then - source activate e2e_ci - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ - ${{ github.workspace }}/upload_files \ - ${{ github.workspace }}/reference \ - >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! Please check them" - exit ${exit_label} - fi - fi - pt2e_summary_csv="$(find ${{ github.workspace }}/upload_files/ -name "summary.csv")" - if [ -f "${pt2e_summary_csv}" ];then - cat ${pt2e_summary_csv} - failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l) - if [ ${failed_num} -ne 0 ];then - echo "[Warning] PT2E has failures!" - fi - fi - - name: Upload Inductor XPU E2E Data - if: ${{ ! cancelled() }} - uses: actions/upload-artifact@v4 - with: - name: Inductor-${{ env.pytorch }}-LTS-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/upload_files - - name: Upload Reference Run ID - if: ${{ env.pytorch != 'on-demand' }} - run: | - gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body | \ - sed "s/Inductor-${{ env.pytorch }}-LTS-XPU-E2E:.*/Inductor-${{ env.pytorch }}-LTS-XPU-E2E: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt - gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt + suite: ${{ github.event_name == 'schedule' && 'huggingface' || inputs.suite }} + dt: ${{ github.event_name == 'schedule' && 'float32' || inputs.dt }} + mode: ${{ github.event_name == 'schedule' && 'inference' || inputs.mode }} + scenario: ${{ github.event_name == 'schedule' && 'accuracy' || inputs.scenario }} + model: ${{ github.event_name == 'schedule' && '' || inputs.model }} Windows-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' || inputs.ut != '' }} From be531f753c8eff4eab8943420eb347dc55e2f80e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 16 Jul 2025 20:57:53 +0800 Subject: [PATCH 007/160] Update nightly_ondemand.yml --- .github/workflows/nightly_ondemand.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index b1a3754dcf..c87e7fae33 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -135,6 +135,7 @@ jobs: Linux-Nightly-Ondemand-E2E-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} name: linux + secrets: inherit needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_e2e.yml with: From 1df6138ed15387d8a2c3e220b0708617068dd96f Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 17 Jul 2025 09:03:34 +0800 Subject: [PATCH 008/160] update --- .github/workflows/nightly_ondemand.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index c87e7fae33..b83268f6ec 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -107,7 +107,6 @@ jobs: Linux-Nightly-Ondemand-Build: needs: [Conditions-Filter] name: linux - secrets: 
inherit uses: ./.github/workflows/_linux_build.yml with: runner: pvc_rolling @@ -135,7 +134,8 @@ jobs: Linux-Nightly-Ondemand-E2E-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} name: linux - secrets: inherit + permissions: + issues: write needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_e2e.yml with: From 9fe4dcba262ae44359c5b0a2c8af3a58789eac99 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 17 Jul 2025 13:50:51 +0800 Subject: [PATCH 009/160] update --- .github/workflows/nightly_ondemand.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index b83268f6ec..3e461ef82b 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -107,6 +107,7 @@ jobs: Linux-Nightly-Ondemand-Build: needs: [Conditions-Filter] name: linux + secrets: inherit uses: ./.github/workflows/_linux_build.yml with: runner: pvc_rolling From ef919846b3d6c966e57e54c08b275c444155d292 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 17 Jul 2025 17:05:47 +0800 Subject: [PATCH 010/160] update --- .github/workflows/_linux_build.yml | 7 +++++++ .github/workflows/_linux_e2e.yml | 15 +++++++++------ .github/workflows/_windows_ut.yml | 4 ++-- .github/workflows/nightly_ondemand.yml | 9 ++++----- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 481c9527b2..91efb43611 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -37,6 +37,13 @@ on: permissions: read-all jobs: + wheel: + if: ${{ contains(inputs.test_type, 'wheel') }} + name: ${{ inputs.pytorch }} + runs-on: ubuntu-latest + steps: + - name: Use ${{ inputs.pytorch }} + run: echo 'Use ${{ inputs.pytorch }}' build: if: ${{ ! 
contains(inputs.test_type, 'wheel') }} runs-on: ${{ inputs.runner }} diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index fb6405cf48..80a609bc9b 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -1,4 +1,4 @@ -name: Linux PyTorch XPU Build +name: Linux E2E Test on: workflow_call: @@ -53,24 +53,26 @@ on: permissions: read-all jobs: - e2e: + e2e_test: runs-on: ${{ inputs.runner }} timeout-minutes: 3600 permissions: issues: write container: - image: 'xpu:test' + image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --privileged --shm-size=8g + - /etc/group:/etc/group + options: --device=/dev/mem --device=/dev/dri --privileged --shm-size=8g -u jenkins env: - AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" + AGENT_TOOLSDIRECTORY: "/opt/_tools" GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} reference_issue: 1645 steps: - name: Cleanup workspace run: | + whoami rm -rf ~/.triton /tmp ./* || sudo rm -rf ~/.triton /tmp ./* mkdir -m 777 /tmp || sudo mkdir -m 777 /tmp - name: Checkout torch-xpu-ops @@ -79,9 +81,10 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ inputs.python }} - - name: Prepare Conda ENV + - name: Check python run: | which python + python -V pip list pip install pandas scipy psutil requests - name: Install oneAPI DLE diff --git a/.github/workflows/_windows_ut.yml b/.github/workflows/_windows_ut.yml index ee628792f0..9ca7f7eb8d 100644 --- a/.github/workflows/_windows_ut.yml +++ b/.github/workflows/_windows_ut.yml @@ -8,7 +8,7 @@ on: type: string default: 'main' description: Pytorch branch/commit - keep_torch_xpu_ops: + torch_xpu_ops: required: false type: string default: 'false' @@ -89,7 +89,7 @@ jobs: git status git show -s git submodule sync && git submodule update --init --recursive - if ${{ inputs.keep_torch_xpu_ops }} == 'true' ( + if ${{ inputs.torch_xpu_ops }} == 'pinned' ( echo "Don't replace torch-xpu-ops!" ) else ( echo "Replace torch-xpu-ops!" 
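A note on the bash syntax tightened in the nightly_ondemand.yml hunk below: '[[' is a shell keyword, so it and its operands must be separated by whitespace or bash never evaluates the conditional at all. A minimal standalone sketch (illustrative only, not taken from any workflow in this series; the variable name and value are assumed):

    #!/bin/bash
    pytorch="nightly_wheel"    # assumed example value
    # Correct form: spaces around [[ , the operands, and ]] let bash parse the glob match
    if [[ "${pytorch}" == *"_wheel" ]]; then
        echo "wheel-based run"
    fi
    # Broken form: if [["${pytorch}" == *"_wheel"]];then
    # After expansion bash looks for a command literally named '[[nightly_wheel'
    # and the step fails with "command not found" instead of evaluating the test.
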
diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 3e461ef82b..7df0cfe763 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -90,7 +90,7 @@ jobs: torch_xpu_ops="main" fi else - if [["${{ inputs.pytorch }}" == *"_wheel"]];then + if [[ "${{ inputs.pytorch }}" == *"_wheel" ]];then test_type="wheel-ondemand" pytorch="${{ inputs.pytorch }}" torch_xpu_ops="pinned" @@ -124,7 +124,7 @@ jobs: needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_ut.yml with: - runner: linux.idc.xpu + runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} @@ -135,8 +135,7 @@ jobs: Linux-Nightly-Ondemand-E2E-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} name: linux - permissions: - issues: write + permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_e2e.yml with: @@ -152,7 +151,7 @@ jobs: model: ${{ github.event_name == 'schedule' && '' || inputs.model }} Windows-Nightly-Ondemand-UT-Tests: - if: ${{ github.event_name == 'schedule' || inputs.ut != '' }} + if: ${{ github.event_name == 'schedule' }} name: Windows-nightly-ondemand uses: ./.github/workflows/_windows_ut.yml with: From 01fbe460f240863e83747470476e8e0c7bc9fd2e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 17 Jul 2025 17:15:51 +0800 Subject: [PATCH 011/160] update --- .github/workflows/nightly_ondemand.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 7df0cfe763..57800e2eb4 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -28,23 +28,23 @@ on: description: Installed oneAPI DLE on host by default, fill offline.sh url if needed ut: type: string - default: 'op_regression' + default: '' description: UT scope. `op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed`. Delimiter is comma suite: type: string - default: 'huggingface' + default: '' description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma dt: type: string - default: 'float32' + default: '' description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma mode: type: string - default: 'inference' + default: '' description: Test mode. `inference,training`. Delimiter is comma scenario: type: string - default: 'accuracy' + default: '' description: Test scenario. `accuracy,performance`. Delimiter is comma model: type: string From 191b5c046ca68fd2cf35e6231826dae51d0d40c5 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 17 Jul 2025 17:21:39 +0800 Subject: [PATCH 012/160] update --- .github/workflows/_linux_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 80a609bc9b..4dbe713f0c 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -73,6 +73,7 @@ jobs: - name: Cleanup workspace run: | whoami + sudo chmod 777 . 
-R rm -rf ~/.triton /tmp ./* || sudo rm -rf ~/.triton /tmp ./* mkdir -m 777 /tmp || sudo mkdir -m 777 /tmp - name: Checkout torch-xpu-ops From f313b856074882518689f78f23fbd1578fa69a30 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 09:38:51 +0800 Subject: [PATCH 013/160] update --- .github/workflows/_linux_e2e.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 4dbe713f0c..168b843446 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -72,8 +72,8 @@ jobs: steps: - name: Cleanup workspace run: | - whoami - sudo chmod 777 . -R + hostname && whoami + chmod 777 . -R || sudo chmod 777 . -R rm -rf ~/.triton /tmp ./* || sudo rm -rf ~/.triton /tmp ./* mkdir -m 777 /tmp || sudo mkdir -m 777 /tmp - name: Checkout torch-xpu-ops @@ -102,11 +102,12 @@ jobs: pattern: Torch-XPU-Wheel-* - name: Prepare Stock Pytorch run: | - if [ "${{ inputs.pytorch }}" == "release_wheel" ];then + # install pytorch + if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ "${{ inputs.pytorch }}" == "test_wheel" ];then + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ "${{ inputs.pytorch }}" == "nightly_wheel" ];then + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu else pip install --force-reinstall ${{ github.workspace }}/*.whl @@ -128,7 +129,7 @@ jobs: python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - source /opt/intel/oneapi/setvars.sh + source ${{ github.workspace }}/.github/scripts/env.sh sycl-ls # Nihglty launch From 7d4488b79aa970c9d72008b3d4af7cdee015f268 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 09:41:37 +0800 Subject: [PATCH 014/160] update --- .github/workflows/_linux_e2e.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 168b843446..ee36264e8e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -73,9 +73,9 @@ jobs: - name: Cleanup workspace run: | hostname && whoami - chmod 777 . -R || sudo chmod 777 . -R - rm -rf ~/.triton /tmp ./* || sudo rm -rf ~/.triton /tmp ./* - mkdir -m 777 /tmp || sudo mkdir -m 777 /tmp + sudo chmod 777 . -R + sudo rm -rf ~/.triton /tmp ./* + sudo mkdir -m 777 /tmp - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Setup python ${{ inputs.python }} From 66e28dafd114c1c51b51b05b45367cdb360bf05f Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 09:53:52 +0800 Subject: [PATCH 015/160] update --- .github/workflows/_linux_e2e.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index ee36264e8e..4a82ec2eb1 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -73,9 +73,7 @@ jobs: - name: Cleanup workspace run: | hostname && whoami - sudo chmod 777 . 
-R - sudo rm -rf ~/.triton /tmp ./* - sudo mkdir -m 777 /tmp + sudo find ./ /tmp ${HOME}/.triton |grep -vE "^(./|/tmp|${HOME}/.triton)$" |xargs sudo rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Setup python ${{ inputs.python }} From 8b224186ca6486a7c67947c3278c9daf7b171226 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 10:01:17 +0800 Subject: [PATCH 016/160] update --- .github/workflows/_linux_e2e.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 4a82ec2eb1..148f725358 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -65,7 +65,7 @@ jobs: - /etc/group:/etc/group options: --device=/dev/mem --device=/dev/dri --privileged --shm-size=8g -u jenkins env: - AGENT_TOOLSDIRECTORY: "/opt/_tools" + AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} reference_issue: 1645 @@ -115,7 +115,7 @@ jobs: cd pytorch git checkout ${TORCH_COMMIT_ID} # apply extra PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + python ../.github/scripts/apply_torch_pr.py git status && git diff && git show -s - name: Torch Config run: | @@ -233,7 +233,7 @@ jobs: id: summary if: ${{ ! cancelled() }} run: | - set -x -e -o pipefail + set -xe -o pipefail rm -rf ${{ github.workspace }}/upload_files cp -r ${{ github.workspace }}/pytorch/inductor_log ${{ github.workspace }}/upload_files mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ From acf94d148fc4e8a58362e75e20aac6bd0089d904 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 10:28:21 +0800 Subject: [PATCH 017/160] update --- .github/scripts/env.sh | 0 .github/workflows/_linux_build.yml | 2 +- .github/workflows/_linux_e2e.yml | 14 ++++---------- .github/workflows/pull.yml | 2 +- 4 files changed, 6 insertions(+), 12 deletions(-) mode change 100644 => 100755 .github/scripts/env.sh diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh old mode 100644 new mode 100755 diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 91efb43611..c4b1b25ef3 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -98,7 +98,7 @@ jobs: bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" fi - source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh + ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh # gcc 11 source /opt/rh/gcc-toolset-11/enable ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 148f725358..04cde983f6 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -69,6 +69,9 @@ jobs: GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} reference_issue: 1645 + defaults: + run: + shell: bash -xe steps: - name: Cleanup workspace run: | @@ -127,7 +130,7 @@ jobs: python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - source ${{ github.workspace }}/.github/scripts/env.sh + ${{ github.workspace }}/.github/scripts/env.sh sycl-ls # Nihglty launch @@ -221,9 +224,6 @@ jobs: - name: Download Reference Artifact id: 
reference_id run: | - set -xe - source activate e2e_ci - conda install gh --channel conda-forge -y artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/')" REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ --json body -q .body |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo 'n/a')" @@ -233,15 +233,9 @@ jobs: id: summary if: ${{ ! cancelled() }} run: | - set -xe -o pipefail rm -rf ${{ github.workspace }}/upload_files cp -r ${{ github.workspace }}/pytorch/inductor_log ${{ github.workspace }}/upload_files - mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ - find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days - tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs - # Print summary if [ "${{ inputs.suite }}" != 'pt2e' ];then - source activate e2e_ci bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ ${{ github.workspace }}/upload_files \ ${{ github.workspace }}/reference \ diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 3f3b1c1b58..5b539f800d 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -169,7 +169,7 @@ jobs: . /etc/os-release echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - source ../torch-xpu-ops/.github/scripts/env.sh + ../torch-xpu-ops/.github/scripts/env.sh echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" From a6fa7da12b08d087fa9dcd312f30c60e154287a0 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 10:47:27 +0800 Subject: [PATCH 018/160] update --- .github/workflows/_linux_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 04cde983f6..c3bc6d011b 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -71,7 +71,7 @@ jobs: reference_issue: 1645 defaults: run: - shell: bash -xe + shell: bash steps: - name: Cleanup workspace run: | From 053bed35ff20a4accb5dfc6558ebf03052cd11a4 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 10:51:32 +0800 Subject: [PATCH 019/160] update --- .github/workflows/_linux_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index c3bc6d011b..59806f22da 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -76,7 +76,8 @@ jobs: - name: Cleanup workspace run: | hostname && whoami - sudo find ./ /tmp ${HOME}/.triton |grep -vE "^(./|/tmp|${HOME}/.triton)$" |xargs sudo rm -rf + sudo find ./ /tmp |grep -vE "^(./|/tmp)$" |xargs sudo rm -rf + sudo rm -rf ~/.triton ${HOME}/.triton - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Setup python ${{ inputs.python }} From 7ee8d4bf76e94beb914bfdc01770a0ac2fcdc545 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 10:59:54 +0800 Subject: [PATCH 020/160] update --- .github/workflows/_linux_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 59806f22da..f162dc590a 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -71,13 +71,14 @@ jobs: reference_issue: 1645 defaults: run: - shell: bash + shell: bash -xe {0} steps: - name: Cleanup workspace run: | hostname && whoami sudo find ./ /tmp |grep -vE "^(./|/tmp)$" |xargs sudo rm -rf sudo rm -rf ~/.triton ${HOME}/.triton + clinfo --list - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Setup python ${{ inputs.python }} From 428e4832f825db449f3930c2e3bc8a9f81d7f0d5 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 11:01:58 +0800 Subject: [PATCH 021/160] update --- .github/workflows/_linux_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index f162dc590a..34861ce551 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -75,9 +75,10 @@ jobs: steps: - name: Cleanup workspace run: | - hostname && whoami + hostname && whoami && id sudo find ./ /tmp |grep -vE "^(./|/tmp)$" |xargs sudo rm -rf sudo rm -rf ~/.triton ${HOME}/.triton + sudo chmod . 777 -R clinfo --list - name: Checkout torch-xpu-ops uses: actions/checkout@v4 From c483968b50d109fdfbb368fed6a9b5d8b22d26ee Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 11:20:00 +0800 Subject: [PATCH 022/160] update --- .github/workflows/_linux_e2e.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 34861ce551..6a296801e4 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -62,8 +62,7 @@ jobs: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - - /etc/group:/etc/group - options: --device=/dev/mem --device=/dev/dri --privileged --shm-size=8g -u jenkins + options: --device=/dev/mem --device=/dev/dri --group-add video --group-add 109 --privileged --shm-size=8g -u jenkins:109 env: AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} @@ -78,7 +77,7 @@ jobs: hostname && whoami && id sudo find ./ /tmp |grep -vE "^(./|/tmp)$" |xargs sudo rm -rf sudo rm -rf ~/.triton ${HOME}/.triton - sudo chmod . 777 -R + sudo chmod 777 . -R clinfo --list - name: Checkout torch-xpu-ops uses: actions/checkout@v4 From 32474f8efb83454326f117ada9e42cce472bc278 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 11:24:14 +0800 Subject: [PATCH 023/160] update --- .github/workflows/_linux_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 6a296801e4..5cd6d1504f 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -77,7 +77,7 @@ jobs: hostname && whoami && id sudo find ./ /tmp |grep -vE "^(./|/tmp)$" |xargs sudo rm -rf sudo rm -rf ~/.triton ${HOME}/.triton - sudo chmod 777 . -R + sudo chmod 777 . 
/__w -R clinfo --list - name: Checkout torch-xpu-ops uses: actions/checkout@v4 From 59760992ee195fd3fa208a4fb65914eedc311d6b Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 11:24:39 +0800 Subject: [PATCH 024/160] update --- .github/workflows/_linux_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 5cd6d1504f..2c02abeed8 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -77,7 +77,7 @@ jobs: hostname && whoami && id sudo find ./ /tmp |grep -vE "^(./|/tmp)$" |xargs sudo rm -rf sudo rm -rf ~/.triton ${HOME}/.triton - sudo chmod 777 . /__w -R + sudo chmod 777 . /__w -R || true clinfo --list - name: Checkout torch-xpu-ops uses: actions/checkout@v4 From 28e53b287110e7c088f986e43ed9e55bc6b65f4d Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 11:42:38 +0800 Subject: [PATCH 025/160] update --- .github/workflows/_linux_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 2c02abeed8..f237125867 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -62,7 +62,8 @@ jobs: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --group-add video --group-add 109 --privileged --shm-size=8g -u jenkins:109 + options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g \ + -u $(id -u):$(getent group render | cut -d ':' -f3) env: AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} From 2e9921e655a1d6cfb4480dcba391dccfc3a63c28 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 11:45:55 +0800 Subject: [PATCH 026/160] update --- .github/workflows/_linux_e2e.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index f237125867..4b224a920c 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -62,8 +62,9 @@ jobs: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g \ - -u $(id -u):$(getent group render | cut -d ':' -f3) + options: --device=/dev/mem --device=/dev/dri --group-add video + --privileged --shm-size=8g + -u $(id -u):$(getent group render | cut -d ':' -f3) env: AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} From b058b1a605a79cbb45723c43cc0683060c43e42f Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 12:12:26 +0800 Subject: [PATCH 027/160] update --- .github/workflows/_linux_e2e.yml | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 4b224a920c..9e3d77c9b6 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -53,8 +53,22 @@ on: permissions: read-all jobs: - e2e_test: + get_runner: runs-on: ${{ inputs.runner }} + outputs: + test_host: ${{ steps.runner-info.outputs.test_host }} + test_user: ${{ steps.runner-info.outputs.test_user }} + test_group: ${{ steps.runner-info.outputs.test_group }} + steps: + - name: Get runner info + id: runner-info + run: | + echo "test_host=${RUNNER_NAME}" >> 
${GITHUB_OUTPUT} + echo "test_user=$(id -u)" >> ${GITHUB_OUTPUT} + echo "test_group=$(getent group render |cut -d: -f3)" >> ${GITHUB_OUTPUT} + e2e_test: + runs-on: ${{ needs.get_runner.outputs.test_host }} + needs: [get_runner] timeout-minutes: 3600 permissions: issues: write @@ -62,9 +76,8 @@ jobs: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --group-add video - --privileged --shm-size=8g - -u $(id -u):$(getent group render | cut -d ':' -f3) + options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g + -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} env: AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} @@ -77,9 +90,8 @@ jobs: - name: Cleanup workspace run: | hostname && whoami && id - sudo find ./ /tmp |grep -vE "^(./|/tmp)$" |xargs sudo rm -rf - sudo rm -rf ~/.triton ${HOME}/.triton - sudo chmod 777 . /__w -R || true + find ./ |grep -v "^\./$" |xargs rm -rf + rm -rf ~/.triton ${HOME}/.triton clinfo --list - name: Checkout torch-xpu-ops uses: actions/checkout@v4 From 93e5444128d9422f2c80c0c29135c7248b9e1ecb Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 12:36:05 +0800 Subject: [PATCH 028/160] update --- .github/workflows/_linux_e2e.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 9e3d77c9b6..27ea4d365f 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -55,6 +55,7 @@ permissions: read-all jobs: get_runner: runs-on: ${{ inputs.runner }} + name: ${{ github.runner.name }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} test_user: ${{ steps.runner-info.outputs.test_user }} @@ -66,6 +67,14 @@ jobs: echo "test_host=${RUNNER_NAME}" >> ${GITHUB_OUTPUT} echo "test_user=$(id -u)" >> ${GITHUB_OUTPUT} echo "test_group=$(getent group render |cut -d: -f3)" >> ${GITHUB_OUTPUT} + + # show host info + cat /etc/os-release + uname -a + source /opt/intel/oneapi/setvars.sh + sycl-ls + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + e2e_test: runs-on: ${{ needs.get_runner.outputs.test_host }} needs: [get_runner] @@ -146,8 +155,6 @@ jobs: python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - ${{ github.workspace }}/.github/scripts/env.sh - sycl-ls # Nihglty launch - name: Nightly Huggingface Full Test From 8baec841fcbb14d3ac09056bd96d748c39cbecee Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 15:25:27 +0800 Subject: [PATCH 029/160] update --- .github/workflows/_linux_e2e.yml | 115 ++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 39 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 27ea4d365f..648ac304af 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -53,9 +53,8 @@ on: permissions: read-all jobs: - get_runner: + get_e2e_runner: runs-on: ${{ inputs.runner }} - name: ${{ github.runner.name }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} test_user: ${{ steps.runner-info.outputs.test_user }} @@ -64,54 +63,57 @@ jobs: - name: Get runner info id: runner-info run: | + # get test runner echo "test_host=${RUNNER_NAME}" >> ${GITHUB_OUTPUT} echo "test_user=$(id -u)" >> ${GITHUB_OUTPUT} echo "test_group=$(getent group 
render |cut -d: -f3)" >> ${GITHUB_OUTPUT} - # show host info cat /etc/os-release uname -a source /opt/intel/oneapi/setvars.sh sycl-ls dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + - name: Cleanup workspace + if: ${{ always() }} + run: | + # clean docker cache + docker stop $(docker ps -aq) || true + docker system prune -af || true + # clean files + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf e2e_test: - runs-on: ${{ needs.get_runner.outputs.test_host }} - needs: [get_runner] + runs-on: ${{ needs.get_e2e_runner.outputs.test_host }} + needs: get_e2e_runner timeout-minutes: 3600 - permissions: - issues: write container: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} + -u ${{ needs.get_e2e_runner.outputs.test_user }}:${{ needs.get_e2e_runner.outputs.test_group }} env: AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - reference_issue: 1645 defaults: run: shell: bash -xe {0} steps: - - name: Cleanup workspace - run: | - hostname && whoami && id - find ./ |grep -v "^\./$" |xargs rm -rf - rm -rf ~/.triton ${HOME}/.triton - clinfo --list - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Setup python ${{ inputs.python }} + - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: python-version: ${{ inputs.python }} - - name: Check python + - name: Check runner run: | - which python + hostname && whoami && id + clinfo --list + gcc -v && g++ -v + which python && which pip python -V + pip install -U pip wheel setuptools pip list pip install pandas scipy psutil requests - name: Install oneAPI DLE @@ -244,24 +246,61 @@ jobs: dt: ${{ inputs.dt }} scenario: ${{ inputs.scenario }} - - name: Download Reference Artifact - id: reference_id + - name: Get archieve files + if: ${{ ! cancelled() }} + run: | + rm -rf ${{ github.workspace }}/upload_files + cp -r ${{ github.workspace }}/pytorch/inductor_log ${{ github.workspace }}/upload_files + - name: Upload Inductor XPU E2E Data + if: ${{ ! 
cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} + path: ${{ github.workspace }}/upload_files + + e2e_summary: + runs-on: ubuntu-latest + if: ${{ always() }} + needs: e2e_test + permissions: + issues: write + env: + GH_TOKEN: ${{ github.token }} + REFERENCE_ISSUE_ID: 1645 + steps: + - name: Install gh + run: | + apt-get update + apt-get install gh rsync -y + find ./ |grep -v "^\./$" |xargs rm -rf + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + - name: Download Target Artifact + run: | + target_dir="Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}" + gh --repo intel/torch-xpu-ops run download ${GITHUB_RUN_ID} -n "${target_dir}" + if [ -d "${target_dir}" ];then + rsync -avzq --delete ${target_dir}/ target/ + rm -rf ${target_dir}/ + fi + - name: Download Baseline Artifact run: | artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/')" - REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ + REFERENCE_RUN_ID="$(gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} \ --json body -q .body |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo 'n/a')" - gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" && \ - rm -rf reference && mv Inductor-*-XPU-E2E-* reference || echo 'No reference' - - name: Summarize archieve files - id: summary + gh --repo intel/torch-xpu-ops run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" || true + baseline_dir="$(find . -name 'Inductor-*-XPU-E2E-*')" + if [ -d "${baseline_dir}" ];then + rsync -avzq --delete ${baseline_dir}/ baseline/ + rm -rf ${baseline_dir}/ + fi + - name: Get summary if: ${{ ! cancelled() }} run: | - rm -rf ${{ github.workspace }}/upload_files - cp -r ${{ github.workspace }}/pytorch/inductor_log ${{ github.workspace }}/upload_files if [ "${{ inputs.suite }}" != 'pt2e' ];then bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ - ${{ github.workspace }}/upload_files \ - ${{ github.workspace }}/reference \ + ${{ github.workspace }}/target \ + ${{ github.workspace }}/baseline \ >> ${GITHUB_STEP_SUMMARY} exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) if [ ${exit_label} -ne 0 ];then @@ -270,7 +309,7 @@ jobs: exit ${exit_label} fi fi - pt2e_summary_csv="$(find ${{ github.workspace }}/upload_files/ -name "summary.csv")" + pt2e_summary_csv="$(find ${{ github.workspace }}/target/ -name "summary.csv")" if [ -f "${pt2e_summary_csv}" ];then cat ${pt2e_summary_csv} failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l) @@ -278,20 +317,18 @@ jobs: echo "[Warning] PT2E has failures!" fi fi - - name: Upload Inductor XPU E2E Data - if: ${{ ! cancelled() }} - uses: actions/upload-artifact@v4 - with: - name: Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/upload_files - name: Upload Reference Run ID - if: ${{ ! contains(inputs.test_type, 'ondemand') }} + if: ${{ ! 
contains(inputs.test_type, 'ondemand') && github.repository_owner == 'intel' }} run: | - gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body 2>&1 |tee new_body.txt 2>&1 + gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee new_body.txt 2>&1 has_or_not="$(grep 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt |wc -l)" if [ ${has_or_not} -ne 0 ];then sed -i "s/Inductor-${{ inputs.test_type }}-LTS2:.*/Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}/" new_body.txt else echo "Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}" |tee -a new_body.txt fi - gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt + gh --repo ${GITHUB_REPOSITORY} issue edit ${REFERENCE_ISSUE_ID} --body-file new_body.txt + - name: Cleanup workspace + if: ${{ always() }} + run: | + find ./ |grep -v "^\./$" |xargs rm -rf From d4c78aa4992c0ef8338c01641d6e10ec0b441dc5 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 15:32:27 +0800 Subject: [PATCH 030/160] update --- .../actions/inductor-xpu-e2e-test/action.yml | 26 +++---------------- .github/workflows/_linux_e2e.yml | 1 + 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 8f9b90780f..68a0e5f5a1 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -1,14 +1,14 @@ name: inductor-xpu-e2e-test inputs: + env_prepare: + required: false + description: If set to any value, will prepare suite test env suite: required: true type: string default: 'huggingface' description: Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma - env_prepare: - required: false - description: If set to any value, will prepare suite test env dt: required: true type: string @@ -24,28 +24,12 @@ inputs: type: string default: 'accuracy' description: accuracy,performance. 
Delimiter is comma - cards: - required: false - type: string - default: 'all' - description: which cards can be used in the test - pytorch: - required: false - type: string - default: 'main' - description: Pytorch branch/commit - driver: - required: false - type: string - default: 'lts' - description: Driver lts/rolling runs: using: composite steps: - name: Prepare ENV if: ${{ inputs.env_prepare }} - shell: bash run: | if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then python -c "import torch, torchvision, torchaudio" @@ -78,7 +62,6 @@ runs: fi pip list |grep -E 'intel|torch' - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - shell: bash run: | cp ./.github/scripts/inductor_xpu_test.sh ./pytorch cd ./pytorch @@ -139,9 +122,6 @@ runs: done - name: Summary E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - env: - HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }} - shell: bash run: | cd ./pytorch rm -f inductor_log/summary_accuracy.csv diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 648ac304af..b501baaf3c 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -96,6 +96,7 @@ jobs: AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + MODEL_ONLY_NAME: ${{ inputs.model }} defaults: run: shell: bash -xe {0} From a8154f14d41a762d9b29735fa3309dcdc8cb9c5a Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 15:42:49 +0800 Subject: [PATCH 031/160] update --- .github/workflows/_linux_e2e.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index b501baaf3c..45d560c0c1 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -64,9 +64,9 @@ jobs: id: runner-info run: | # get test runner - echo "test_host=${RUNNER_NAME}" >> ${GITHUB_OUTPUT} - echo "test_user=$(id -u)" >> ${GITHUB_OUTPUT} - echo "test_group=$(getent group render |cut -d: -f3)" >> ${GITHUB_OUTPUT} + echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT} + echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} + echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} # show host info cat /etc/os-release uname -a @@ -80,6 +80,7 @@ jobs: docker stop $(docker ps -aq) || true docker system prune -af || true # clean files + ls -al sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf e2e_test: @@ -93,7 +94,7 @@ jobs: options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g -u ${{ needs.get_e2e_runner.outputs.test_user }}:${{ needs.get_e2e_runner.outputs.test_group }} env: - AGENT_TOOLSDIRECTORY: "${{ github.workspace }}/_tools" + AGENT_TOOLSDIRECTORY: /opt/_tools GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} MODEL_ONLY_NAME: ${{ inputs.model }} @@ -101,8 +102,6 @@ jobs: run: shell: bash -xe {0} steps: - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: @@ -117,6 +116,10 @@ jobs: pip install -U pip wheel setuptools pip list pip install pandas scipy psutil requests + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Install oneAPI DLE if: ${{ inputs.oneapi != 'installed' }} run: | From 
f25ecfeeec280d77f222f774e15f081633dc31fb Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 15:52:32 +0800 Subject: [PATCH 032/160] update --- .github/actions/inductor-xpu-e2e-test/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 68a0e5f5a1..904c6c8840 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -27,6 +27,9 @@ inputs: runs: using: composite + defaults: + run: + shell: bash -xe {0} steps: - name: Prepare ENV if: ${{ inputs.env_prepare }} @@ -75,7 +78,6 @@ runs: contains_status="continue" } } - set -xe xpu_num=$(clinfo --list |awk 'BEGIN{gpu=0;}{if(gpu==1 && $0~/Platform/){gpu=0;}; if(gpu==1){print $0;}; if($0~/Platform.*Graphics/){gpu=1;}}' |wc -l) cores_per_instance="$(lscpu |grep -E 'Core\(s\) per socket:|Socket\(s\):' |awk -v i="${xpu_num}" 'BEGIN{sum=1}{sum*=$NF}END{print sum/i}')" export OMP_NUM_THREADS=${cores_per_instance} @@ -134,7 +136,6 @@ runs: cp ./.github/scripts/inductor_summary.py ./pytorch cd ./pytorch pip install styleFrame scipy pandas - set -xe dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g') mode=$(echo ${{ inputs.mode }} |sed 's/,/ /g') suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g') From e06e1bd1c1e5feb1ffc55cd50f83329d5f92e6e7 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 16:01:17 +0800 Subject: [PATCH 033/160] update --- .github/actions/inductor-xpu-e2e-test/action.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 904c6c8840..7f7b4f3165 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -25,11 +25,12 @@ inputs: default: 'accuracy' description: accuracy,performance. Delimiter is comma +defaults: + run: + shell: bash -xe {0} + runs: using: composite - defaults: - run: - shell: bash -xe {0} steps: - name: Prepare ENV if: ${{ inputs.env_prepare }} From c437f29b1a2adfd32397323390d2fdd535c8a1d4 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 16:15:24 +0800 Subject: [PATCH 034/160] update --- .../actions/inductor-xpu-e2e-test/action.yml | 7 +++--- .github/actions/pt2e/action.yml | 23 ++++--------------- .github/workflows/_linux_e2e.yml | 2 ++ 3 files changed, 9 insertions(+), 23 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 7f7b4f3165..d269ce6d12 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -25,15 +25,12 @@ inputs: default: 'accuracy' description: accuracy,performance. 
Delimiter is comma -defaults: - run: - shell: bash -xe {0} - runs: using: composite steps: - name: Prepare ENV if: ${{ inputs.env_prepare }} + shell: bash -xe {0} run: | if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then python -c "import torch, torchvision, torchaudio" @@ -66,6 +63,7 @@ runs: fi pip list |grep -E 'intel|torch' - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + shell: bash -xe {0} run: | cp ./.github/scripts/inductor_xpu_test.sh ./pytorch cd ./pytorch @@ -125,6 +123,7 @@ runs: done - name: Summary E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + shell: bash -xe {0} run: | cd ./pytorch rm -f inductor_log/summary_accuracy.csv diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index ac4067e7ce..7343913e7a 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -14,28 +14,19 @@ inputs: type: string default: 'accuracy' description: accuracy,performance. Delimiter is comma - hf_token: - required: false - description: HUGGING_FACE_HUB_TOKEN for torchbench test pytorch: required: false type: string default: 'main' description: Pytorch branch/commit - driver: - required: false - type: string - default: 'lts' - description: Driver lts/rolling runs: using: composite steps: - name: Prepare ENV if: ${{ inputs.env_prepare }} - shell: bash + shell: bash -xe {0} run: | - source activate e2e_ci # accuracy code if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then rm -rf pt2e-accuracy @@ -65,8 +56,8 @@ runs: cd pt2e-performance # remove deps which will reinstall torch pip install --no-deps accelerate - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch) + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) pip install -U transformers==4.44.2 sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt git status && git diff @@ -89,14 +80,8 @@ runs: bash valprep.sh fi - name: PT2E Test (${{ inputs.dt }} ${{ inputs.scenario }}) - env: - HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} - shell: bash + shell: bash -xe {0} run: | - source activate e2e_ci - set -xe pt2e_logs_dir="${{ github.workspace }}/../pytorch/inductor_log/pt2e" rm -rf "${pt2e_logs_dir}" && mkdir -p "${pt2e_logs_dir}" echo "Mode,Model,Dtype,Result" |tee ${pt2e_logs_dir}/summary.csv diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 45d560c0c1..5b56b7ded0 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -194,6 +194,7 @@ jobs: if: ${{ contains(inputs.test_type, 'nightly') }} uses: ./.github/actions/pt2e with: + env_prepare: true dt: float32,int8 scenario: accuracy,performance @@ -229,6 +230,7 @@ jobs: if: ${{ contains(inputs.test_type, 'weekly') }} uses: ./.github/actions/pt2e with: + env_prepare: true dt: float32,int8 scenario: accuracy,performance From 0ae0bb1f85fb68f53fa14f83badf096d373ed228 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 16:19:26 +0800 Subject: 
[PATCH 035/160] update --- .github/actions/pt2e/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index 7343913e7a..65fde6a03b 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -92,14 +92,14 @@ runs: do if [[ "${{ inputs.dt }}" == *"float32"* ]];then ${cmd_line} --model_list ${model_name} --is_fp32 2>&1 |tee "${pt2e_logs_dir}/accuracy-float32-${model_name}.log" || true - grep -i 'Acc.1.*Acc.5' "${pt2e_logs_dir}/accuracy-float32-${model_name}.log" |tail -n 1 |awk -v m="${model_name}" ' + (grep -i 'Acc.1.*Acc.5' "${pt2e_logs_dir}/accuracy-float32-${model_name}.log" || echo "failed a failed") 2>&1 |tail -n 1 |awk -v m="${model_name}" ' BEGIN{acc1 = "failed"; acc5 = "failed";} {acc1 = $(NF - 2); acc5 = $NF;} END{printf("Accuracy,%s,float32,%s,%s\n", m, acc1, acc5) }' |tee -a ${pt2e_logs_dir}/summary.csv fi if [[ "${{ inputs.dt }}" == *"int8"* ]];then ${cmd_line} --model_list ${model_name} 2>&1 |tee "${pt2e_logs_dir}/accuracy-int8-${model_name}.log" || true - grep -i 'Acc.1.*Acc.5' "${pt2e_logs_dir}/accuracy-int8-${model_name}.log" |tail -n 1 |awk -v m="${model_name}" ' + (grep -i 'Acc.1.*Acc.5' "${pt2e_logs_dir}/accuracy-int8-${model_name}.log" || echo "failed a failed") 2>&1 |tail -n 1 |awk -v m="${model_name}" ' BEGIN{acc1 = "failed"; acc5 = "failed";} {acc1 = $(NF - 2); acc5 = $NF;} END{printf("Accuracy,%s,int8,%s,%s\n", m, acc1, acc5) }' |tee -a ${pt2e_logs_dir}/summary.csv From d4da95d9593323be1d2e807183fae747135f58ae Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 17:07:57 +0800 Subject: [PATCH 036/160] update --- .github/workflows/_linux_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 5b56b7ded0..89d002da98 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -276,8 +276,8 @@ jobs: steps: - name: Install gh run: | - apt-get update - apt-get install gh rsync -y + sudo apt-get update + sudo apt-get install gh rsync -y find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 From db17d7db6a8d2c6620dee410fc234f97b12dc9f1 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 17:37:18 +0800 Subject: [PATCH 037/160] update --- .github/workflows/_linux_e2e.yml | 48 +++++++++++++++++++------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 89d002da98..50d9ddc96e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -265,40 +265,54 @@ jobs: path: ${{ github.workspace }}/upload_files e2e_summary: - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] if: ${{ always() }} needs: e2e_test permissions: issues: write - env: - GH_TOKEN: ${{ github.token }} - REFERENCE_ISSUE_ID: 1645 + container: + image: ubuntu:24.04 + env: + GH_TOKEN: ${{ github.token }} + REFERENCE_ISSUE_ID: 1645 + defaults: + run: + shell: bash -xe {0} steps: - name: Install gh run: | - sudo apt-get update - sudo apt-get install gh rsync -y - find ./ |grep -v "^\./$" |xargs rm -rf + apt-get update + apt-get install gh rsync -y - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download Target Artifact run: | target_dir="Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}" - gh --repo intel/torch-xpu-ops 
run download ${GITHUB_RUN_ID} -n "${target_dir}" + gh --repo ${GITHUB_REPOSITORY} run download ${GITHUB_RUN_ID} -n "${target_dir}" if [ -d "${target_dir}" ];then rsync -avzq --delete ${target_dir}/ target/ + ls -al target/ rm -rf ${target_dir}/ + else + echo "No artifacts!" + exit 1 fi - name: Download Baseline Artifact run: | artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/')" - REFERENCE_RUN_ID="$(gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} \ - --json body -q .body |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo 'n/a')" - gh --repo intel/torch-xpu-ops run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" || true - baseline_dir="$(find . -name 'Inductor-*-XPU-E2E-*')" - if [ -d "${baseline_dir}" ];then - rsync -avzq --delete ${baseline_dir}/ baseline/ - rm -rf ${baseline_dir}/ + gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee body.txt + REFERENCE_RUN_ID="$(cat body.txt |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo '')" + if [ "${REFERENCE_RUN_ID}" != "" ];then + gh --repo intel/torch-xpu-ops run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" + baseline_dir="$(find . -name 'Inductor-*-XPU-E2E-*' -type d)" + if [ -d "${baseline_dir}" ];then + rsync -avzq --delete ${baseline_dir}/ baseline/ + ls -al baseline/ + rm -rf ${baseline_dir}/ + fi + else + echo "No reference!" + mkdir -p baseline fi - name: Get summary if: ${{ ! cancelled() }} @@ -334,7 +348,3 @@ jobs: echo "Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}" |tee -a new_body.txt fi gh --repo ${GITHUB_REPOSITORY} issue edit ${REFERENCE_ISSUE_ID} --body-file new_body.txt - - name: Cleanup workspace - if: ${{ always() }} - run: | - find ./ |grep -v "^\./$" |xargs rm -rf From b9c247ab80d7665e55998da63943697fc603fb08 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 18:07:43 +0800 Subject: [PATCH 038/160] update --- .github/workflows/_linux_e2e.yml | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 50d9ddc96e..5da82c05b1 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -282,23 +282,18 @@ jobs: - name: Install gh run: | apt-get update - apt-get install gh rsync -y + apt-get install gh rsync ca-certificates -y - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download Target Artifact run: | + mkdir target/ + cd target/ target_dir="Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}" gh --repo ${GITHUB_REPOSITORY} run download ${GITHUB_RUN_ID} -n "${target_dir}" - if [ -d "${target_dir}" ];then - rsync -avzq --delete ${target_dir}/ target/ - ls -al target/ - rm -rf ${target_dir}/ - else - echo "No artifacts!" - exit 1 - fi - name: Download Baseline Artifact run: | + mkdir baseline/ artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/')" gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee body.txt REFERENCE_RUN_ID="$(cat body.txt |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo '')" @@ -310,18 +305,12 @@ jobs: ls -al baseline/ rm -rf ${baseline_dir}/ fi - else - echo "No reference!" - mkdir -p baseline fi - name: Get summary if: ${{ ! 
cancelled() }} run: | if [ "${{ inputs.suite }}" != 'pt2e' ];then - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ - ${{ github.workspace }}/target \ - ${{ github.workspace }}/baseline \ - >> ${GITHUB_STEP_SUMMARY} + bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) if [ ${exit_label} -ne 0 ];then grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 @@ -329,7 +318,7 @@ jobs: exit ${exit_label} fi fi - pt2e_summary_csv="$(find ${{ github.workspace }}/target/ -name "summary.csv")" + pt2e_summary_csv="$(find ./target/ -name "summary.csv")" if [ -f "${pt2e_summary_csv}" ];then cat ${pt2e_summary_csv} failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l) From 9ae98ea3f46f7f343e60da632294c113bbeb04b9 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 18:31:38 +0800 Subject: [PATCH 039/160] update --- .github/workflows/_linux_build.yml | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 4774e7e327..776c03a59e 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -74,30 +74,6 @@ jobs: uses: actions/checkout@v4 with: path: torch-xpu-ops - - name: Build Triton XPU - run: | - # gcc 13 - dnf install -y gcc-toolset-13-gcc-c++ - source /opt/rh/gcc-toolset-13/enable - dnf install -y zlib-devel - cd ../ && rm -rf pytorch - git clone https://github.com/pytorch/pytorch pytorch - cd pytorch - if [ -z ${{ inputs.triton }} ]; then - TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" - else - TRITON_COMMIT_ID="${{ inputs.triton }}" - fi - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - TRITON_VERSION_NAME="$( - curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} - cp pytorch_triton_xpu-*.whl ${{ github.workspace }} - fi - name: Build Pytorch XPU run: | set -xe -o pipefail @@ -122,7 +98,7 @@ jobs: bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" fi - ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh + source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh # gcc 11 source /opt/rh/gcc-toolset-11/enable ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ @@ -134,6 +110,9 @@ jobs: 2>&1 |tee ${{ github.workspace }}/build_pytorch_${PYTORCH_COMMIT//\//-}.log - name: Build Triton run: | + # gcc 13 + dnf install -y gcc-toolset-13-gcc-c++ zlib-devel + source /opt/rh/gcc-toolset-13/enable cd ./pytorch pip install cmake ninja pybind11 rm -rf pytorch_triton_xpu-*.whl From c06f1eea46da0b703d40e8bfa140f329f957f91e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 18 Jul 2025 18:42:37 +0800 Subject: [PATCH 040/160] update --- .github/workflows/_linux_ut.yml | 84 ++++++++++++--------------------- 1 file changed, 30 insertions(+), 54 deletions(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 29292b6285..1a10201e40 100644 --- 
a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -3,46 +3,38 @@ name: Linux UT Test on: workflow_call: inputs: + runner: + required: true + type: string + description: Runner label + test_type: + required: true + type: string + description: Test scope pytorch: - required: false type: string default: 'main' - description: Pytorch branch/commit - keep_torch_xpu_ops: - required: false + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: type: string - default: 'false' - description: Keep torch-xpu-ops pin. `true` means use pined commit - triton: - required: false + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + oneapi: type: string - default: '' - description: Triton commit. Use pytorch pined commit by default + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + type: string + default: '3.10' + description: Python version ut: required: true type: string - default: '' description: UT scope. `op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,torch_xpu` Delimiter is comma disabled_tests: - required: false type: string default: '' description: List disabled tests, such as disable_ut or disable_distributed - python: - required: false - type: string - default: '3.10' - description: Python version - runner: - required: true - type: string - default: 'linux.idc.xpu' - description: Runner label - driver: - required: false - type: string - default: 'lts' - description: Driver lts/rolling permissions: read-all @@ -53,8 +45,6 @@ jobs: timeout-minutes: 300 env: GH_TOKEN: ${{ github.token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} ut_skip_issue: 1624 strategy: fail-fast: false @@ -151,7 +141,7 @@ jobs: additional_steps: | pip install pytest pytest-timeout - name: 'xpu_profiling' - condition: ${{ inputs.driver == 'rolling' && contains(inputs.ut, 'xpu_profiling') }} + condition: ${{ contains(inputs.ut, 'xpu_profiling') }} command_script: | # RN50 Test PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0 @@ -200,11 +190,18 @@ jobs: rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME conda create -n $CONDA_ENV_NAME python=${{ inputs.python }} cmake ninja -y source activate $CONDA_ENV_NAME + - name: Install oneAPI DLE + if: ${{ inputs.oneapi != 'installed' }} + run: | + rm -rf ~/intel ~/.intel /opt/intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} + if: ${{ ! 
contains(inputs.test_type, 'wheel') }} uses: actions/download-artifact@v4 with: - name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} + pattern: Torch-XPU-Wheel-* - name: Prepare Stock Pytorch run: | cd ../ @@ -344,7 +341,7 @@ jobs: - name: 'torch_xpu' condition: ${{ contains(inputs.ut, 'torch_xpu') }} - name: 'xpu_profiling' - condition: ${{ inputs.driver == 'rolling' && contains(inputs.ut, 'xpu_profiling') }} + condition: ${{ contains(inputs.ut, 'xpu_profiling') }} steps: - name: Get matrix UT value run: | @@ -396,8 +393,6 @@ jobs: timeout-minutes: 60 env: GH_TOKEN: ${{ github.token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} ut_skip_issue: 1624 steps: - name: Checkout torch-xpu-ops @@ -450,25 +445,6 @@ jobs: cd third_party/torch-xpu-ops git checkout ${TORCH_XPU_OPS_COMMIT} fi - - name: Triton Installation - run: | - source activate xpu_op_${ZE_AFFINITY_MASK} - cd ../pytorch - if [ -z ${{ inputs.triton }} ]; then - TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" - else - TRITON_COMMIT_ID="${{ inputs.triton }}" - fi - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - TRITON_VERSION_NAME="$( - curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} - pip install pytorch_triton_xpu-*.whl - fi - name: Torch Config run: | source activate xpu_op_${ZE_AFFINITY_MASK} From 6e14f8b58306859a4de90ddf107f470f92b2a581 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 10:52:41 +0800 Subject: [PATCH 041/160] update --- .github/workflows/_linux_e2e.yml | 16 +- .github/workflows/_linux_ut.yml | 331 ++++++++++++++++++------------- 2 files changed, 204 insertions(+), 143 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 5da82c05b1..021f906d09 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -108,6 +108,8 @@ jobs: python-version: ${{ inputs.python }} - name: Check runner run: | + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf hostname && whoami && id clinfo --list gcc -v && g++ -v @@ -115,15 +117,14 @@ jobs: python -V pip install -U pip wheel setuptools pip list - pip install pandas scipy psutil requests - ls -al - find ./ |grep -v "^\./$" |xargs rm -rf + uname -a + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Install oneAPI DLE if: ${{ inputs.oneapi != 'installed' }} run: | - rm -rf ~/intel ~/.intel /opt/intel + rm -rf ~/intel ~/.intel wget -q -O oneapi.sh "${{ inputs.oneapi }}" bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} @@ -145,7 +146,12 @@ jobs: pip install --force-reinstall ${{ github.workspace }}/*.whl fi TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - git clone https://github.com/pytorch/pytorch pytorch + if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then + PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" + else + PYTORCH_REPO="https://github.com/pytorch/pytorch.git" + fi + git clone ${PYTORCH_REPO} 
pytorch cd pytorch git checkout ${TORCH_COMMIT_ID} # apply extra PRs for stock pytorch diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 1a10201e40..b905428035 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -39,60 +39,93 @@ on: permissions: read-all jobs: + get_ut_runner: + runs-on: ${{ inputs.runner }} + outputs: + test_host: ${{ steps.runner-info.outputs.test_host }} + test_user: ${{ steps.runner-info.outputs.test_user }} + test_group: ${{ steps.runner-info.outputs.test_group }} + steps: + - name: Get runner info + id: runner-info + run: | + # get test runner + echo "test_host=${NODE_LABEL}" |tee -a ${GITHUB_OUTPUT} + echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} + echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + # show host info + cat /etc/os-release + uname -a + source /opt/intel/oneapi/setvars.sh + sycl-ls + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + - name: Cleanup workspace + if: ${{ always() }} + run: | + # clean docker cache + # docker stop $(docker ps -aq) || true + docker system prune -af || true + # clean files + ls -al + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf ut_test: - runs-on: ${{ matrix.test.runner || inputs.runner }} + needs: get_ut_runner + runs-on: ${{ matrix.test.runner || needs.get_ut_runner.outputs.test_host }} if: ${{ inputs.ut != 'xpu_distributed' && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 300 + container: + image: mengfeili/intel-pvc-driver:1146-1136 + volumes: + - ${{ github.workspace }}:${{ github.workspace }} + options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g + -u ${{ needs.get_ut_runner.outputs.test_user }}:${{ needs.get_ut_runner.outputs.test_group }} + env: + AGENT_TOOLSDIRECTORY: /opt/_tools + GH_TOKEN: ${{ github.token }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} env: - GH_TOKEN: ${{ github.token }} - ut_skip_issue: 1624 + UT_NAME: ${{ matrix.test.name }} + defaults: + run: + shell: bash -xe {0} strategy: fail-fast: false matrix: test: - name: 'op_regression' condition: ${{ contains(inputs.ut, 'op_regression') }} - directory: 'test/regressions' + directory: 'pytorch/third_party/torch-xpu-ops/test/regressions' command: 'pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml' log_prefix: 'op_regression' - timeout: 8000 - additional_steps: | - clinfo --list - pip install pytest pytest-timeout + timeout: 3600 - name: 'op_regression_dev1' condition: ${{ contains(inputs.ut, 'op_regression_dev1') }} - directory: 'test/regressions' + directory: 'pytorch/third_party/torch-xpu-ops/test/regressions' command: 'pytest --timeout 600 -v test_operation_on_device_1.py --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml' log_prefix: 'op_regression_dev1' - timeout: 8000 - additional_steps: | - clinfo --list - unset ZE_AFFINITY_MASK - pip install pytest pytest-timeout - runner: 'pvc_e2e' + timeout: 300 + runner: 'pvc_rolling' - name: 'op_transformers' condition: ${{ contains(inputs.ut, 'op_transformers') }} - directory: '../pytorch' + directory: 'pytorch' command: 'pytest --timeout 600 -v test/test_transformers.py -k xpu --junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml' log_prefix: 'op_transformers' timeout: 3600 additional_steps: | - pip install pytest pytest-timeout export PYTORCH_TEST_WITH_SLOW=1 - name: 'op_extended' condition: ${{ contains(inputs.ut, 'op_extended') }} - directory: 
'../pytorch/third_party/torch-xpu-ops/test/xpu/extended/' + directory: 'pytorch/third_party/torch-xpu-ops/test/xpu/extended/' command: 'python run_test_with_skip.py' log_prefix: 'op_extended' - timeout: 10000 + timeout: 3600 additional_steps: | - pip install pytest pytest-timeout export PYTORCH_TEST_WITH_SLOW=1 xml_post_processing: | cp op_extended.xml $GITHUB_WORKSPACE/ut_log - name: 'op_ut' condition: ${{ contains(inputs.ut, 'op_ut') }} - directory: '../pytorch/third_party/torch-xpu-ops/test/xpu' + directory: 'pytorch/third_party/torch-xpu-ops/test/xpu' log_prefix: 'op_ut' command_script: | export PYTORCH_ENABLE_XPU_FALLBACK=1 @@ -122,11 +155,9 @@ jobs: 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test_error.log | \ tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test.log cp op_ut_with_only.xml $GITHUB_WORKSPACE/ut_log - additional_steps: | - pip install pytest pytest-timeout - name: 'torch_xpu' condition: ${{ contains(inputs.ut, 'torch_xpu') }} - directory: '../pytorch' + directory: 'pytorch' command_script: | export PYTORCH_TEST_WITH_SLOW=1 export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" @@ -138,11 +169,10 @@ jobs: tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log log_prefix: 'torch_xpu' timeout: 10000 - additional_steps: | - pip install pytest pytest-timeout - name: 'xpu_profiling' condition: ${{ contains(inputs.ut, 'xpu_profiling') }} command_script: | + cd torch-xpu-ops # RN50 Test PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0 cp profiling.fp32.train.pt $GITHUB_WORKSPACE/ut_log/profile_test @@ -170,30 +200,36 @@ jobs: python -m pytest --timeout 600 -vs test_profiler_tree.py | \ tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log additional_steps: | - pip install pytest pytest-timeout mkdir -p ut_log/profile_test/issue_reproduce outputs: ut_name: ${{ steps.set-output.outputs.UT_NAME || '' }} steps: + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Check runner + run: | + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf + hostname && whoami && id + clinfo --list + gcc -v && g++ -v + which python && which pip + python -V + pip install -U pip wheel setuptools + pip list + pip install pytest pytest-timeout + uname -a + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Create unique workspace - run: | - # Create unique conda env for each UT test - random=$(head /dev/urandom | tr -dc A-Za-z0-9_ | head -c ${1:-5} | xargs) - echo "CONDA_ENV_NAME=xpu_op_${ZE_AFFINITY_MASK}_${{ matrix.test.name }}_${random}" >> $GITHUB_ENV - - name: Create Conda Env - run: | - pwd - which conda - conda remove --all -y -n $CONDA_ENV_NAME || \ - rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME - conda create -n $CONDA_ENV_NAME python=${{ inputs.python }} cmake ninja -y - source activate $CONDA_ENV_NAME + with: + path: torch-xpu-ops - name: Install oneAPI DLE if: ${{ inputs.oneapi != 'installed' }} run: | - rm -rf ~/intel ~/.intel /opt/intel + rm -rf ~/intel ~/.intel wget -q -O oneapi.sh "${{ inputs.oneapi }}" bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} @@ -204,75 +240,71 @@ jobs: pattern: Torch-XPU-Wheel-* - name: Prepare Stock Pytorch run: | - cd ../ - rm -rf ./pytorch || sudo rm -rf ./pytorch - git clone https://github.com/pytorch/pytorch pytorch - source activate $CONDA_ENV_NAME 
- if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install --force-reinstall ${{ github.workspace }}/torch*.whl - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - cd ./pytorch - git checkout ${TORCH_COMMIT_ID} - rm -rf vision || sudo rm -rf vision - git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd .. - else + # install pytorch + if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - cd ./pytorch - git checkout ${TORCH_COMMIT_ID} + else + pip install --force-reinstall ${{ github.workspace }}/*.whl fi - pip install requests - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git show -s && git status && git diff + TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') + if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then + PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" + else + PYTORCH_REPO="https://github.com/pytorch/pytorch.git" + fi + git clone ${PYTORCH_REPO} pytorch + cd pytorch + git checkout ${TORCH_COMMIT_ID} pip install -r .ci/docker/requirements-ci.txt + # apply extra PRs for stock pytorch + python ../.github/scripts/apply_torch_pr.py + git status && git diff && git show -s - name: Prepare Torch-xpu-ops run: | - cd ../pytorch + cd pytorch rm -rf third_party/torch-xpu-ops - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - cp -r ${{ github.workspace }} third_party + if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" else - TORCH_XPU_OPS_COMMIT=$(> "${GITHUB_ENV}" + pip list |grep -E 'torch|intel' - name: Run XPU UT Test if: ${{ matrix.test.condition }} run: | set -e mkdir -p ${{ github.workspace }}/ut_log mkdir -p ${{ github.workspace }}/ut_log/${{ matrix.test.name }} - source activate $CONDA_ENV_NAME echo "Running ${{ matrix.test.name }}" echo "Directory: ${{ matrix.test.directory }}" ${{ matrix.test.additional_steps }} - cd ${{ matrix.test.directory }} - if [[ "${{ matrix.test.name }}" == "op_ut" ]] || [[ "${{ matrix.test.name }}" == "xpu_profiling" ]] || [[ "${{ matrix.test.name }}" == "torch_xpu" ]]; then bash << "SCRIPT" set -e @@ -287,18 +319,11 @@ jobs: - name: UT Test Results Summary if: ${{ matrix.test.condition }} run: | - source activate $CONDA_ENV_NAME pip install junitparser - python .github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true + python torch-xpu-ops/.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true if [ -e "ut_failure_list.csv" ];then cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv fi - - name: Clean up - if: ${{ always() }} - run: | - if [ -n "$CONDA_ENV_NAME" ]; then - conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME - fi - name: 
Upload Inductor XPU UT Log if: ${{ matrix.test.condition }} uses: actions/upload-artifact@v4 @@ -323,7 +348,7 @@ jobs: timeout-minutes: 30 env: GH_TOKEN: ${{ github.token }} - ut_skip_issue: 1624 + UT_SKIP_ISSUE: 1624 strategy: fail-fast: false matrix: @@ -370,7 +395,7 @@ jobs: } set -xe cd ${{ github.workspace }}/ut_log/${{ matrix.test.name }} - gh --repo $repo issue view $ut_skip_issue --json body -q .body | sed '/^$/d' > Known_issue.log + gh --repo $repo issue view $UT_SKIP_ISSUE --json body -q .body | sed '/^$/d' > Known_issue.log gh api "repos/${{ github.repository }}/issues?labels=skipped" \ --jq '.[] | select(.pull_request == null) | "Issue #\(.number): \(.title)\n\(.body)\n"' \ > issues.log @@ -393,69 +418,101 @@ jobs: timeout-minutes: 60 env: GH_TOKEN: ${{ github.token }} - ut_skip_issue: 1624 + AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/_tools steps: + - name: Check runner + run: | + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf + sudo find /tmp/ |grep -v "^/tmp/$" |xargs sudo rm -rf + rm -rf ~/.triton ~/.torch + hostname && whoami && id + xpu-smi topology -m + gcc -v && g++ -v + uname -a + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Create Conda Env + with: + path: torch-xpu-ops + - name: Install oneAPI DLE + if: ${{ inputs.oneapi != 'installed' }} run: | - pwd - which conda && conda clean -ay - conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \ - rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK} - conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y - source activate xpu_op_${ZE_AFFINITY_MASK} + rm -rf ~/intel ~/.intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} + if: ${{ ! contains(inputs.test_type, 'wheel') }} uses: actions/download-artifact@v4 with: - name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} + pattern: Torch-XPU-Wheel-* - name: Prepare Stock Pytorch run: | - cd ../ - rm -rf ./pytorch || sudo rm -rf ./pytorch - git clone https://github.com/pytorch/pytorch pytorch - source activate xpu_op_${ZE_AFFINITY_MASK} - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install --force-reinstall ${{ github.workspace }}/torch*.whl - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - cd ./pytorch - git checkout ${TORCH_COMMIT_ID} - rm -rf vision || sudo rm -rf vision - git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd .. 
- else + which python && which pip + python -V + pip install -U pip wheel setuptools + pip list + pip install pytest pytest-timeout + # install pytorch + if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - cd ./pytorch - git checkout ${TORCH_COMMIT_ID} + else + pip install --force-reinstall ${{ github.workspace }}/*.whl + fi + TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') + if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then + PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" + else + PYTORCH_REPO="https://github.com/pytorch/pytorch.git" fi - pip install requests - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git show -s && git status && git diff + git clone ${PYTORCH_REPO} pytorch + cd pytorch + git checkout ${TORCH_COMMIT_ID} pip install -r .ci/docker/requirements-ci.txt + # apply extra PRs for stock pytorch + python ../.github/scripts/apply_torch_pr.py + git status && git diff && git show -s - name: Prepare Torch-xpu-ops run: | - cd ../pytorch + cd pytorch rm -rf third_party/torch-xpu-ops - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - cp -r ${{ github.workspace }} third_party + if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" else - TORCH_XPU_OPS_COMMIT=$(> "${GITHUB_ENV}" cd ${{ github.workspace }}/ut_log/xpu_distributed - gh --repo $repo issue view $ut_skip_issue --json body -q .body | sed '/^$/d' > Known_issue.log + gh --repo $repo issue view $UT_SKIP_ISSUE --json body -q .body | sed '/^$/d' > Known_issue.log gh api "repos/${{ github.repository }}/issues?labels=skipped" \ --jq '.[] | select(.pull_request == null) | "Issue #\(.number): \(.title)\n\(.body)\n"' \ > issues.log From 9a621c526c5635db786c62fb176361b169a99fc4 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 11:20:12 +0800 Subject: [PATCH 042/160] update --- .github/workflows/_linux_e2e.yml | 39 +++++++- .github/workflows/_linux_ut.yml | 14 ++- .github/workflows/pull.yml | 149 ++----------------------------- 3 files changed, 56 insertions(+), 146 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 021f906d09..c9ae30614e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -155,7 +155,11 @@ jobs: cd pytorch git checkout ${TORCH_COMMIT_ID} # apply extra PRs for stock pytorch - python ../.github/scripts/apply_torch_pr.py + if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then + python ../.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 + else + python ../.github/scripts/apply_torch_pr.py + fi git status && git diff && git show -s - name: Torch Config run: | @@ -168,6 +172,35 @@ jobs: pip list |grep -E 'torch|intel' dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + # CICD launch + - name: 
Nightly Huggingface BF16 & FP16 Training Test + if: ${{ contains(inputs.test_type, 'cicd') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: huggingface + dt: bfloat16,float16 + mode: training + scenario: accuracy,performance + - name: Nightly Torchbench BF16 Training Test + if: ${{ contains(inputs.test_type, 'cicd') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: torchbench + dt: bfloat16 + mode: training + scenario: accuracy,performance + - name: Nightly Timm_models BF16 Training Test + if: ${{ contains(inputs.test_type, 'cicd') }} + uses: ./.github/actions/inductor-xpu-e2e-test + with: + env_prepare: true + suite: timm_models + dt: bfloat16 + mode: training + scenario: accuracy,performance + # Nihglty launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'nightly') }} @@ -300,7 +333,7 @@ jobs: - name: Download Baseline Artifact run: | mkdir baseline/ - artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/')" + artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/;s/cicd/weekly/')" gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee body.txt REFERENCE_RUN_ID="$(cat body.txt |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo '')" if [ "${REFERENCE_RUN_ID}" != "" ];then @@ -333,7 +366,7 @@ jobs: fi fi - name: Upload Reference Run ID - if: ${{ ! contains(inputs.test_type, 'ondemand') && github.repository_owner == 'intel' }} + if: ${{ ! (contains(inputs.test_type, 'ondemand') && contains(inputs.test_type, 'cicd')) && github.repository_owner == 'intel' }} run: | gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee new_body.txt 2>&1 has_or_not="$(grep 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt |wc -l)" diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index b905428035..4f3cf67baf 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -45,6 +45,7 @@ jobs: test_host: ${{ steps.runner-info.outputs.test_host }} test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} + ZE_AFFINITY_MASK: ${{ steps.runner-info.outputs.ZE_AFFINITY_MASK }} steps: - name: Get runner info id: runner-info @@ -53,6 +54,7 @@ jobs: echo "test_host=${NODE_LABEL}" |tee -a ${GITHUB_OUTPUT} echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + echo "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK}" |tee -a ${GITHUB_OUTPUT} # show host info cat /etc/os-release uname -a @@ -83,6 +85,7 @@ jobs: AGENT_TOOLSDIRECTORY: /opt/_tools GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + ZE_AFFINITY_MASK: ${{ needs.get_ut_runner.outputs.ZE_AFFINITY_MASK }} env: UT_NAME: ${{ matrix.test.name }} defaults: @@ -104,7 +107,8 @@ jobs: command: 'pytest --timeout 600 -v test_operation_on_device_1.py --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml' log_prefix: 'op_regression_dev1' timeout: 300 - runner: 'pvc_rolling' + additional_steps: | + unset ZE_AFFINITY_MASK - name: 'op_transformers' condition: ${{ contains(inputs.ut, 'op_transformers') }} directory: 'pytorch' @@ -261,7 +265,11 @@ jobs: git checkout ${TORCH_COMMIT_ID} pip install -r .ci/docker/requirements-ci.txt # apply extra PRs for stock pytorch - python ../.github/scripts/apply_torch_pr.py + if [[ "${{ inputs.test_type 
}}" == *"cicd"* ]];then + python ../.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 + else + python ../.github/scripts/apply_torch_pr.py + fi git status && git diff && git show -s - name: Prepare Torch-xpu-ops run: | @@ -278,7 +286,7 @@ jobs: TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi fi - if [ "${{ inputs.torch_xpu_ops }}" == "cicd" ];then + if [ "${{ inputs.test_type }}" == "cicd" ];then cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops else git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 5b539f800d..2b8e8b803d 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -107,152 +107,21 @@ jobs: needs: [preci-conditions-filter, preci-linux-build] uses: ./.github/workflows/_linux_ut.yml with: - disabled_tests: ${{ needs.preci-conditions-filter.outputs.disabled_tests }} - ut: op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_distributed runner: linux.idc.xpu + test_type: build-cicd + pytorch: main + ut: op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_distributed + disabled_tests: ${{ needs.preci-conditions-filter.outputs.disabled_tests }} preci-linux-e2e: if: ${{ !contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_e2e') }} name: preci-linux / e2e_test needs: [preci-conditions-filter, preci-linux-build] - runs-on: pvc_e2e - env: - GH_TOKEN: ${{ github.token }} - reference_issue: 1645 - timeout-minutes: 300 - steps: - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Prepare Conda ENV - run: | - which conda && conda clean -ay - conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci - conda create -n e2e_ci python=3.10 cmake ninja -y - source activate e2e_ci - pip install pandas scipy psutil requests - - name: Download Pytorch wheel - uses: actions/download-artifact@v4 - with: - name: Torch-XPU-Wheel-${{ github.event.pull_request.number }} - - name: Install Pytorch XPU - run: | - source activate e2e_ci - pip install --force-reinstall ${{ github.workspace }}/torch*.whl - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - cd ../ - rm -rf pytorch || sudo rm -rf pytorch - git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${TORCH_COMMIT_ID} - # apply PRs for stock pytorch - # https://github.com/pytorch/pytorch/pull/152940 internal use only for subset model list - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 - git show -s && git status && git diff - - name: Triton Installation - run: | - source activate e2e_ci - cd ../pytorch - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - python .github/scripts/build_triton_wheel.py --device xpu - pip install pytorch_triton_xpu-*.whl - - name: Identify pinned versions - run: | - cd ../pytorch - echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" >> "${GITHUB_ENV}" - echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" >> "${GITHUB_ENV}" - echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" >> "${GITHUB_ENV}" - echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" >> "${GITHUB_ENV}" - echo 
"TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}" - echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}" - . /etc/os-release - echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - ../torch-xpu-ops/.github/scripts/env.sh - echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - - name: Torch Config - run: | - echo "$GITHUB_ENV" - rm -rf ../pytorch/inductor_log || sudo rm -rf ../pytorch/inductor_log - rm -rf /tmp/torchinductor_* || sudo rm -rf /tmp/torchinductor_* - rm -rf ~/.triton/cache || sudo rm -rf ~/.triton/cache - cd .. - source activate e2e_ci - python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py - - name: Huggingface BF16 Training Accuracy Test - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: huggingface - dt: bfloat16 - mode: training - scenario: accuracy,performance - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Huggingface FP16 Training Accuracy Test - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: huggingface - dt: float16 - mode: training - scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Timm_models BF16 Training Accuracy Test - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: timm_models - dt: bfloat16 - mode: training - scenario: accuracy,performance - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Torchbench BF16 Training Accuracy Test - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: torchbench - dt: bfloat16 - mode: training - scenario: accuracy,performance - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Download Reference Artifact - id: reference_id - run: | - set -xe - source activate e2e_ci - conda install gh --channel conda-forge -y - REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ - --json body -q .body |grep "Inductor-weekly-LTS-XPU-E2E" |sed 's/.*: *//')" - gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" - rm -rf reference && mv Inductor-*-XPU-E2E-* reference - - name: Summarize archieve files - if: ${{ ! cancelled() }} - run: | - set -x -e -o pipefail - rm -rf ${{ github.workspace }}/upload_files || sudo rm -rf ${{ github.workspace }}/upload_files - cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files - # Print summary - source activate e2e_ci - export IS_PR=1 - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ - ${{ github.workspace }}/upload_files \ - ${{ github.workspace }}/reference \ - >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! 
Please check them" - exit ${exit_label} - fi - - name: Upload Inductor XPU E2E Data - if: ${{ ! cancelled() }} - uses: actions/upload-artifact@v4 - with: - name: Inductor-CI-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/upload_files + uses: ./.github/workflows/_linux_e2e.yml + with: + runner: pvc_rolling + test_type: build-cicd + pytorch: main preci-windows: name: preci-windows From bb17babef15542bfbf9e5802ee4f6e2e7b27dc91 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 11:22:13 +0800 Subject: [PATCH 043/160] update --- .github/workflows/_linux_e2e.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index c9ae30614e..b314eacd1c 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -25,22 +25,18 @@ on: default: '3.10' description: Python version suite: - required: true type: string default: 'huggingface' description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma dt: - required: true type: string default: 'float32' description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma mode: - required: true type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma scenario: - required: true type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma From 981c7442b8c993e451cab560bbaca346ef5ba483 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 11:23:19 +0800 Subject: [PATCH 044/160] update --- .github/workflows/pull.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 2b8e8b803d..90a11b020e 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -116,6 +116,7 @@ jobs: preci-linux-e2e: if: ${{ !contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_e2e') }} name: preci-linux / e2e_test + permissions: write-all needs: [preci-conditions-filter, preci-linux-build] uses: ./.github/workflows/_linux_e2e.yml with: From 6482077c2c82da713d0c55e8f03f25af12fb6d7c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 11:42:33 +0800 Subject: [PATCH 045/160] update --- .github/scripts/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index f10f095934..4545fc3bfa 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -51,7 +51,7 @@ python -m pip install -r requirements.txt python -m pip install mkl-static mkl-include export USE_STATIC_MKL=1 export USE_XCCL=1 -if [ "${XPU_ONEAPI_PATH}" != "" ];then +if [ "${XPU_ONEAPI_PATH}" == "" ];then export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ intel-cmplr-lib-rt==2025.1.1 | \ intel-cmplr-lib-ur==2025.1.1 | \ From ec0c1f231688c1ba68636af8f80bcc1f30196b6e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 11:53:52 +0800 Subject: [PATCH 046/160] update --- .github/workflows/_linux_ut.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 4f3cf67baf..ece9c2647b 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -45,7 +45,6 @@ jobs: test_host: ${{ steps.runner-info.outputs.test_host }} test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ 
steps.runner-info.outputs.test_group }} - ZE_AFFINITY_MASK: ${{ steps.runner-info.outputs.ZE_AFFINITY_MASK }} steps: - name: Get runner info id: runner-info @@ -54,7 +53,6 @@ jobs: echo "test_host=${NODE_LABEL}" |tee -a ${GITHUB_OUTPUT} echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} - echo "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK}" |tee -a ${GITHUB_OUTPUT} # show host info cat /etc/os-release uname -a @@ -72,7 +70,7 @@ jobs: sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf ut_test: needs: get_ut_runner - runs-on: ${{ matrix.test.runner || needs.get_ut_runner.outputs.test_host }} + runs-on: ${{ needs.get_ut_runner.outputs.test_host }} if: ${{ inputs.ut != 'xpu_distributed' && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 300 container: @@ -85,7 +83,7 @@ jobs: AGENT_TOOLSDIRECTORY: /opt/_tools GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - ZE_AFFINITY_MASK: ${{ needs.get_ut_runner.outputs.ZE_AFFINITY_MASK }} + ZE_AFFINITY_MASK: ${{ env.ZE_AFFINITY_MASK }} env: UT_NAME: ${{ matrix.test.name }} defaults: From 1cc986eb2db3d1eaecc1f4d70e8ed7959d7ec773 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 12:39:06 +0800 Subject: [PATCH 047/160] update --- .github/workflows/_linux_ut.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index ece9c2647b..8435ab972e 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -79,11 +79,11 @@ jobs: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g -u ${{ needs.get_ut_runner.outputs.test_user }}:${{ needs.get_ut_runner.outputs.test_group }} + -e ZE_AFFINITY_MASK env: AGENT_TOOLSDIRECTORY: /opt/_tools GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - ZE_AFFINITY_MASK: ${{ env.ZE_AFFINITY_MASK }} env: UT_NAME: ${{ matrix.test.name }} defaults: From b2b48c54efd38140ceb35d4cb503344c2b1487cf Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 14:07:58 +0800 Subject: [PATCH 048/160] update --- .github/workflows/_linux_e2e.yml | 1 + .github/workflows/_linux_ut.yml | 1 + .github/workflows/nightly_ondemand.yml | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index b314eacd1c..8d71a75eeb 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -115,6 +115,7 @@ jobs: pip list uname -a dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + pip install pandas psutil scipy requests - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Install oneAPI DLE diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 8435ab972e..150580452a 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -224,6 +224,7 @@ jobs: pip install pytest pytest-timeout uname -a dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + pip install pandas psutil scipy requests - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 57800e2eb4..7b1c0bb685 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -124,7 +124,7 @@ jobs: needs: [Conditions-Filter, 
Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_ut.yml with: - runner: pvc_rolling + runner: linux.idc.xpu test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} From bccec9320ee8db8824010fc5d6d7f0345a9a57e5 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 15:10:31 +0800 Subject: [PATCH 049/160] update --- .github/workflows/_linux_e2e.yml | 5 +++++ .github/workflows/_linux_ut.yml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 8d71a75eeb..1a3f729504 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -309,6 +309,7 @@ jobs: container: image: ubuntu:24.04 env: + AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} REFERENCE_ISSUE_ID: 1645 defaults: @@ -319,6 +320,10 @@ jobs: run: | apt-get update apt-get install gh rsync ca-certificates -y + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download Target Artifact diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 150580452a..0f6dbf3dcb 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -50,7 +50,7 @@ jobs: id: runner-info run: | # get test runner - echo "test_host=${NODE_LABEL}" |tee -a ${GITHUB_OUTPUT} + echo "test_host=${RUNNER_NAME%-*}" |tee -a ${GITHUB_OUTPUT} echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} # show host info From 9f604a77c3912c486f7941b4dc44ed146fe5efe1 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 21 Jul 2025 16:00:55 +0800 Subject: [PATCH 050/160] update --- .github/workflows/_linux_e2e.yml | 3 ++- .github/workflows/_linux_ut.yml | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 1a3f729504..0980acd111 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -307,7 +307,7 @@ jobs: permissions: issues: write container: - image: ubuntu:24.04 + image: ubuntu:latest env: AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} @@ -350,6 +350,7 @@ jobs: - name: Get summary if: ${{ ! 
cancelled() }} run: | + pip install pandas requests if [ "${{ inputs.suite }}" != 'pt2e' ];then bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 0f6dbf3dcb..260203acfd 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -265,9 +265,9 @@ jobs: pip install -r .ci/docker/requirements-ci.txt # apply extra PRs for stock pytorch if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then - python ../.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 else - python ../.github/scripts/apply_torch_pr.py + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py fi git status && git diff && git show -s - name: Prepare Torch-xpu-ops @@ -486,7 +486,7 @@ jobs: git checkout ${TORCH_COMMIT_ID} pip install -r .ci/docker/requirements-ci.txt # apply extra PRs for stock pytorch - python ../.github/scripts/apply_torch_pr.py + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py git status && git diff && git show -s - name: Prepare Torch-xpu-ops run: | From 8a78c7c0ca929d4fd107867a6eaa9dd159ee7490 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 08:42:36 +0800 Subject: [PATCH 051/160] update --- .../workflows/nightly_ondemand_rolling.yml | 460 ------------------ .github/workflows/nightly_ondemand_whl.yml | 396 --------------- 2 files changed, 856 deletions(-) delete mode 100644 .github/workflows/nightly_ondemand_rolling.yml delete mode 100644 .github/workflows/nightly_ondemand_whl.yml diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml deleted file mode 100644 index 03101ebf3a..0000000000 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ /dev/null @@ -1,460 +0,0 @@ -name: Nightly-OnDemand Tests Rolling - -on: - schedule: - # GMT+8 21:30 every workday - - cron: '30 13 * * 0-4' - # GMT+8 0:30 Saturday - - cron: '30 16 * * 5' - workflow_dispatch: - inputs: - pytorch: - required: false - type: string - default: 'main' - description: Pytorch branch/commit - keep_torch_xpu_ops: - required: false - type: string - default: 'false' - description: Keep torch-xpu-ops pin. `true` means use pined commit - ut: - required: false - type: string - default: 'torch_xpu' - description: UT scope. `op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,torch_xpu,xpu_profiling`. Delimiter is comma - triton: - required: false - type: string - default: '' - description: Triton commit. Use pytorch pined commit by default - suite: - required: true - type: string - default: 'huggingface' - description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma - dt: - required: true - type: string - default: 'float32' - description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma - mode: - required: true - type: string - default: 'inference' - description: Test mode. `inference,training`. Delimiter is comma - scenario: - required: true - type: string - default: 'accuracy' - description: Test scenario. `accuracy,performance`. Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. 
Will only run this one mode if set - python: - required: false - type: string - default: '3.10' - description: Python version - -permissions: read-all - -concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} - cancel-in-progress: ${{ github.event_name != 'schedule' }} - -jobs: - Linux-Nightly-Ondemand-Build-Rolling: - if: ${{ github.repository_owner == 'intel' }} - name: linux-nightly-ondemand-rolling - secrets: inherit - uses: ./.github/workflows/_linux_build.yml - with: - pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - driver: rolling - runner: pvc_rolling - - Linux-Nightly-Ondemand-UT-Tests-Rolling: - if: ${{ github.event_name == 'schedule' || inputs.ut != '' }} - name: linux-nightly-ondemand-rolling - needs: Linux-Nightly-Ondemand-Build-Rolling - uses: ./.github/workflows/_linux_ut.yml - with: - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} - driver: rolling - runner: pvc_rolling - - Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling: - name: linux-nightly-ondemand-rolling / Op_microbench - permissions: - issues: write - needs: Linux-Nightly-Ondemand-Build-Rolling - uses: ./.github/workflows/_linux_op_benchmark.yml - with: - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} - driver: rolling - runner: pvc_rolling - - Linux-Nightly-Ondemand-E2E-Tests-Rolling: - runs-on: pvc_rolling - name: linux-nightly-ondemand-rolling / e2e_test - needs: Linux-Nightly-Ondemand-Build-Rolling - timeout-minutes: 3600 - permissions: - issues: write - env: - GH_TOKEN: ${{ github.token }} - reference_issue: 1645 - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - NEOReadDebugKeys: 1 - DisableScratchPages: 1 - run_type: ${{ (github.event_name == 'schedule' && (github.event.schedule == '30 16 * * 5' && 'weekly' || 'nightly')) || 'on-demand' }} - outputs: - TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }} - TORCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCH_COMMIT_ID }} - DRIVER_VERSION: ${{ steps.pinned.outputs.DRIVER_VERSION }} - KERNEL_VERSION: ${{ steps.pinned.outputs.KERNEL_VERSION }} - BUNDLE_VERSION: ${{ steps.pinned.outputs.BUNDLE_VERSION }} - OS_PRETTY_NAME: ${{ steps.pinned.outputs.OS_PRETTY_NAME }} - GCC_VERSION: ${{ steps.pinned.outputs.GCC_VERSION }} 
- TORCHBENCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCHBENCH_COMMIT_ID }} - TORCHVISION_COMMIT_ID: ${{ steps.pinned.outputs.TORCHVISION_COMMIT_ID }} - TORCHAUDIO_COMMIT_ID: ${{ steps.pinned.outputs.TORCHAUDIO_COMMIT_ID }} - TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }} - TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }} - TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }} - TIMEOUT_MODELS: ${{ steps.summary.outputs.TIMEOUT_MODELS }} - steps: - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Prepare Conda ENV - run: | - which conda && conda clean -ay - conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci - conda create -n e2e_ci python=${{ env.python }} cmake ninja -y - source activate e2e_ci - pip install pandas scipy psutil requests - - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} - uses: actions/download-artifact@v4 - with: - name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} - - name: Prepare Stock Pytorch - run: | - pwd - cd ../ - rm -rf pytorch || sudo rm -rf pytorch - source activate e2e_ci - pip install --force-reinstall ${{ github.workspace }}/torch*.whl - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - git clone https://github.com/pytorch/pytorch pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - # apply extra PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git diff && git show -s - - name: Identify pinned versions - id: pinned - run: | - source .github/scripts/env.sh - cd ../pytorch - if [ -z ${{ inputs.triton }} ]; then - echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - else - echo "TRITON_COMMIT_ID=${{ inputs.triton }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - fi - echo "TORCH_BRANCH_ID=${{ inputs.pytorch }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - . 
/etc/os-release - echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo ${GITHUB_ENV} - - name: Triton Installation - run: | - source activate e2e_ci - cd ../pytorch - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - TRITON_VERSION_NAME="$( - curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} - pip install pytorch_triton_xpu-*.whl - - name: Show GITHUB_ENV - run: | - echo "$GITHUB_ENV" - rm -rf ../pytorch/inductor_log || sudo rm -rf ../pytorch/inductor_log - rm -rf /tmp/torchinductor_* || sudo rm -rf /tmp/torchinductor_* - rm -rf ~/.triton/cache || sudo rm -rf ~/.triton/cache - - # Nihglty launch - - name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: huggingface - env_prepare: true - dt: float32,bfloat16,float16 - mode: inference,training - scenario: accuracy - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: Nightly Torchbench BF16 Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: torchbench - dt: bfloat16 - mode: training - scenario: accuracy - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: Nightly Timm_models FP16 Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: timm_models - dt: float16 - mode: training - scenario: accuracy - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: Nightly PT2E Full Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/pt2e - with: - dt: float32,int8 - scenario: accuracy,performance - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - # Weekly launch - - name: Weekly Huggingface Full Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: huggingface - env_prepare: true - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: Weekly Torchbench Full Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: torchbench - env_prepare: true - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: Weekly Timm_models Full Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: timm_models - env_prepare: true - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: Weekly PT2E Accuracy Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/pt2e - with: - env_prepare: true - dt: float32,int8 - scenario: accuracy,performance - hf_token: ${{ 
secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - # On-demand launch - - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: ${{ inputs.suite }} - env_prepare: true - dt: ${{ inputs.dt }} - mode: ${{ inputs.mode }} - scenario: ${{ inputs.scenario }} - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} - uses: ./.github/actions/pt2e - with: - env_prepare: true - dt: ${{ inputs.dt }} - scenario: ${{ inputs.scenario }} - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - driver: rolling - - name: Download Reference Artifact - id: reference_id - run: | - set -xe - source activate e2e_ci - conda install gh --channel conda-forge -y - if [ "${{ env.run_type }}" == "on-demand" ];then - artifact_type="weekly" - else - artifact_type="${{ env.run_type }}" - fi - REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ - --json body -q .body |grep "Inductor-${artifact_type}-Rolling-XPU-E2E" |sed 's/.*: *//')" - gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" - rm -rf reference && mv Inductor-*-XPU-E2E-* reference - - name: Summarize archieve files - id: summary - if: ${{ ! cancelled() }} - run: | - set -x -e -o pipefail - rm -rf ${{ github.workspace }}/upload_files - cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files - mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ - find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days - tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs - # Print summary - if [ "${{ inputs.suite }}" != 'pt2e' ];then - source activate e2e_ci - export LTS_OR_ROLLING='rolling' - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ - ${{ github.workspace }}/upload_files \ - ${{ github.workspace }}/reference \ - >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! Please check them" - exit ${exit_label} - fi - fi - pt2e_summary_csv="$(find ${{ github.workspace }}/upload_files/ -name "summary.csv")" - if [ -f "${pt2e_summary_csv}" ];then - cat ${pt2e_summary_csv} - failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l) - if [ ${failed_num} -ne 0 ];then - echo "[Warning] PT2E has failures!" - fi - fi - - name: Upload Inductor XPU E2E Data - if: ${{ ! 
cancelled() }} - uses: actions/upload-artifact@v4 - with: - name: Inductor-${{ env.run_type }}-Rolling-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/upload_files - - name: Upload Reference Run ID - if: ${{ env.run_type != 'on-demand' }} - run: | - gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body | \ - sed "s/Inductor-${{ env.run_type }}-Rolling-XPU-E2E:.*/Inductor-${{ env.run_type }}-Rolling-XPU-E2E: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt - gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt - - Tests-Failure-And-Report: - if: ${{ ! cancelled() }} - runs-on: [ self-hosted, Linux ] - permissions: - issues: write - env: - GH_TOKEN: ${{ github.token }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - needs: Linux-Nightly-Ondemand-E2E-Tests-Rolling - steps: - - name: Report github issue for XPU OPS nightly - if: github.repository_owner == 'intel' - run: | - set -xe - # Test env - build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - repo="${{ github.repository }}" - TORCH_BRANCH_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TORCH_BRANCH_ID }}" - TORCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TORCH_COMMIT_ID }}" - KERNEL_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.KERNEL_VERSION }}" - DRIVER_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.DRIVER_VERSION }}" - BUNDLE_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.BUNDLE_VERSION }}" - OS_PRETTY_NAME="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.OS_PRETTY_NAME }}" - GCC_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.GCC_VERSION }}" - TORCHBENCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TORCHBENCH_COMMIT_ID }}" - TORCHVISION_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TORCHVISION_COMMIT_ID }}" - TORCHAUDIO_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TORCHAUDIO_COMMIT_ID }}" - TRANSFORMERS_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TRANSFORMERS_VERSION }}" - TIMM_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TIMM_COMMIT_ID }}" - TRITON_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TRITON_COMMIT_ID }}" - TIMEOUT_MODELS="${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.outputs.TIMEOUT_MODELS }}" - # Test status - if [ "${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.result }}" == "success" ];then - test_status=Success - elif [ "${{ needs.Linux-Nightly-Ondemand-E2E-Tests-Rolling.result }}" == "failure" ];then - test_status=Failure - cc_comment="CC ${{ secrets.NIGHTLY_EMAIL_LIST }}" - else - test_status=None - exit 0 - fi - # Test Type - if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then - test_type="On-demand" - test_issue_id=426 - cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}" - elif [ "${{ github.event.schedule }}" == "30 16 * * 5" ];then - test_type="Weekly" - test_issue_id=432 - else - test_type="Nightly" - test_issue_id=432 - fi - # Test report - echo -e "**${test_status}** $test_type Rolling Test on $(date +'%F'), See: $build_url\n" > ${{ github.workspace }}/report.txt - printf "Torch-xpu-ops | PyTorch | Triton\n--- | --- | ---\n${GITHUB_WORKFLOW_SHA:0:7} on ${GITHUB_REF_NAME} | " >> ${{ github.workspace }}/report.txt - printf 
"[${TORCH_COMMIT_ID:0:7}](https://github.com/pytorch/pytorch/commit/${TORCH_COMMIT_ID:0:7}) on $TORCH_BRANCH_ID | " >> ${{ github.workspace }}/report.txt - echo -e "[${TRITON_COMMIT_ID:0:7}](https://github.com/intel/intel-xpu-backend-for-triton/commit/${TRITON_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt - printf "Transformers | Timm | Torchbench | Torchvision | Torchaudio\n--- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt - printf "[${TRANSFORMERS_VERSION:0:7}](https://github.com/huggingface/transformers/commit/${TRANSFORMERS_VERSION:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TIMM_COMMIT_ID:0:7}](https://github.com/huggingface/pytorch-image-models/commit/${TIMM_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TORCHBENCH_COMMIT_ID:0:7}](https://github.com/pytorch/benchmark/commit/${TORCHBENCH_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TORCHVISION_COMMIT_ID:0:7}](https://github.com/pytorch/vision/commit/${TORCHVISION_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - echo -e "[${TORCHAUDIO_COMMIT_ID:0:7}](https://github.com/pytorch/audio/commit/${TORCHAUDIO_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt - printf "Device | OS | GCC | Python | Driver(DKMS) | Kernel | Bundle(DPCPP)\n--- | --- | --- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt - echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | rolling-$DRIVER_VERSION |$KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt - if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then - test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.model }}" != "" ];then - test_scope+="; model=${{ inputs.model }}" - fi - echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt - fi - echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt - echo "$cc_comment" >> ${{ github.workspace }}/report.txt - # Report - report_txt=$(cat ${{ github.workspace }}/report.txt) - gh --repo $repo issue comment $test_issue_id --body "$report_txt" diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml deleted file mode 100644 index 23f5456f28..0000000000 --- a/.github/workflows/nightly_ondemand_whl.yml +++ /dev/null @@ -1,396 +0,0 @@ -name: Torch Nightly WHL Tests - -on: - schedule: - # GMT+8 21:00 every workday - - cron: '0 14 * * 0-4' - # GMT+8 0:00 Saturday - - cron: '0 17 * * 5' - workflow_dispatch: - inputs: - pytorch: - required: false - type: string - default: 'nightly' - description: Pytorch branch/commit - ut: - required: false - type: string - default: 'torch_xpu' - description: UT scope. `op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,torch_xpu,xpu_profiling`. Delimiter is comma - suite: - required: true - type: string - default: 'huggingface' - description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma - dt: - required: true - type: string - default: 'float32' - description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma - mode: - required: true - type: string - default: 'inference' - description: Test mode. `inference,training`. Delimiter is comma - scenario: - required: true - type: string - default: 'accuracy' - description: Test scenario. `accuracy,performance`. 
Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. Will only run this one mode if set - python: - required: false - type: string - default: '3.10' - description: Python version - -permissions: read-all - -concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.ut }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} - cancel-in-progress: ${{ github.event_name != 'schedule' }} - -jobs: - Linux-Nightly-Ondemand-UT-WHL-Tests: - if: ${{ (github.event_name == 'schedule' || inputs.ut != '') && github.repository_owner == 'intel' }} - uses: ./.github/workflows/_linux_ut.yml - with: - ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - pytorch: nightly_wheel - runner: linux.idc.xpu - - Linux-Nightly-Ondemand-E2E-WHL-Tests: - runs-on: pvc_e2e - if: ${{ github.repository_owner == 'intel' }} - timeout-minutes: 3600 - permissions: - issues: write - env: - GH_TOKEN: ${{ github.token }} - reference_issue: 1645 - pytorch: ${{ github.event_name == 'schedule' && 'nightly' || inputs.pytorch }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - run_type: ${{ (github.event_name == 'schedule' && (github.event.schedule == '0 17 * * 5' && 'weekly' || 'nightly')) || 'on-demand' }} - outputs: - TORCH_BRANCH_ID: ${{ steps.installed.outputs.TORCH_BRANCH_ID }} - TORCH_COMMIT_ID: ${{ steps.installed.outputs.TORCH_COMMIT_ID }} - TORCH_XPU_OPS_COMMIT: ${{ steps.installed.outputs.TORCH_XPU_OPS_COMMIT }} - TORCHBENCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCHBENCH_COMMIT_ID }} - TORCHVISION_COMMIT_ID: ${{ steps.pinned.outputs.TORCHVISION_COMMIT_ID }} - TORCHAUDIO_COMMIT_ID: ${{ steps.pinned.outputs.TORCHAUDIO_COMMIT_ID }} - TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }} - TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }} - TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }} - DRIVER_VERSION: ${{ steps.pinned.outputs.DRIVER_VERSION }} - KERNEL_VERSION: ${{ steps.pinned.outputs.KERNEL_VERSION }} - BUNDLE_VERSION: ${{ steps.pinned.outputs.BUNDLE_VERSION }} - OS_PRETTY_NAME: ${{ steps.pinned.outputs.OS_PRETTY_NAME }} - GCC_VERSION: ${{ steps.pinned.outputs.GCC_VERSION }} - TIMEOUT_MODELS: ${{ steps.summary.outputs.TIMEOUT_MODELS }} - steps: - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Prepare Conda ENV - run: | - which conda && conda clean -ay - conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci - conda create -n e2e_ci python=${{ env.python }} cmake ninja -y - source activate e2e_ci - pip install pandas scipy psutil requests - - name: Prepare Stock Pytorch - id: installed - run: | - pwd - cd ../ - source activate e2e_ci - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - echo "TORCH_BRANCH_ID=$(python -c 'import torch; print(torch.__version__)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCH_COMMIT_ID=${TORCH_COMMIT_ID}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - rm -rf pytorch || sudo rm -rf pytorch - git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${TORCH_COMMIT_ID} - # apply PRs for 
stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git diff && git show -s - - name: Identify pinned versions - id: pinned - run: | - source activate e2e_ci - source .github/scripts/env.sh - echo "TORCHVISION_COMMIT_ID=$(python -c 'import torchvision; print(torchvision.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCHAUDIO_COMMIT_ID=$(python -c 'import torchaudio; print(torchaudio.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TRITON_COMMIT_ID=$(python -c 'import triton; print(triton.__version__)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - cd ../pytorch - echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "BUNDLE_VERSION=$(pip list |grep cmplr |head -n 1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - . /etc/os-release - echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo ${GITHUB_ENV} - - name: Show GITHUB_ENV - run: | - echo "$GITHUB_ENV" - rm -rf ../pytorch/inductor_log || sudo rm -rf ../pytorch/inductor_log - rm -rf /tmp/torchinductor_* || sudo rm -rf /tmp/torchinductor_* - rm -rf ~/.triton/cache || sudo rm -rf ~/.triton/cache - - # Nihglty launch - - name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: huggingface - env_prepare: true - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy - pytorch: nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Nightly Torchbench BF16 Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: torchbench - dt: bfloat16 - mode: training - scenario: accuracy - pytorch: nightly_wheel - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Nightly Timm_models FP16 Training Accuracy Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: timm_models - dt: float16 - mode: training - scenario: accuracy - pytorch: nightly_wheel - env_prepare: true - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Nightly PT2E Accuracy Test - if: ${{ env.run_type == 'nightly' }} - uses: ./.github/actions/pt2e - with: - dt: float32,int8 - scenario: accuracy,performance - pytorch: nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - env_prepare: true - - # Weekly launch - - name: Weekly Huggingface Full Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: huggingface - env_prepare: true - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - pytorch: 
nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Weekly Torchbench Full Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: torchbench - env_prepare: true - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - pytorch: nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Weekly Timm_models Full Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: timm_models - env_prepare: true - dt: float32,bfloat16,float16,amp_bf16,amp_fp16 - mode: inference,training - scenario: accuracy,performance - pytorch: nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Weekly PT2E Accuracy Test - if: ${{ env.run_type == 'weekly' }} - uses: ./.github/actions/pt2e - with: - env_prepare: true - dt: float32,int8 - scenario: accuracy,performance - pytorch: nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - # On-demand launch - - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }} - uses: ./.github/actions/inductor-xpu-e2e-test - with: - suite: ${{ inputs.suite }} - env_prepare: true - dt: ${{ inputs.dt }} - mode: ${{ inputs.mode }} - scenario: ${{ inputs.scenario }} - pytorch: nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} - uses: ./.github/actions/pt2e - with: - env_prepare: true - dt: ${{ inputs.dt }} - scenario: ${{ inputs.scenario }} - pytorch: nightly_wheel - hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Download Reference Artifact - id: reference_id - run: | - set -xe - source activate e2e_ci - conda install gh --channel conda-forge -y - if [ "${{ env.run_type }}" == "on-demand" ];then - artifact_type="weekly" - else - artifact_type="${{ env.run_type }}" - fi - REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ - --json body -q .body |grep "Inductor-${artifact_type}-Pre-XPU-E2E" |sed 's/.*: *//')" - gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" - rm -rf reference && mv Inductor-*-XPU-E2E-* reference - - name: Summarize archieve files - id: summary - if: ${{ ! cancelled() }} - run: | - set -x -e -o pipefail - rm -rf ${{ github.workspace }}/upload_files - cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files - mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ - find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days - tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . 
# backup logs - # Print summary - if [ "${{ inputs.suite }}" != 'pt2e' ];then - source activate e2e_ci - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \ - ${{ github.workspace }}/upload_files \ - ${{ github.workspace }}/reference \ - >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! Please check them" - exit ${exit_label} - fi - fi - pt2e_summary_csv="$(find ${{ github.workspace }}/upload_files/ -name "summary.csv")" - if [ -f "${pt2e_summary_csv}" ];then - cat ${pt2e_summary_csv} - failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l) - if [ ${failed_num} -ne 0 ];then - echo "[Warning] PT2E has failures!" - fi - fi - - name: Upload Inductor XPU E2E Data - if: ${{ ! cancelled() }} - uses: actions/upload-artifact@v4 - with: - name: Inductor-${{ env.run_type }}-Pre-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/upload_files - - name: Upload Reference Run ID - if: ${{ env.run_type != 'on-demand' }} - run: | - gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body | \ - sed "s/Inductor-${{ env.run_type }}-Pre-XPU-E2E:.*/Inductor-${{ env.run_type }}-Pre-XPU-E2E: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt - gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt - - Tests-Failure-And-Report: - if: ${{ ! cancelled() }} - runs-on: [ self-hosted, Linux ] - permissions: - issues: write - env: - GH_TOKEN: ${{ github.token }} - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - needs: Linux-Nightly-Ondemand-E2E-WHL-Tests - steps: - - name: Report github issue for XPU OPS nightly - if: github.repository_owner == 'intel' - run: | - set -xe - # Test env - build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - repo="${{ github.repository }}" - TORCH_BRANCH_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCH_BRANCH_ID }}" - TORCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCH_COMMIT_ID }}" - TORCH_XPU_OPS_COMMIT="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCH_XPU_OPS_COMMIT }}" - DRIVER_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.DRIVER_VERSION }}" - KERNEL_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.KERNEL_VERSION }}" - BUNDLE_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.BUNDLE_VERSION }}" - OS_PRETTY_NAME="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.OS_PRETTY_NAME }}" - GCC_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.GCC_VERSION }}" - TORCHBENCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCHBENCH_COMMIT_ID }}" - TORCHVISION_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCHVISION_COMMIT_ID }}" - TORCHAUDIO_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCHAUDIO_COMMIT_ID }}" - TRANSFORMERS_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TRANSFORMERS_VERSION }}" - TIMM_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TIMM_COMMIT_ID }}" - TRITON_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TRITON_COMMIT_ID }}" - TIMEOUT_MODELS="${{ 
needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TIMEOUT_MODELS }}" - # Test status - if [ "${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.result }}" == "success" ];then - test_status=Success - elif [ "${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.result }}" == "failure" ];then - test_status=Failure - cc_comment="CC ${{ secrets.NIGHTLY_EMAIL_LIST }}" - else - test_status=None - exit 0 - fi - # Test Type - if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then - test_type="On-demand" - test_issue_id=426 - cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}" - elif [ "${{ github.event.schedule }}" == "0 17 * * 5" ];then - test_type="Weekly" - test_issue_id=432 - else - test_type="Nightly" - test_issue_id=432 - fi - # Test report - echo -e "**${test_status}** $test_type WHL Test on $(date +'%F'), See: $build_url\n" > ${{ github.workspace }}/report.txt - printf "Torch-xpu-ops | PyTorch | Triton\n--- | --- | ---\n${TORCH_XPU_OPS_COMMIT:0:7} on pinned | " >> ${{ github.workspace }}/report.txt - printf "[${TORCH_COMMIT_ID:0:7}](https://github.com/pytorch/pytorch/commit/${TORCH_COMMIT_ID:0:7}) on $TORCH_BRANCH_ID | " >> ${{ github.workspace }}/report.txt - echo -e "[${TRITON_COMMIT_ID:0:7}](https://github.com/intel/intel-xpu-backend-for-triton/commit/${TRITON_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt - printf "Transformers | Timm | Torchbench | Torchvision | Torchaudio\n--- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt - printf "[${TRANSFORMERS_VERSION:0:7}](https://github.com/huggingface/transformers/commit/${TRANSFORMERS_VERSION:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TIMM_COMMIT_ID:0:7}](https://github.com/huggingface/pytorch-image-models/commit/${TIMM_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TORCHBENCH_COMMIT_ID:0:7}](https://github.com/pytorch/benchmark/commit/${TORCHBENCH_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - printf "[${TORCHVISION_COMMIT_ID:0:7}](https://github.com/pytorch/vision/commit/${TORCHVISION_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt - echo -e "[${TORCHAUDIO_COMMIT_ID:0:7}](https://github.com/pytorch/audio/commit/${TORCHAUDIO_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt - printf "Device | OS | GCC | Python | Driver(DKMS) | Kernel | Bundle(DPCPP)\n--- | --- | --- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt - echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION |$KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt - if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then - test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.model }}" != "" ];then - test_scope+="; model=${{ inputs.model }}" - fi - echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt - fi - echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt - echo "$cc_comment" >> ${{ github.workspace }}/report.txt - # Report - report_txt=$(cat ${{ github.workspace }}/report.txt) - gh --repo $repo issue comment $test_issue_id --body "$report_txt" From 46d00c8cc3761b9207a39dff4d583a709422bc0c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 09:16:44 +0800 Subject: [PATCH 052/160] update --- .github/workflows/_linux_build.yml | 42 ++++- .github/workflows/_linux_e2e.yml | 3 +- .github/workflows/_linux_op_benchmark.yml | 204 +++++++++++++--------- .github/workflows/_linux_ut.yml | 5 
+- .github/workflows/nightly_ondemand.yml | 18 +- .github/workflows/pull.yml | 4 +- 6 files changed, 183 insertions(+), 93 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 776c03a59e..7fd29a97b4 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -44,15 +44,46 @@ jobs: steps: - name: Use ${{ inputs.pytorch }} run: echo 'Use ${{ inputs.pytorch }}' - build: + get_build_runner: if: ${{ ! contains(inputs.test_type, 'wheel') }} runs-on: ${{ inputs.runner }} + outputs: + test_host: ${{ steps.runner-info.outputs.test_host }} + test_user: ${{ steps.runner-info.outputs.test_user }} + test_group: ${{ steps.runner-info.outputs.test_group }} + steps: + - name: Get runner info + id: runner-info + run: | + # get test runner + echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT} + echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} + echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + # show host info + cat /etc/os-release + uname -a + source /opt/intel/oneapi/setvars.sh + sycl-ls + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + - name: Cleanup workspace + if: ${{ always() }} + run: | + # clean docker cache + docker stop $(docker ps -aq) || true + docker system prune -af || true + # clean files + ls -al + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + build: + needs: get_build_runner + runs-on: ${{ needs.get_build_runner.outputs.test_host }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} + options: -u ${{ needs.get_build_runner.outputs.test_user }}:${{ needs.get_build_runner.outputs.test_group }} env: - PATH: /opt/xpu-build/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + PATH: /tmp/xpu-build/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin GH_TOKEN: ${{ github.token }} timeout-minutes: 300 steps: @@ -67,7 +98,7 @@ jobs: dnf install gh --repo gh-cli -y # Setup python local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv /opt/xpu-build + /opt/python/${local_python}/bin/python -m venv /tmp/xpu-build which python && python -V && pip list pip install -U pip wheel setuptools - name: Checkout torch-xpu-ops @@ -93,7 +124,7 @@ jobs: fi # oneAPI DLE if [ "${{ inputs.oneapi }}" != "installed" ];then - rm -rf ~/intel ~/.intel /opt/intel + rm -rf ${HOME}/intel ${HOME}/.intel wget -q -O oneapi.sh "${{ inputs.oneapi }}" bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" @@ -146,6 +177,7 @@ jobs: cp dist/*.whl ${{ github.workspace }} - name: Torch Config run: | + printenv python -c "import torch; print(torch.__config__.show())" python -c "import torch; print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" @@ -153,6 +185,8 @@ jobs: python -c "import torchvision; print(torchvision.__version__)" python -c "import torchaudio; print(torchaudio.__version__)" python pytorch/torch/utils/collect_env.py + pip list |grep -E 'torch|intel' + - name: Upload Torch XPU Wheel if: ${{ ! 
cancelled() }} uses: actions/upload-artifact@v4 diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 0980acd111..47c8218e39 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -90,7 +90,7 @@ jobs: options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g -u ${{ needs.get_e2e_runner.outputs.test_user }}:${{ needs.get_e2e_runner.outputs.test_group }} env: - AGENT_TOOLSDIRECTORY: /opt/_tools + AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} MODEL_ONLY_NAME: ${{ inputs.model }} @@ -167,7 +167,6 @@ jobs: python -c "import triton; print(triton.__version__)" python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' # CICD launch - name: Nightly Huggingface BF16 & FP16 Training Test diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 144bf04fb7..0588acec27 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -3,112 +3,157 @@ name: Linux OP Benchmark Test on: workflow_call: inputs: + runner: + required: true + type: string + default: 'pvc_rolling' + description: Runner label + test_type: + type: string + default: 'build-from-source' + description: Build from source or install nightly wheel pytorch: - required: false type: string default: 'main' - description: Pytorch branch/commit - keep_torch_xpu_ops: - required: false + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: type: string - default: 'false' - description: Keep torch-xpu-ops pin. `true` means use pined commit - triton: - required: false + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + oneapi: type: string - default: '' - description: Triton commit. 
Use pytorch pined commit by default + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed python: - required: false type: string default: '3.10' description: Python version - runner: - required: true - type: string - default: 'linux.idc.xpu' - description: Runner label - driver: - required: false - type: string - default: 'rolling' - description: Driver lts/rolling -permissions: - issues: write +permissions: read-all jobs: + get_op_runner: + runs-on: ${{ inputs.runner }} + outputs: + test_host: ${{ steps.runner-info.outputs.test_host }} + test_user: ${{ steps.runner-info.outputs.test_user }} + test_group: ${{ steps.runner-info.outputs.test_group }} + steps: + - name: Get runner info + id: runner-info + run: | + # get test runner + echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT} + echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} + echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + # show host info + cat /etc/os-release + uname -a + source /opt/intel/oneapi/setvars.sh + sycl-ls + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + - name: Cleanup workspace + if: ${{ always() }} + run: | + # clean docker cache + docker stop $(docker ps -aq) || true + docker system prune -af || true + # clean files + ls -al + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf op_benchmark_test: - runs-on: ${{ inputs.runner }} + needs: get_op_runner + runs-on: ${{ needs.get_op_runner.outputs.test_host }} + permissions: + issues: write timeout-minutes: 900 - env: - GH_TOKEN: ${{ github.token }} - reference_issue: 1689 - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} + container: + image: mengfeili/intel-pvc-driver:1146-1136 + volumes: + - ${{ github.workspace }}:${{ github.workspace }} + options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g + -u ${{ needs.get_op_runner.outputs.test_user }}:${{ needs.get_op_runner.outputs.test_group }} + env: + AGENT_TOOLSDIRECTORY: /opt/_tools + GH_TOKEN: ${{ github.token }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + REFERENCE_ISSUE: 1689 + defaults: + run: + shell: bash -xe {0} steps: + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Check runner + run: | + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf + hostname && whoami && id + clinfo --list + gcc -v && g++ -v + which python && which pip + python -V + pip install -U pip wheel setuptools + pip list + uname -a + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + pip install pandas psutil scipy requests - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Prepare Stock Pytorch + - name: Install oneAPI DLE + if: ${{ inputs.oneapi != 'installed' }} run: | - pwd - which conda && conda clean -ay - conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \ - rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK} - conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y - source activate xpu_op_${ZE_AFFINITY_MASK} - cd ../ && rm -rf pytorch - pip install requests - git clone https://github.com/pytorch/pytorch pytorch - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - cd pytorch && git checkout $(echo ${{ inputs.pytorch }}) - # apply PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git show -s 
-            git submodule sync && git submodule update --init --recursive
-            if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
-              echo "Don't replace torch-xpu-ops!"
-            else
-              rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
-              # Workaround for torch-xpu-ops ci test
-              sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
-            fi
-          fi
+          rm -rf ~/intel ~/.intel
+          wget -q -O oneapi.sh "${{ inputs.oneapi }}"
+          bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi
+          echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV}
       - name: Download Pytorch wheel
-        if: ${{ inputs.pytorch != 'nightly_wheel' }}
+        if: ${{ ! contains(inputs.test_type, 'wheel') }}
         uses: actions/download-artifact@v4
         with:
-          name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}
-          path: ${{ github.workspace }}
-      - name: Install Pytorch XPU
+          pattern: Torch-XPU-Wheel-*
+      - name: Prepare Stock Pytorch
         run: |
-          source activate xpu_op_${ZE_AFFINITY_MASK}
-          if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
-            cd ../pytorch
-            export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-            pip install -r requirements.txt
-            pip install --force-reinstall ${{ github.workspace }}/torch*.whl
-            git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
-          else
+          # install pytorch
+          if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then
+            pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu
+          elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then
+            pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu
+          elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then
             pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
-            TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
+          else
+            pip install --force-reinstall ${{ github.workspace }}/*.whl
           fi
-          pip install -r .ci/docker/requirements-ci.txt
+          TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
+          if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then
+            PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')"
+          else
+            PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
+          fi
+          git clone ${PYTORCH_REPO} pytorch
+          cd pytorch
+          git checkout ${TORCH_COMMIT_ID}
+          # apply extra PRs for stock pytorch
+          if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then
+            python ../.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940
+          else
+            python ../.github/scripts/apply_torch_pr.py
+          fi
+          git status && git diff && git show -s
       - name: Torch Config
         run: |
-          source activate xpu_op_${ZE_AFFINITY_MASK}
+          printenv
           python -c "import torch; print(torch.__config__.show())"
           python -c "import torch; print(torch.__config__.parallel_info())"
           python -c "import torch; print(torch.__config__.torch.xpu.device_count())"
-
-          cd ..
+ python -c "import triton; print(triton.__version__)" python pytorch/torch/utils/collect_env.py - rm -rf /tmp/torchinductor_* - rm -rf ~/.triton/cache + pip list |grep -E 'torch|intel' + - name: Run Torch XPU Op Benchmark - if: ${{ inputs.driver == 'rolling' }} run: | - source activate xpu_op_${ZE_AFFINITY_MASK} mkdir -p ${{ github.workspace }}/op_benchmark cd test/microbench filename=$(find -- *.py) @@ -124,15 +169,12 @@ jobs: continue-on-error: true id: reference_id run: | - source activate xpu_op_${ZE_AFFINITY_MASK} - conda install gh --channel conda-forge -y - REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \ + REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE} \ --json body -q .body |grep "Inductor-XPU-OP-Benchmark-Data" |sed 's/.*: *//')" gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-XPU-OP-Benchmark-Data-*" rm -rf ${GITHUB_WORKSPACE:-"/tmp"}/reference mkdir ${GITHUB_WORKSPACE:-"/tmp"}/reference mv Inductor-XPU-OP-Benchmark-Data-*/* ${GITHUB_WORKSPACE:-"/tmp"}/reference - mkdir ${{ github.workspace }}/baseline if [[ -f "${GITHUB_WORKSPACE:-"/tmp"}/reference/new_baseline/baseline_forward_op_summary.csv" ]]; then cp ${GITHUB_WORKSPACE:-"/tmp"}/reference/new_baseline/baseline_forward_op_summary.csv ${{ github.workspace }}/baseline @@ -143,7 +185,6 @@ jobs: fi - name: Check the OP Regression run: | - source activate xpu_op_${ZE_AFFINITY_MASK} pip install tabulate # Compare forward op python ${{ github.workspace }}/.github/scripts/op_perf_comparison.py --xpu_file ${{ github.workspace }}/op_benchmark/forward_op_summary.csv --baseline_file ${{ github.workspace }}/baseline/baseline_forward_op_summary.csv @@ -151,7 +192,6 @@ jobs: python ${{ github.workspace }}/.github/scripts/op_perf_comparison.py --xpu_file ${{ github.workspace }}/op_benchmark/backward_op_summary.csv --baseline_file ${{ github.workspace }}/baseline/baseline_backward_op_summary.csv - name: Update OP Baseline run: | - source activate xpu_op_${ZE_AFFINITY_MASK} mkdir ${{ github.workspace }}/new_baseline cp ${{ github.workspace }}/baseline/baseline*.csv ${{ github.workspace }}/new_baseline # Update forward op @@ -167,6 +207,6 @@ jobs: path: ${{ github.workspace }}/op_benchmark - name: Upload Reference Run ID run: | - gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body | \ + gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE} --json body -q .body | \ sed "s/Inductor-XPU-OP-Benchmark-Data:.*/Inductor-XPU-OP-Benchmark-Data: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt - gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt + gh --repo ${GITHUB_REPOSITORY} issue edit ${REFERENCE_ISSUE} --body-file new_body.txt diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 260203acfd..56b8da5901 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -81,7 +81,7 @@ jobs: -u ${{ needs.get_ut_runner.outputs.test_user }}:${{ needs.get_ut_runner.outputs.test_group }} -e ZE_AFFINITY_MASK env: - AGENT_TOOLSDIRECTORY: /opt/_tools + AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} env: @@ -302,6 +302,7 @@ jobs: python -c "import triton; print(triton.__version__)" python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' + - name: Run XPU UT Test if: ${{ matrix.test.condition }} run: | @@ -503,7 +504,7 @@ jobs: 
TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi fi - if [ "${{ inputs.torch_xpu_ops }}" == "cicd" ];then + if [ "${{ inputs.test_type }}" == "cicd" ];then cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops else git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 7b1c0bb685..4dacb40ba7 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -127,7 +127,7 @@ jobs: runner: linux.idc.xpu test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} - torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} + torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} @@ -150,12 +150,26 @@ jobs: scenario: ${{ github.event_name == 'schedule' && 'accuracy' || inputs.scenario }} model: ${{ github.event_name == 'schedule' && '' || inputs.model }} + Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling: + if: ${{ github.event_name == 'schedule' }} + name: linux-nightly-ondemand-rolling / Op_microbench + permissions: write-all + needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] + uses: ./.github/workflows/_linux_op_benchmark.yml + with: + runner: pvc_rolling + test_type: ${{ needs.Conditions-Filter.outputs.test_type }} + pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} + torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} + oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} + python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} + Windows-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' }} name: Windows-nightly-ondemand uses: ./.github/workflows/_windows_ut.yml with: - torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.torch_xpu_ops }} + torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} src_changed: false diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 90a11b020e..5387eb415e 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -99,8 +99,10 @@ jobs: secrets: inherit uses: ./.github/workflows/_linux_build.yml with: + runner: pvc_rolling + test_type: build-cicd pytorch: main - runner: pvc_e2e + torch_xpu_ops: cicd preci-linux-ut: name: preci-linux From e3949d81221d98517bfcc2c8698d27122bee7308 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 10:22:53 +0800 Subject: [PATCH 053/160] update --- .github/workflows/_linux_e2e.yml | 2 ++ .github/workflows/_linux_op_benchmark.yml | 23 ++--------------------- .github/workflows/_linux_ut.yml | 5 +++++ .github/workflows/nightly_ondemand.yml | 1 - 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 47c8218e39..a89c01d8b0 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -164,6 +164,8 @@ jobs: python -c "import torch; print(torch.__config__.show())" python -c "import torch; 
print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" python -c "import triton; print(triton.__version__)" python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 0588acec27..763d358677 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -16,10 +16,6 @@ on: type: string default: 'main' description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' - torch_xpu_ops: - type: string - default: 'main' - description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin oneapi: type: string default: 'installed' @@ -126,30 +122,15 @@ jobs: else pip install --force-reinstall ${{ github.workspace }}/*.whl fi - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then - PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - else - PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - fi - git clone ${PYTORCH_REPO} pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - # apply extra PRs for stock pytorch - if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then - python ../.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 - else - python ../.github/scripts/apply_torch_pr.py - fi - git status && git diff && git show -s - name: Torch Config run: | printenv python -c "import torch; print(torch.__config__.show())" python -c "import torch; print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' - name: Run Torch XPU Op Benchmark diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 56b8da5901..195fd48b96 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -299,6 +299,8 @@ jobs: python -c "import torch; print(torch.__config__.show())" python -c "import torch; print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" python -c "import triton; print(triton.__version__)" python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' @@ -518,9 +520,12 @@ jobs: python -c "import torch; print(torch.__config__.show())" python -c "import torch; print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" python -c "import triton; print(triton.__version__)" python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' + - name: Set Ptrace_scope if: ${{ always() }} run: | diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 4dacb40ba7..226828471e 100644 --- 
a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -169,7 +169,6 @@ jobs: name: Windows-nightly-ondemand uses: ./.github/workflows/_windows_ut.yml with: - torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} src_changed: false From 3f692136fc6af6225a0f3d6c5ced7738956cc7c2 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 13:34:39 +0800 Subject: [PATCH 054/160] update --- .github/workflows/_linux_build.yml | 43 +++++++++++------------ .github/workflows/_linux_e2e.yml | 1 + .github/workflows/_linux_op_benchmark.yml | 2 ++ .github/workflows/_linux_ut.yml | 2 ++ 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 7fd29a97b4..00f12c003b 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -44,6 +44,7 @@ jobs: steps: - name: Use ${{ inputs.pytorch }} run: echo 'Use ${{ inputs.pytorch }}' + get_build_runner: if: ${{ ! contains(inputs.test_type, 'wheel') }} runs-on: ${{ inputs.runner }} @@ -74,7 +75,9 @@ jobs: # clean files ls -al sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - build: + sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + + build_torch: needs: get_build_runner runs-on: ${{ needs.get_build_runner.outputs.test_host }} container: @@ -83,22 +86,27 @@ jobs: - ${{ github.workspace }}:${{ github.workspace }} options: -u ${{ needs.get_build_runner.outputs.test_user }}:${{ needs.get_build_runner.outputs.test_group }} env: - PATH: /tmp/xpu-build/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin GH_TOKEN: ${{ github.token }} + AGENT_TOOLSDIRECTORY: /tmp/_tools timeout-minutes: 300 + defaults: + run: + shell: bash -xe {0} steps: - - name: Setup based env + - name: Setup gh + uses: actions4gh/setup-gh@v1 + - name: Setup gcc + uses: Dup4/actions-setup-gcc@v1 + with: + version: 11 + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Clean workspace run: | # Cleanup workspace - rm -rf ./* - # Install gh - dnf install 'dnf-command(config-manager)' - dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo - dnf autoremove -y git236* && dnf install -y git - dnf install gh --repo gh-cli -y - # Setup python - local_python=$(echo ${{ inputs.python }} |awk -F. 
'{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv /tmp/xpu-build + find ./ |grep -v "^\./$" |xargs rm -rf which python && python -V && pip list pip install -U pip wheel setuptools - name: Checkout torch-xpu-ops @@ -107,7 +115,6 @@ jobs: path: torch-xpu-ops - name: Build Pytorch XPU run: | - set -xe -o pipefail if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" @@ -130,8 +137,6 @@ jobs: export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" fi source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh - # gcc 11 - source /opt/rh/gcc-toolset-11/enable ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ --WORKSPACE="${{ github.workspace }}" \ --PYTORCH_REPO="${PYTORCH_REPO}" \ @@ -141,9 +146,6 @@ jobs: 2>&1 |tee ${{ github.workspace }}/build_pytorch_${PYTORCH_COMMIT//\//-}.log - name: Build Triton run: | - # gcc 13 - dnf install -y gcc-toolset-13-gcc-c++ zlib-devel - source /opt/rh/gcc-toolset-13/enable cd ./pytorch pip install cmake ninja pybind11 rm -rf pytorch_triton_xpu-*.whl @@ -199,8 +201,3 @@ jobs: with: name: Torch-XPU-Build-Log-${{ github.event.pull_request.number || github.sha }} path: ${{ github.workspace }}/build_*.log - - name: Cleanup - if: always() - run: | - chmod 777 . -R - rm -rf ./* diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index a89c01d8b0..e3861703bf 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -78,6 +78,7 @@ jobs: # clean files ls -al sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} e2e_test: runs-on: ${{ needs.get_e2e_runner.outputs.test_host }} diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 763d358677..89a42fbbfe 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -57,6 +57,8 @@ jobs: # clean files ls -al sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + op_benchmark_test: needs: get_op_runner runs-on: ${{ needs.get_op_runner.outputs.test_host }} diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 195fd48b96..8a3a27c017 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -68,6 +68,8 @@ jobs: # clean files ls -al sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + ut_test: needs: get_ut_runner runs-on: ${{ needs.get_ut_runner.outputs.test_host }} From e8b015a7eb47e94e94b025ef2a8d251570f499fb Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 13:50:18 +0800 Subject: [PATCH 055/160] update --- .github/workflows/_linux_build.yml | 10 +++--- .github/workflows/_linux_e2e.yml | 10 +++--- .github/workflows/_linux_op_benchmark.yml | 10 +++--- .github/workflows/_linux_ut.yml | 16 ++++----- .github/workflows/pull.yml | 40 +++++++++++------------ 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 00f12c003b..9a403caa97 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -45,7 +45,7 @@ jobs: - name: Use ${{ inputs.pytorch }} run: echo 'Use ${{ inputs.pytorch }}' - get_build_runner: + get_runner: if: ${{ ! 
contains(inputs.test_type, 'wheel') }} runs-on: ${{ inputs.runner }} outputs: @@ -77,14 +77,14 @@ jobs: sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} - build_torch: - needs: get_build_runner - runs-on: ${{ needs.get_build_runner.outputs.test_host }} + build: + needs: get_runner + runs-on: ${{ needs.get_runner.outputs.test_host }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: -u ${{ needs.get_build_runner.outputs.test_user }}:${{ needs.get_build_runner.outputs.test_group }} + options: -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} env: GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: /tmp/_tools diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index e3861703bf..3b56454362 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -49,7 +49,7 @@ on: permissions: read-all jobs: - get_e2e_runner: + get_runner: runs-on: ${{ inputs.runner }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} @@ -81,15 +81,15 @@ jobs: sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} e2e_test: - runs-on: ${{ needs.get_e2e_runner.outputs.test_host }} - needs: get_e2e_runner + runs-on: ${{ needs.get_runner.outputs.test_host }} + needs: get_runner timeout-minutes: 3600 container: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.get_e2e_runner.outputs.test_user }}:${{ needs.get_e2e_runner.outputs.test_group }} + -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} env: AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} @@ -302,7 +302,7 @@ jobs: name: Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} path: ${{ github.workspace }}/upload_files - e2e_summary: + summary: runs-on: [self-hosted, Linux, X64] if: ${{ always() }} needs: e2e_test diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 89a42fbbfe..8ae2f349c9 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -28,7 +28,7 @@ on: permissions: read-all jobs: - get_op_runner: + get_runner: runs-on: ${{ inputs.runner }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} @@ -59,9 +59,9 @@ jobs: sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} - op_benchmark_test: - needs: get_op_runner - runs-on: ${{ needs.get_op_runner.outputs.test_host }} + op_benchmark: + needs: get_runner + runs-on: ${{ needs.get_runner.outputs.test_host }} permissions: issues: write timeout-minutes: 900 @@ -70,7 +70,7 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.get_op_runner.outputs.test_user }}:${{ needs.get_op_runner.outputs.test_group }} + -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} env: AGENT_TOOLSDIRECTORY: /opt/_tools GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 8a3a27c017..76db53209c 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -39,7 +39,7 @@ on: 
permissions: read-all jobs: - get_ut_runner: + get_runner: runs-on: ${{ inputs.runner }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} @@ -71,8 +71,8 @@ jobs: sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} ut_test: - needs: get_ut_runner - runs-on: ${{ needs.get_ut_runner.outputs.test_host }} + needs: get_runner + runs-on: ${{ needs.get_runner.outputs.test_host }} if: ${{ inputs.ut != 'xpu_distributed' && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 300 container: @@ -80,7 +80,7 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.get_ut_runner.outputs.test_user }}:${{ needs.get_ut_runner.outputs.test_group }} + -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} -e ZE_AFFINITY_MASK env: AGENT_TOOLSDIRECTORY: /tmp/_tools @@ -354,7 +354,7 @@ jobs: run: | echo "UT_NAME=${{ matrix.test.name }}" >> $GITHUB_OUTPUT - ut_test_results_check: + ut_summary: needs: ut_test runs-on: ubuntu-22.04 timeout-minutes: 30 @@ -424,7 +424,7 @@ jobs: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }}-checked path: ${{ github.workspace }}/ut_log - distributed_ut_test: + distributed: runs-on: pytorch-06 if: ${{ contains(inputs.ut, 'xpu_distributed') && !contains(inputs.disabled_tests, 'disable_distribute') }} timeout-minutes: 60 @@ -562,8 +562,8 @@ jobs: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-xpu_distributed path: ${{ github.workspace }}/ut_log - distributed_ut_test_results_check: - needs: distributed_ut_test + distributed_summary: + needs: distributed runs-on: ubuntu-22.04 timeout-minutes: 30 env: diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 5387eb415e..9e804a27ee 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -51,8 +51,8 @@ jobs: export CLANG=1 bash third_party/torch-xpu-ops/.github/scripts/lintrunner.sh - preci-conditions-filter: - name: preci-conditions-filter + conditions-filter: + name: conditions-filter if: ${{ github.event.pull_request.draft == false }} needs: [preci-lint-check] runs-on: ubuntu-22.04 @@ -92,10 +92,10 @@ jobs: disabled_tests="$(awk '/disable_/{printf("%s ", $0)}' pr-info.txt)" echo "disabled_tests=${disabled_tests}" |tee "${GITHUB_OUTPUT}" - preci-linux-build: - name: preci-linux - if: ${{ !contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_all')}} - needs: [preci-conditions-filter] + linux-build: + name: linux + if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all')}} + needs: [conditions-filter] secrets: inherit uses: ./.github/workflows/_linux_build.yml with: @@ -104,35 +104,35 @@ jobs: pytorch: main torch_xpu_ops: cicd - preci-linux-ut: - name: preci-linux - needs: [preci-conditions-filter, preci-linux-build] + linux-ut: + name: linux + needs: [conditions-filter, linux-build] uses: ./.github/workflows/_linux_ut.yml with: runner: linux.idc.xpu test_type: build-cicd pytorch: main ut: op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_distributed - disabled_tests: ${{ needs.preci-conditions-filter.outputs.disabled_tests }} + disabled_tests: ${{ needs.conditions-filter.outputs.disabled_tests }} - preci-linux-e2e: - if: ${{ !contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_e2e') }} - name: preci-linux / e2e_test + linux-e2e: + if: ${{ 
!contains(needs.conditions-filter.outputs.disabled_tests, 'disable_e2e') }} + name: linux permissions: write-all - needs: [preci-conditions-filter, preci-linux-build] + needs: [conditions-filter, linux-build] uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: build-cicd pytorch: main - preci-windows: - name: preci-windows - if: ${{ !(contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_win')) }} - needs: [preci-conditions-filter] + windows: + name: windows + if: ${{ !(contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.conditions-filter.outputs.disabled_tests, 'disable_win')) }} + needs: [conditions-filter] uses: ./.github/workflows/_windows_ut.yml with: ut: op_extended,torch_xpu runner: Windows_CI - src_changed: ${{ needs.preci-conditions-filter.outputs.src_changed }} - has_label: ${{ needs.preci-conditions-filter.outputs.has_label }} + src_changed: ${{ needs.conditions-filter.outputs.src_changed }} + has_label: ${{ needs.conditions-filter.outputs.has_label }} From bbd82cdefbd61ca2495f329b293a4b29ade68b66 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 14:09:03 +0800 Subject: [PATCH 056/160] update --- .github/workflows/_linux_build.yml | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 9a403caa97..9e341222f1 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -37,16 +37,7 @@ on: permissions: read-all jobs: - wheel: - if: ${{ contains(inputs.test_type, 'wheel') }} - name: ${{ inputs.pytorch }} - runs-on: ubuntu-latest - steps: - - name: Use ${{ inputs.pytorch }} - run: echo 'Use ${{ inputs.pytorch }}' - get_runner: - if: ${{ ! contains(inputs.test_type, 'wheel') }} runs-on: ${{ inputs.runner }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} @@ -78,7 +69,9 @@ jobs: sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} build: + name: ${{ contains(inputs.test_type, 'wheel') && inputs.pytorch || 'build' }} needs: get_runner + if: ${{ ! 
contains(inputs.test_type, 'wheel') }} runs-on: ${{ needs.get_runner.outputs.test_host }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' From c144babd219575fd5c064235a992648fc8bae1b0 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 14:29:44 +0800 Subject: [PATCH 057/160] get runner --- .github/actions/get-runner/action.yml | 39 +++++++++++++++++++++++ .github/workflows/_linux_build.yml | 24 ++------------ .github/workflows/_linux_e2e.yml | 24 ++------------ .github/workflows/_linux_op_benchmark.yml | 24 ++------------ .github/workflows/_linux_ut.yml | 24 ++------------ 5 files changed, 47 insertions(+), 88 deletions(-) create mode 100644 .github/actions/get-runner/action.yml diff --git a/.github/actions/get-runner/action.yml b/.github/actions/get-runner/action.yml new file mode 100644 index 0000000000..853f30e6bd --- /dev/null +++ b/.github/actions/get-runner/action.yml @@ -0,0 +1,39 @@ +name: Get Runner Infos + +on: + workflow_call: + +permissions: read-all + +runs: + using: composite + steps: + - name: Get runner + shell: bash -xe {0} + run: | + # get test runner + echo "test_host=$(echo ${RUNNER_NAME} |sed 's/\-[0-9]$//')" |tee -a ${GITHUB_OUTPUT} + echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} + echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + # show host info + lscpu + free -h + cat /etc/os-release + uname -a + gcc -v && g++ -v + source /opt/intel/oneapi/setvars.sh + sycl-ls + icpx -v + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c + df -h + - name: Cleanup host + if: ${{ always() }} + shell: bash -xe {0} + run: | + # clean docker cache + docker system prune -af || true + # clean files + ls -al + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 9e341222f1..b226fb9371 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -44,29 +44,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: - - name: Get runner info + - name: Get runner id: runner-info - run: | - # get test runner - echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT} - echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} - echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} - # show host info - cat /etc/os-release - uname -a - source /opt/intel/oneapi/setvars.sh - sycl-ls - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - - name: Cleanup workspace - if: ${{ always() }} - run: | - # clean docker cache - docker stop $(docker ps -aq) || true - docker system prune -af || true - # clean files - ls -al - sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + uses: .github/actions/get-runner build: name: ${{ contains(inputs.test_type, 'wheel') && inputs.pytorch || 'build' }} diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 3b56454362..682d22565b 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -56,29 +56,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: - - name: Get runner info + - name: Get runner id: runner-info - run: | - # get test runner - echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT} - 
echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} - echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} - # show host info - cat /etc/os-release - uname -a - source /opt/intel/oneapi/setvars.sh - sycl-ls - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - - name: Cleanup workspace - if: ${{ always() }} - run: | - # clean docker cache - docker stop $(docker ps -aq) || true - docker system prune -af || true - # clean files - ls -al - sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + uses: .github/actions/get-runner e2e_test: runs-on: ${{ needs.get_runner.outputs.test_host }} diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 8ae2f349c9..50ed2efc63 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -35,29 +35,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: - - name: Get runner info + - name: Get runner id: runner-info - run: | - # get test runner - echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT} - echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} - echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} - # show host info - cat /etc/os-release - uname -a - source /opt/intel/oneapi/setvars.sh - sycl-ls - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - - name: Cleanup workspace - if: ${{ always() }} - run: | - # clean docker cache - docker stop $(docker ps -aq) || true - docker system prune -af || true - # clean files - ls -al - sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + uses: .github/actions/get-runner op_benchmark: needs: get_runner diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 76db53209c..6692c3fad1 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -46,29 +46,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: - - name: Get runner info + - name: Get runner id: runner-info - run: | - # get test runner - echo "test_host=${RUNNER_NAME%-*}" |tee -a ${GITHUB_OUTPUT} - echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} - echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} - # show host info - cat /etc/os-release - uname -a - source /opt/intel/oneapi/setvars.sh - sycl-ls - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - - name: Cleanup workspace - if: ${{ always() }} - run: | - # clean docker cache - # docker stop $(docker ps -aq) || true - docker system prune -af || true - # clean files - ls -al - sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + uses: .github/actions/get-runner ut_test: needs: get_runner From 40180c0021033539435e0dfd1751e1ab0887cea6 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 14:52:44 +0800 Subject: [PATCH 058/160] test env --- .github/actions/get-runner/action.yml | 10 +- .github/actions/setup-testenv/action.yml | 136 +++++++++++++++ .github/workflows/_linux_e2e.yml | 77 +-------- .github/workflows/_linux_op_benchmark.yml | 60 +------ .github/workflows/_linux_ut.yml | 202 ++-------------------- 5 files changed, 168 insertions(+), 317 deletions(-) create mode 100644 .github/actions/setup-testenv/action.yml diff --git a/.github/actions/get-runner/action.yml 
b/.github/actions/get-runner/action.yml index 853f30e6bd..89dc1764d2 100644 --- a/.github/actions/get-runner/action.yml +++ b/.github/actions/get-runner/action.yml @@ -17,16 +17,10 @@ runs: echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} # show host info lscpu + lshw -C display free -h - cat /etc/os-release - uname -a - gcc -v && g++ -v - source /opt/intel/oneapi/setvars.sh - sycl-ls - icpx -v - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c df -h + cat /etc/os-release - name: Cleanup host if: ${{ always() }} shell: bash -xe {0} diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml new file mode 100644 index 0000000000..a51b8beb6f --- /dev/null +++ b/.github/actions/setup-testenv/action.yml @@ -0,0 +1,136 @@ +name: Get Runner Infos + +on: + workflow_call: + inputs: + test_type: + required: true + type: string + description: Test scope + pytorch: + type: string + default: 'main' + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: + type: string + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + type: string + default: '3.10' + description: Python version + +permissions: read-all + +runs: + using: composite + steps: + - name: Setup gh + uses: actions4gh/setup-gh@v1 + - name: Setup gcc + uses: Dup4/actions-setup-gcc@v1 + with: + version: 11 + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Check runner + run: | + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf + hostname && whoami && id + cat /etc/os-release + gcc -v && g++ -v + which python && which pip + python -V + pip install -U pip wheel setuptools + pip list + uname -a + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + clinfo --list + cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + with: + path: torch-xpu-ops + - name: Install oneAPI DLE + if: ${{ inputs.oneapi != 'installed' }} + run: | + rm -rf ~/intel ~/.intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} + - name: Download Pytorch wheel + if: ${{ ! 
contains(inputs.test_type, 'wheel') }} + uses: actions/download-artifact@v4 + with: + pattern: Torch-XPU-Wheel-* + - name: Prepare Stock Pytorch + run: | + # install pytorch + if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + else + pip install --force-reinstall ${{ github.workspace }}/*.whl + fi + TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') + if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then + PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" + else + PYTORCH_REPO="https://github.com/pytorch/pytorch.git" + fi + git clone ${PYTORCH_REPO} pytorch + cd pytorch + git checkout ${TORCH_COMMIT_ID} + pip install -r .ci/docker/requirements-ci.txt + # apply extra PRs for stock pytorch + if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 + else + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + fi + git status && git diff && git show -s + - name: Prepare Torch-xpu-ops + if: ${{ inputs.torch_xpu_ops != 'skipped' }} + run: | + cd pytorch + rm -rf third_party/torch-xpu-ops + if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" + else + TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" + if [ "${{ inputs.torch_xpu_ops }}" == "pinned" ];then + TORCH_XPU_OPS_COMMIT="$(cat third_party/xpu.txt)" + else + TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" + fi + fi + if [ "${{ inputs.test_type }}" == "cicd" ];then + cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops + else + git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops + fi + cd third_party/torch-xpu-ops + git checkout ${TORCH_XPU_OPS_COMMIT} + git status && git diff && git show -s + - name: Torch Config + run: | + printenv + python -c "import torch; print(torch.__config__.show())" + python -c "import torch; print(torch.__config__.parallel_info())" + python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" + python -c "import triton; print(triton.__version__)" + python pytorch/torch/utils/collect_env.py + pip list |grep -E 'torch|intel' diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 682d22565b..48e0dd915e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -79,77 +79,14 @@ jobs: run: shell: bash -xe {0} steps: - - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python }} - - name: Check runner - run: | - ls -al - find ./ |grep -v "^\./$" |xargs rm -rf - hostname && whoami && id - clinfo --list - gcc -v && g++ -v - which python && which pip - python -V - pip install -U pip wheel setuptools - pip list - uname -a - 
dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - pip install pandas psutil scipy requests - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Install oneAPI DLE - if: ${{ inputs.oneapi != 'installed' }} - run: | - rm -rf ~/intel ~/.intel - wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - - name: Download Pytorch wheel - if: ${{ ! contains(inputs.test_type, 'wheel') }} - uses: actions/download-artifact@v4 + - name: Setup Test Env + uses: .github/actions/setup-testenv with: - pattern: Torch-XPU-Wheel-* - - name: Prepare Stock Pytorch - run: | - # install pytorch - if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - else - pip install --force-reinstall ${{ github.workspace }}/*.whl - fi - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then - PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - else - PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - fi - git clone ${PYTORCH_REPO} pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - # apply extra PRs for stock pytorch - if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then - python ../.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 - else - python ../.github/scripts/apply_torch_pr.py - fi - git status && git diff && git show -s - - name: Torch Config - run: | - printenv - python -c "import torch; print(torch.__config__.show())" - python -c "import torch; print(torch.__config__.parallel_info())" - python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import torchvision; print(torchvision.__version__)" - python -c "import torchaudio; print(torchaudio.__version__)" - python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py - pip list |grep -E 'torch|intel' + test_type: ${{ inputs.test_type }} + pytorch: ${{ inputs.pytorch }} + torch_xpu_ops: skipped + oneapi: ${{ inputs.oneapi }} + python: ${{ inputs.python }} # CICD launch - name: Nightly Huggingface BF16 & FP16 Training Test diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 50ed2efc63..582251fb80 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -60,60 +60,14 @@ jobs: run: shell: bash -xe {0} steps: - - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 + - name: Setup Test Env + uses: .github/actions/setup-testenv with: - python-version: ${{ inputs.python }} - - name: Check runner - run: | - ls -al - find ./ |grep -v "^\./$" |xargs rm -rf - hostname && whoami && id - clinfo --list - gcc -v && g++ -v - which python && which pip - python -V - pip install -U pip wheel setuptools - pip list - uname -a - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - pip install pandas psutil scipy 
requests - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Install oneAPI DLE - if: ${{ inputs.oneapi != 'installed' }} - run: | - rm -rf ~/intel ~/.intel - wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - - name: Download Pytorch wheel - if: ${{ ! contains(inputs.test_type, 'wheel') }} - uses: actions/download-artifact@v4 - with: - pattern: Torch-XPU-Wheel-* - - name: Prepare Stock Pytorch - run: | - # install pytorch - if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - else - pip install --force-reinstall ${{ github.workspace }}/*.whl - fi - - name: Torch Config - run: | - printenv - python -c "import torch; print(torch.__config__.show())" - python -c "import torch; print(torch.__config__.parallel_info())" - python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import torchvision; print(torchvision.__version__)" - python -c "import torchaudio; print(torchaudio.__version__)" - python -c "import triton; print(triton.__version__)" - pip list |grep -E 'torch|intel' + test_type: ${{ inputs.test_type }} + pytorch: ${{ inputs.pytorch }} + torch_xpu_ops: skipped + oneapi: ${{ inputs.oneapi }} + python: ${{ inputs.python }} - name: Run Torch XPU Op Benchmark run: | diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 6692c3fad1..57371e75ce 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -188,104 +188,14 @@ jobs: outputs: ut_name: ${{ steps.set-output.outputs.UT_NAME || '' }} steps: - - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 + - name: Setup Test Env + uses: .github/actions/setup-testenv with: - python-version: ${{ inputs.python }} - - name: Check runner - run: | - ls -al - find ./ |grep -v "^\./$" |xargs rm -rf - hostname && whoami && id - clinfo --list - gcc -v && g++ -v - which python && which pip - python -V - pip install -U pip wheel setuptools - pip list - pip install pytest pytest-timeout - uname -a - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - pip install pandas psutil scipy requests - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - with: - path: torch-xpu-ops - - name: Install oneAPI DLE - if: ${{ inputs.oneapi != 'installed' }} - run: | - rm -rf ~/intel ~/.intel - wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - - name: Download Pytorch wheel - if: ${{ ! 
contains(inputs.test_type, 'wheel') }} - uses: actions/download-artifact@v4 - with: - pattern: Torch-XPU-Wheel-* - - name: Prepare Stock Pytorch - run: | - # install pytorch - if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - else - pip install --force-reinstall ${{ github.workspace }}/*.whl - fi - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then - PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - else - PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - fi - git clone ${PYTORCH_REPO} pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - pip install -r .ci/docker/requirements-ci.txt - # apply extra PRs for stock pytorch - if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 - else - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - fi - git status && git diff && git show -s - - name: Prepare Torch-xpu-ops - run: | - cd pytorch - rm -rf third_party/torch-xpu-ops - if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then - TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" - TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" - else - TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - if [ "${{ inputs.torch_xpu_ops }}" == "pinned" ];then - TORCH_XPU_OPS_COMMIT="$(cat third_party/xpu.txt)" - else - TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" - fi - fi - if [ "${{ inputs.test_type }}" == "cicd" ];then - cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops - else - git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops - fi - cd third_party/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_COMMIT} - git status && git diff && git show -s - - name: Torch Config - run: | - printenv - python -c "import torch; print(torch.__config__.show())" - python -c "import torch; print(torch.__config__.parallel_info())" - python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import torchvision; print(torchvision.__version__)" - python -c "import torchaudio; print(torchaudio.__version__)" - python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py - pip list |grep -E 'torch|intel' + test_type: ${{ inputs.test_type }} + pytorch: ${{ inputs.pytorch }} + torch_xpu_ops: ${{ inputs.torch_xpu_ops }} + oneapi: ${{ inputs.oneapi }} + python: ${{ inputs.python }} - name: Run XPU UT Test if: ${{ matrix.test.condition }} @@ -415,98 +325,18 @@ jobs: - name: Check runner run: | ls -al - find ./ |grep -v "^\./$" |xargs rm -rf + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf sudo find /tmp/ |grep -v "^/tmp/$" |xargs sudo rm -rf - rm -rf ~/.triton ~/.torch - hostname && whoami && id + sudo rm -rf ~/.triton ~/.torch xpu-smi topology -m - gcc -v && g++ -v - uname -a - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - - name: Setup python-${{ 
inputs.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python }} - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 + - name: Setup Test Env + uses: .github/actions/setup-testenv with: - path: torch-xpu-ops - - name: Install oneAPI DLE - if: ${{ inputs.oneapi != 'installed' }} - run: | - rm -rf ~/intel ~/.intel - wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - - name: Download Pytorch wheel - if: ${{ ! contains(inputs.test_type, 'wheel') }} - uses: actions/download-artifact@v4 - with: - pattern: Torch-XPU-Wheel-* - - name: Prepare Stock Pytorch - run: | - which python && which pip - python -V - pip install -U pip wheel setuptools - pip list - pip install pytest pytest-timeout - # install pytorch - if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - else - pip install --force-reinstall ${{ github.workspace }}/*.whl - fi - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then - PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - else - PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - fi - git clone ${PYTORCH_REPO} pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - pip install -r .ci/docker/requirements-ci.txt - # apply extra PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git diff && git show -s - - name: Prepare Torch-xpu-ops - run: | - cd pytorch - rm -rf third_party/torch-xpu-ops - if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then - TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" - TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" - else - TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - if [ "${{ inputs.torch_xpu_ops }}" == "pinned" ];then - TORCH_XPU_OPS_COMMIT="$(cat third_party/xpu.txt)" - else - TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" - fi - fi - if [ "${{ inputs.test_type }}" == "cicd" ];then - cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops - else - git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops - fi - cd third_party/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_COMMIT} - git status && git diff && git show -s - - name: Torch Config - run: | - printenv - python -c "import torch; print(torch.__config__.show())" - python -c "import torch; print(torch.__config__.parallel_info())" - python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import torchvision; print(torchvision.__version__)" - python -c "import torchaudio; print(torchaudio.__version__)" - python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py - pip list |grep -E 'torch|intel' + test_type: ${{ inputs.test_type }} + pytorch: ${{ inputs.pytorch }} + torch_xpu_ops: ${{ inputs.torch_xpu_ops }} + oneapi: 
${{ inputs.oneapi }} + python: ${{ inputs.python }} - name: Set Ptrace_scope if: ${{ always() }} From 54ea2f0c5b37eba5c54d9f0f0a516d6539861db8 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 14:56:41 +0800 Subject: [PATCH 059/160] update --- .github/workflows/_linux_e2e.yml | 28 +++++++++++++------------- .github/workflows/nightly_ondemand.yml | 10 ++++----- .github/workflows/pull.yml | 8 ++++---- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 48e0dd915e..a9b36c2765 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -91,7 +91,7 @@ jobs: # CICD launch - name: Nightly Huggingface BF16 & FP16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: huggingface @@ -100,7 +100,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: torchbench @@ -109,7 +109,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: timm_models @@ -120,7 +120,7 @@ jobs: # Nihglty launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: huggingface @@ -129,7 +129,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: torchbench @@ -138,7 +138,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: timm_models @@ -147,7 +147,7 @@ jobs: scenario: accuracy,performance - name: Nightly PT2E Full Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/pt2e + uses: .github/actions/pt2e with: env_prepare: true dt: float32,int8 @@ -156,7 +156,7 @@ jobs: # Weekly launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: huggingface @@ -165,7 +165,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: torchbench @@ -174,7 +174,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: timm_models @@ -183,7 +183,7 @@ jobs: scenario: accuracy,performance - name: Nightly PT2E Full Test if: ${{ 
contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/pt2e + uses: .github/actions/pt2e with: env_prepare: true dt: float32,int8 @@ -192,7 +192,7 @@ jobs: # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ contains(inputs.test_type, 'ondemand') && inputs.suite != 'pt2e' }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: .github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: ${{ inputs.suite }} @@ -201,7 +201,7 @@ jobs: scenario: ${{ inputs.scenario }} - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'pt2e') }} - uses: ./.github/actions/pt2e + uses: .github/actions/pt2e with: env_prepare: true dt: ${{ inputs.dt }} @@ -271,7 +271,7 @@ jobs: run: | pip install pandas requests if [ "${{ inputs.suite }}" != 'pt2e' ];then - bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} + bash .github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) if [ ${exit_label} -ne 0 ];then grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 226828471e..3bedac1c96 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -108,7 +108,7 @@ jobs: needs: [Conditions-Filter] name: linux secrets: inherit - uses: ./.github/workflows/_linux_build.yml + uses: .github/workflows/_linux_build.yml with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -122,7 +122,7 @@ jobs: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'p') }} name: linux needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - uses: ./.github/workflows/_linux_ut.yml + uses: .github/workflows/_linux_ut.yml with: runner: linux.idc.xpu test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -137,7 +137,7 @@ jobs: name: linux permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - uses: ./.github/workflows/_linux_e2e.yml + uses: .github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -155,7 +155,7 @@ jobs: name: linux-nightly-ondemand-rolling / Op_microbench permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - uses: ./.github/workflows/_linux_op_benchmark.yml + uses: .github/workflows/_linux_op_benchmark.yml with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -167,7 +167,7 @@ jobs: Windows-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' }} name: Windows-nightly-ondemand - uses: ./.github/workflows/_windows_ut.yml + uses: .github/workflows/_windows_ut.yml with: ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 9e804a27ee..b17252b41a 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -97,7 +97,7 @@ jobs: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all')}} needs: [conditions-filter] secrets: inherit - uses: 
./.github/workflows/_linux_build.yml + uses: .github/workflows/_linux_build.yml with: runner: pvc_rolling test_type: build-cicd @@ -107,7 +107,7 @@ jobs: linux-ut: name: linux needs: [conditions-filter, linux-build] - uses: ./.github/workflows/_linux_ut.yml + uses: .github/workflows/_linux_ut.yml with: runner: linux.idc.xpu test_type: build-cicd @@ -120,7 +120,7 @@ jobs: name: linux permissions: write-all needs: [conditions-filter, linux-build] - uses: ./.github/workflows/_linux_e2e.yml + uses: .github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: build-cicd @@ -130,7 +130,7 @@ jobs: name: windows if: ${{ !(contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.conditions-filter.outputs.disabled_tests, 'disable_win')) }} needs: [conditions-filter] - uses: ./.github/workflows/_windows_ut.yml + uses: .github/workflows/_windows_ut.yml with: ut: op_extended,torch_xpu runner: Windows_CI From 9b660b96d72b9359a63094c3830451965b0c3c55 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 14:59:38 +0800 Subject: [PATCH 060/160] Revert "update" This reverts commit 54ea2f0c5b37eba5c54d9f0f0a516d6539861db8. --- .github/workflows/_linux_e2e.yml | 28 +++++++++++++------------- .github/workflows/nightly_ondemand.yml | 10 ++++----- .github/workflows/pull.yml | 8 ++++---- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index a9b36c2765..48e0dd915e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -91,7 +91,7 @@ jobs: # CICD launch - name: Nightly Huggingface BF16 & FP16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: huggingface @@ -100,7 +100,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: torchbench @@ -109,7 +109,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: timm_models @@ -120,7 +120,7 @@ jobs: # Nihglty launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: huggingface @@ -129,7 +129,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: torchbench @@ -138,7 +138,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: timm_models @@ -147,7 +147,7 @@ jobs: scenario: accuracy,performance - name: Nightly PT2E Full Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: .github/actions/pt2e + uses: ./.github/actions/pt2e with: env_prepare: true dt: float32,int8 @@ -156,7 +156,7 @@ jobs: # Weekly launch 
- name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: huggingface @@ -165,7 +165,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: torchbench @@ -174,7 +174,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: timm_models @@ -183,7 +183,7 @@ jobs: scenario: accuracy,performance - name: Nightly PT2E Full Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: .github/actions/pt2e + uses: ./.github/actions/pt2e with: env_prepare: true dt: float32,int8 @@ -192,7 +192,7 @@ jobs: # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ contains(inputs.test_type, 'ondemand') && inputs.suite != 'pt2e' }} - uses: .github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/inductor-xpu-e2e-test with: env_prepare: true suite: ${{ inputs.suite }} @@ -201,7 +201,7 @@ jobs: scenario: ${{ inputs.scenario }} - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'pt2e') }} - uses: .github/actions/pt2e + uses: ./.github/actions/pt2e with: env_prepare: true dt: ${{ inputs.dt }} @@ -271,7 +271,7 @@ jobs: run: | pip install pandas requests if [ "${{ inputs.suite }}" != 'pt2e' ];then - bash .github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} + bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) if [ ${exit_label} -ne 0 ];then grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 3bedac1c96..226828471e 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -108,7 +108,7 @@ jobs: needs: [Conditions-Filter] name: linux secrets: inherit - uses: .github/workflows/_linux_build.yml + uses: ./.github/workflows/_linux_build.yml with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -122,7 +122,7 @@ jobs: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'p') }} name: linux needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - uses: .github/workflows/_linux_ut.yml + uses: ./.github/workflows/_linux_ut.yml with: runner: linux.idc.xpu test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -137,7 +137,7 @@ jobs: name: linux permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - uses: .github/workflows/_linux_e2e.yml + uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -155,7 +155,7 @@ jobs: name: linux-nightly-ondemand-rolling / Op_microbench permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - uses: 
.github/workflows/_linux_op_benchmark.yml + uses: ./.github/workflows/_linux_op_benchmark.yml with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} @@ -167,7 +167,7 @@ jobs: Windows-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' }} name: Windows-nightly-ondemand - uses: .github/workflows/_windows_ut.yml + uses: ./.github/workflows/_windows_ut.yml with: ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index b17252b41a..9e804a27ee 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -97,7 +97,7 @@ jobs: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all')}} needs: [conditions-filter] secrets: inherit - uses: .github/workflows/_linux_build.yml + uses: ./.github/workflows/_linux_build.yml with: runner: pvc_rolling test_type: build-cicd @@ -107,7 +107,7 @@ jobs: linux-ut: name: linux needs: [conditions-filter, linux-build] - uses: .github/workflows/_linux_ut.yml + uses: ./.github/workflows/_linux_ut.yml with: runner: linux.idc.xpu test_type: build-cicd @@ -120,7 +120,7 @@ jobs: name: linux permissions: write-all needs: [conditions-filter, linux-build] - uses: .github/workflows/_linux_e2e.yml + uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: build-cicd @@ -130,7 +130,7 @@ jobs: name: windows if: ${{ !(contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.conditions-filter.outputs.disabled_tests, 'disable_win')) }} needs: [conditions-filter] - uses: .github/workflows/_windows_ut.yml + uses: ./.github/workflows/_windows_ut.yml with: ut: op_extended,torch_xpu runner: Windows_CI From 7d025c0e6fcc850ae5ec0ac71338e7c83e2c30da Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 15:26:53 +0800 Subject: [PATCH 061/160] update --- .github/workflows/_linux_build.yml | 2 +- .github/workflows/_linux_e2e.yml | 4 ++-- .github/workflows/_linux_op_benchmark.yml | 4 ++-- .github/workflows/_linux_ut.yml | 6 +++--- .github/workflows/pull.yml | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index b226fb9371..e7ca29b90e 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -46,7 +46,7 @@ jobs: steps: - name: Get runner id: runner-info - uses: .github/actions/get-runner + uses: ./.github/actions/get-runner build: name: ${{ contains(inputs.test_type, 'wheel') && inputs.pytorch || 'build' }} diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 48e0dd915e..cba621d0e0 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -58,7 +58,7 @@ jobs: steps: - name: Get runner id: runner-info - uses: .github/actions/get-runner + uses: ./.github/actions/get-runner e2e_test: runs-on: ${{ needs.get_runner.outputs.test_host }} @@ -80,7 +80,7 @@ jobs: shell: bash -xe {0} steps: - name: Setup Test Env - uses: .github/actions/setup-testenv + uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 582251fb80..2e23483ad6 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -37,7 +37,7 
@@ jobs: steps: - name: Get runner id: runner-info - uses: .github/actions/get-runner + uses: ./.github/actions/get-runner op_benchmark: needs: get_runner @@ -61,7 +61,7 @@ jobs: shell: bash -xe {0} steps: - name: Setup Test Env - uses: .github/actions/setup-testenv + uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 57371e75ce..0824d0f0e0 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -48,7 +48,7 @@ jobs: steps: - name: Get runner id: runner-info - uses: .github/actions/get-runner + uses: ./.github/actions/get-runner ut_test: needs: get_runner @@ -189,7 +189,7 @@ jobs: ut_name: ${{ steps.set-output.outputs.UT_NAME || '' }} steps: - name: Setup Test Env - uses: .github/actions/setup-testenv + uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} @@ -330,7 +330,7 @@ jobs: sudo rm -rf ~/.triton ~/.torch xpu-smi topology -m - name: Setup Test Env - uses: .github/actions/setup-testenv + uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 9e804a27ee..b17252b41a 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -97,7 +97,7 @@ jobs: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all')}} needs: [conditions-filter] secrets: inherit - uses: ./.github/workflows/_linux_build.yml + uses: .github/workflows/_linux_build.yml with: runner: pvc_rolling test_type: build-cicd @@ -107,7 +107,7 @@ jobs: linux-ut: name: linux needs: [conditions-filter, linux-build] - uses: ./.github/workflows/_linux_ut.yml + uses: .github/workflows/_linux_ut.yml with: runner: linux.idc.xpu test_type: build-cicd @@ -120,7 +120,7 @@ jobs: name: linux permissions: write-all needs: [conditions-filter, linux-build] - uses: ./.github/workflows/_linux_e2e.yml + uses: .github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: build-cicd @@ -130,7 +130,7 @@ jobs: name: windows if: ${{ !(contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.conditions-filter.outputs.disabled_tests, 'disable_win')) }} needs: [conditions-filter] - uses: ./.github/workflows/_windows_ut.yml + uses: .github/workflows/_windows_ut.yml with: ut: op_extended,torch_xpu runner: Windows_CI From dd23ceb6ef422419073fd7b266d0e75ca0b5d547 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 15:32:54 +0800 Subject: [PATCH 062/160] update --- .github/workflows/pull.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index b17252b41a..9e804a27ee 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -97,7 +97,7 @@ jobs: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all')}} needs: [conditions-filter] secrets: inherit - uses: .github/workflows/_linux_build.yml + uses: ./.github/workflows/_linux_build.yml with: runner: pvc_rolling test_type: build-cicd @@ -107,7 +107,7 @@ jobs: linux-ut: name: linux needs: [conditions-filter, linux-build] - uses: .github/workflows/_linux_ut.yml + uses: ./.github/workflows/_linux_ut.yml with: runner: linux.idc.xpu test_type: build-cicd @@ -120,7 +120,7 @@ jobs: name: linux permissions: write-all needs: [conditions-filter, linux-build] - 
uses: .github/workflows/_linux_e2e.yml + uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: build-cicd @@ -130,7 +130,7 @@ jobs: name: windows if: ${{ !(contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.conditions-filter.outputs.disabled_tests, 'disable_win')) }} needs: [conditions-filter] - uses: .github/workflows/_windows_ut.yml + uses: ./.github/workflows/_windows_ut.yml with: ut: op_extended,torch_xpu runner: Windows_CI From de4a432f0979ac44813054e5d2da880516fb34ba Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 17:01:53 +0800 Subject: [PATCH 063/160] update --- .github/actions/setup-testenv/action.yml | 8 +++----- .github/workflows/_linux_build.yml | 13 ++++++++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index a51b8beb6f..a26f543f45 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -30,11 +30,9 @@ runs: using: composite steps: - name: Setup gh - uses: actions4gh/setup-gh@v1 - - name: Setup gcc - uses: Dup4/actions-setup-gcc@v1 - with: - version: 11 + run: | + curl -sS https://webi.sh/gh | sh + echo "PATH=${HOME}/.local/bin:$PATH" >> ${GITHUB_ENV} - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index e7ca29b90e..f83a6cc623 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -67,11 +67,9 @@ jobs: shell: bash -xe {0} steps: - name: Setup gh - uses: actions4gh/setup-gh@v1 - - name: Setup gcc - uses: Dup4/actions-setup-gcc@v1 - with: - version: 11 + run: | + curl -sS https://webi.sh/gh | sh + echo "PATH=${HOME}/.local/bin:$PATH" >> ${GITHUB_ENV} - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: @@ -110,6 +108,8 @@ jobs: export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" fi source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh + # gcc 11 + source /opt/rh/gcc-toolset-11/enable ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ --WORKSPACE="${{ github.workspace }}" \ --PYTORCH_REPO="${PYTORCH_REPO}" \ @@ -137,6 +137,9 @@ jobs: cp pytorch_triton_xpu-*.whl ${{ github.workspace }} - name: Build Torchvision and Torchaudio run: | + # gcc 13 + dnf install -y gcc-toolset-13-gcc-c++ zlib-devel + source /opt/rh/gcc-toolset-13/enable cd ./pytorch TORCHVISION_COMMIT_ID="$(cat .github/ci_commit_pins/vision.txt)" TORCHAUDIO_COMMIT_ID="$(cat .github/ci_commit_pins/audio.txt)" From 65cc01a1ccd212dd673756b2445a0073f8d1ef89 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 17:16:50 +0800 Subject: [PATCH 064/160] remove useless inputs for op benchmark --- .github/workflows/nightly_ondemand.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 226828471e..3d6e8b9fe3 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -160,7 +160,6 @@ jobs: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} - torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} From 
f727ef88174a1fe699e69492d051fed8439b0a45 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 17:23:50 +0800 Subject: [PATCH 065/160] checkout torch-xpu-ops --- .github/workflows/_linux_build.yml | 2 ++ .github/workflows/_linux_e2e.yml | 4 ++++ .github/workflows/_linux_op_benchmark.yml | 4 ++++ .github/workflows/_linux_ut.yml | 6 ++++++ 4 files changed, 16 insertions(+) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index f83a6cc623..ad45c3bf04 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -44,6 +44,8 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Get runner id: runner-info uses: ./.github/actions/get-runner diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index cba621d0e0..0118ad47d3 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -56,6 +56,8 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Get runner id: runner-info uses: ./.github/actions/get-runner @@ -79,6 +81,8 @@ jobs: run: shell: bash -xe {0} steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Setup Test Env uses: ./.github/actions/setup-testenv with: diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 2e23483ad6..d19e53de40 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -35,6 +35,8 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Get runner id: runner-info uses: ./.github/actions/get-runner @@ -60,6 +62,8 @@ jobs: run: shell: bash -xe {0} steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Setup Test Env uses: ./.github/actions/setup-testenv with: diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 0824d0f0e0..7ca9ccdec5 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -46,6 +46,8 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Get runner id: runner-info uses: ./.github/actions/get-runner @@ -188,6 +190,8 @@ jobs: outputs: ut_name: ${{ steps.set-output.outputs.UT_NAME || '' }} steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Setup Test Env uses: ./.github/actions/setup-testenv with: @@ -329,6 +333,8 @@ jobs: sudo find /tmp/ |grep -v "^/tmp/$" |xargs sudo rm -rf sudo rm -rf ~/.triton ~/.torch xpu-smi topology -m + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 - name: Setup Test Env uses: ./.github/actions/setup-testenv with: From 18ada97394af7461cac3c2ab54089534b91d5750 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 17:58:31 +0800 Subject: [PATCH 066/160] modify get runner --- .github/actions/get-runner/action.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/actions/get-runner/action.yml b/.github/actions/get-runner/action.yml index 89dc1764d2..74544db7e4 100644 --- 
a/.github/actions/get-runner/action.yml +++ b/.github/actions/get-runner/action.yml @@ -3,6 +3,14 @@ name: Get Runner Infos on: workflow_call: +outputs: + test_host: + value: ${{ steps.runner.outputs.test_host }} + test_user: + value: ${{ steps.runner.outputs.test_user }} + test_group: + value: ${{ steps.runner.outputs.test_group }} + permissions: read-all runs: @@ -10,6 +18,7 @@ runs: steps: - name: Get runner shell: bash -xe {0} + id: runner run: | # get test runner echo "test_host=$(echo ${RUNNER_NAME} |sed 's/\-[0-9]$//')" |tee -a ${GITHUB_OUTPUT} @@ -21,6 +30,7 @@ runs: free -h df -h cat /etc/os-release + uname -a - name: Cleanup host if: ${{ always() }} shell: bash -xe {0} From 018f96804e839cf7310f5ce0ca54c7a616ba07cc Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 22 Jul 2025 21:12:07 +0800 Subject: [PATCH 067/160] modify build --- .github/workflows/_linux_build.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index ad45c3bf04..0925e2a73c 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -119,6 +119,10 @@ jobs: --TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \ --TORCH_XPU_OPS_COMMIT="${TORCH_XPU_OPS_COMMIT}" \ 2>&1 |tee ${{ github.workspace }}/build_pytorch_${PYTORCH_COMMIT//\//-}.log + if [ ! -f "${{ github.workspace }}/torch-*.whl" ];then + echo "Build pytorch got failed" + exit 1 + fi - name: Build Triton run: | cd ./pytorch @@ -135,6 +139,10 @@ jobs: )" python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} \ 2>&1 |tee ${{ github.workspace }}/build_triton_${TRITON_COMMIT_ID}.log + if [ ! -f "${{ github.workspace }}/pytorch_triton_xpu-*.whl" ];then + echo "Build triton got failed" + exit 1 + fi pip install pytorch_triton_xpu-*.whl cp pytorch_triton_xpu-*.whl ${{ github.workspace }} - name: Build Torchvision and Torchaudio @@ -148,11 +156,19 @@ jobs: git clone --single-branch -b main https://github.com/pytorch/vision.git xpu-vision cd xpu-vision && git checkout ${TORCHVISION_COMMIT_ID} python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_vision_${TRITON_COMMIT_ID}.log + if [ ! -f "dist/*.whl" ];then + echo "Build torchvision got failed" + exit 1 + fi pip install dist/*.whl cp dist/*.whl ${{ github.workspace }} git clone --single-branch -b main https://github.com/pytorch/audio.git xpu-audio cd xpu-audio && git checkout ${TORCHAUDIO_COMMIT_ID} python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_audio_${TRITON_COMMIT_ID}.log + if [ ! -f "dist/*.whl" ];then + echo "Build torchaudio got failed" + exit 1 + fi pip install dist/*.whl cp dist/*.whl ${{ github.workspace }} - name: Torch Config From 93fa1123ac255fa984ae7e81cbdf2d31a8108752 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 11:12:36 +0800 Subject: [PATCH 068/160] modify build --- .github/workflows/_linux_build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 0925e2a73c..baba85b624 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -76,6 +76,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ inputs.python }} + check-latest: true - name: Clean workspace run: | # Cleanup workspace @@ -184,7 +185,7 @@ jobs: pip list |grep -E 'torch|intel' - name: Upload Torch XPU Wheel - if: ${{ ! 
cancelled() }} + if: ${{ success() }} uses: actions/upload-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} From ad8cc67a9300cf2c076dd56d967ce92077ff313c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 13:20:20 +0800 Subject: [PATCH 069/160] update --- .github/workflows/_linux_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index baba85b624..3294eb3c80 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -75,7 +75,7 @@ jobs: - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: - python-version: ${{ inputs.python }} + python-version: 'pypy${{ inputs.python }}' check-latest: true - name: Clean workspace run: | From 7c9d3a346169d3f9282c4cc0e1fdab358fd0e4d8 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 13:25:12 +0800 Subject: [PATCH 070/160] update --- .github/workflows/_linux_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 0118ad47d3..8320cba71c 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -286,7 +286,7 @@ jobs: pt2e_summary_csv="$(find ./target/ -name "summary.csv")" if [ -f "${pt2e_summary_csv}" ];then cat ${pt2e_summary_csv} - failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l) + failed_num=$(grep -c ',failed' ${pt2e_summary_csv}) if [ ${failed_num} -ne 0 ];then echo "[Warning] PT2E has failures!" fi @@ -295,7 +295,7 @@ jobs: if: ${{ ! (contains(inputs.test_type, 'ondemand') && contains(inputs.test_type, 'cicd')) && github.repository_owner == 'intel' }} run: | gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee new_body.txt 2>&1 - has_or_not="$(grep 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt |wc -l)" + has_or_not="$(grep -c 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt)" if [ ${has_or_not} -ne 0 ];then sed -i "s/Inductor-${{ inputs.test_type }}-LTS2:.*/Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}/" new_body.txt else From 2fc3b8eb9439e781aa8ef6785aad85d0a60e1a44 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 15:56:52 +0800 Subject: [PATCH 071/160] update --- .github/workflows/_linux_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 3294eb3c80..03694ceb65 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -71,7 +71,6 @@ jobs: - name: Setup gh run: | curl -sS https://webi.sh/gh | sh - echo "PATH=${HOME}/.local/bin:$PATH" >> ${GITHUB_ENV} - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: @@ -89,6 +88,7 @@ jobs: path: torch-xpu-ops - name: Build Pytorch XPU run: | + export PATH=${HOME}/.local/bin:$PATH if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" From 78cedbfa05e3f955f5744ebe8208167bba52111e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 16:44:35 +0800 Subject: [PATCH 072/160] update --- .github/workflows/_linux_build.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 03694ceb65..42f659de62 100644 
--- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -70,7 +70,11 @@ jobs: steps: - name: Setup gh run: | + hostname && id curl -sS https://webi.sh/gh | sh + echo "PATH=${HOME}/.local/bin:$PATH" |tee -a ${GITHUB_ENV} + source ~/.config/envman/PATH.env + gh --version - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: @@ -80,7 +84,8 @@ jobs: run: | # Cleanup workspace find ./ |grep -v "^\./$" |xargs rm -rf - which python && python -V && pip list + which python && python -V + which pip && pip list pip install -U pip wheel setuptools - name: Checkout torch-xpu-ops uses: actions/checkout@v4 @@ -88,7 +93,6 @@ jobs: path: torch-xpu-ops - name: Build Pytorch XPU run: | - export PATH=${HOME}/.local/bin:$PATH if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" @@ -103,6 +107,8 @@ jobs: TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi + # gcc 11 + source /opt/rh/gcc-toolset-11/enable # oneAPI DLE if [ "${{ inputs.oneapi }}" != "installed" ];then rm -rf ${HOME}/intel ${HOME}/.intel @@ -111,8 +117,6 @@ jobs: export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" fi source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh - # gcc 11 - source /opt/rh/gcc-toolset-11/enable ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ --WORKSPACE="${{ github.workspace }}" \ --PYTORCH_REPO="${PYTORCH_REPO}" \ From c6bc928ed934e88ce763828f956fdc86614cc447 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 16:50:22 +0800 Subject: [PATCH 073/160] update --- .github/workflows/_linux_build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 42f659de62..5b9bf994c3 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -59,7 +59,6 @@ jobs: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} env: GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: /tmp/_tools From 9765fac30e301772bb1be36e144f19f484e228a1 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 16:50:57 +0800 Subject: [PATCH 074/160] update --- .github/workflows/_linux_build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 5b9bf994c3..378cfc58c9 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -186,6 +186,7 @@ jobs: python -c "import torchaudio; print(torchaudio.__version__)" python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' + chmod 777 . 
-R - name: Upload Torch XPU Wheel if: ${{ success() }} From eda9634e3928daa89c35bdc3736f54bf4beee372 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 17:29:19 +0800 Subject: [PATCH 075/160] modify ut --- .github/workflows/_linux_build.yml | 6 +- .github/workflows/_linux_e2e.yml | 12 +- .github/workflows/_linux_ut.yml | 203 ++++++++++++++--------------- .github/workflows/pull.yml | 9 +- 4 files changed, 111 insertions(+), 119 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 378cfc58c9..d24628b6d5 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -37,7 +37,7 @@ on: permissions: read-all jobs: - get_runner: + runner: runs-on: ${{ inputs.runner }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} @@ -52,9 +52,9 @@ jobs: build: name: ${{ contains(inputs.test_type, 'wheel') && inputs.pytorch || 'build' }} - needs: get_runner + needs: runner if: ${{ ! contains(inputs.test_type, 'wheel') }} - runs-on: ${{ needs.get_runner.outputs.test_host }} + runs-on: ${{ needs.runner.outputs.test_host }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 8320cba71c..2c55ce60a7 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -49,7 +49,7 @@ on: permissions: read-all jobs: - get_runner: + runner: runs-on: ${{ inputs.runner }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} @@ -62,16 +62,16 @@ jobs: id: runner-info uses: ./.github/actions/get-runner - e2e_test: - runs-on: ${{ needs.get_runner.outputs.test_host }} - needs: get_runner + test: + runs-on: ${{ needs.runner.outputs.test_host }} + needs: runner timeout-minutes: 3600 container: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} + -u ${{ needs.runner.outputs.test_user }}:${{ needs.runner.outputs.test_group }} env: AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} @@ -226,7 +226,7 @@ jobs: summary: runs-on: [self-hosted, Linux, X64] if: ${{ always() }} - needs: e2e_test + needs: test permissions: issues: write container: diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 7ca9ccdec5..31391ff634 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -38,8 +38,12 @@ on: permissions: read-all +defaults: + run: + shell: bash -xe {0} + jobs: - get_runner: + runner: runs-on: ${{ inputs.runner }} outputs: test_host: ${{ steps.runner-info.outputs.test_host }} @@ -52,9 +56,9 @@ jobs: id: runner-info uses: ./.github/actions/get-runner - ut_test: - needs: get_runner - runs-on: ${{ needs.get_runner.outputs.test_host }} + normal: + needs: runner + runs-on: ${{ needs.runner.outputs.test_host }} if: ${{ inputs.ut != 'xpu_distributed' && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 300 container: @@ -62,17 +66,12 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} + -u ${{ needs.runner.outputs.test_user }}:${{ needs.runner.outputs.test_group }} -e ZE_AFFINITY_MASK env: 
AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - env: - UT_NAME: ${{ matrix.test.name }} - defaults: - run: - shell: bash -xe {0} strategy: fail-fast: false matrix: @@ -187,8 +186,8 @@ jobs: tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log additional_steps: | mkdir -p ut_log/profile_test/issue_reproduce - outputs: - ut_name: ${{ steps.set-output.outputs.UT_NAME || '' }} + env: + UT_NAME: ${{ matrix.test.name }} steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 @@ -200,11 +199,9 @@ jobs: torch_xpu_ops: ${{ inputs.torch_xpu_ops }} oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} - - name: Run XPU UT Test if: ${{ matrix.test.condition }} run: | - set -e mkdir -p ${{ github.workspace }}/ut_log mkdir -p ${{ github.workspace }}/ut_log/${{ matrix.test.name }} echo "Running ${{ matrix.test.name }}" @@ -213,7 +210,6 @@ jobs: cd ${{ matrix.test.directory }} if [[ "${{ matrix.test.name }}" == "op_ut" ]] || [[ "${{ matrix.test.name }}" == "xpu_profiling" ]] || [[ "${{ matrix.test.name }}" == "torch_xpu" ]]; then bash << "SCRIPT" - set -e ${{ matrix.test.command_script }} SCRIPT else @@ -242,81 +238,74 @@ jobs: with: name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} path: ${{ github.workspace }}/ut_log/ut_failure_list.csv - - name: Set UT outputs - id: set-output - if: ${{ matrix.test.condition }} - run: | - echo "UT_NAME=${{ matrix.test.name }}" >> $GITHUB_OUTPUT - - ut_summary: - needs: ut_test - runs-on: ubuntu-22.04 - timeout-minutes: 30 - env: - GH_TOKEN: ${{ github.token }} - UT_SKIP_ISSUE: 1624 + + devices: + runs-on: pvc_rolling + if: ${{ contains(inputs.ut, 'op_regression_dev1') && !contains(inputs.disabled_tests, 'disable_ut') }} + timeout-minutes: 5 strategy: - fail-fast: false matrix: - test: - - name: 'op_regression' - condition: ${{ contains(inputs.ut, 'op_regression') }} - - name: 'op_regression_dev1' - condition: ${{ contains(inputs.ut, 'op_regression_dev1') }} - - name: 'op_transformers' - condition: ${{ contains(inputs.ut, 'op_transformers') }} - - name: 'op_extended' - condition: ${{ contains(inputs.ut, 'op_extended') }} - - name: 'op_ut' - condition: ${{ contains(inputs.ut, 'op_ut') }} - - name: 'torch_xpu' - condition: ${{ contains(inputs.ut, 'torch_xpu') }} - - name: 'xpu_profiling' - condition: ${{ contains(inputs.ut, 'xpu_profiling') }} + test: + - name: 'op_regression_dev1' + condition: ${{ contains(inputs.ut, 'op_regression_dev1') }} + directory: 'pytorch/third_party/torch-xpu-ops/test/regressions' + command: 'pytest --timeout 600 -v test_operation_on_device_1.py --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml' + log_prefix: 'op_regression_dev1' + timeout: 3 + env: + AGENT_TOOLSDIRECTORY: /tmp/_tools + GH_TOKEN: ${{ github.token }} + UT_NAME: ${{ matrix.test.name }} steps: - - name: Get matrix UT value - run: | - echo "UT_NAME=${{ needs.ut_test.outputs.ut_name }}" >> "${GITHUB_ENV}" - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Download XPU UT Logs - if: ${{ matrix.test.condition }} - uses: actions/download-artifact@v4 + - name: Setup Test Env + uses: ./.github/actions/setup-testenv with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }} - path: ${{ github.workspace }}/ut_log - - name: Check UT Results + test_type: ${{ inputs.test_type }} + pytorch: ${{ inputs.pytorch }} + torch_xpu_ops: ${{ inputs.torch_xpu_ops }} + oneapi: 
${{ inputs.oneapi }} + python: ${{ inputs.python }} + - name: Run XPU UT Test if: ${{ matrix.test.condition }} - shell: bash run: | - repo="${{ github.repository }}" - function contains() { - contains_status="echo 'Start $2 ...'" - { - [[ $1 =~ (^|,)$2($|,) ]] - } || { - echo "[Warning] $2 is not suppotted type! Skipped!" - contains_status="continue" - } - } - set -xe - cd ${{ github.workspace }}/ut_log/${{ matrix.test.name }} - gh --repo $repo issue view $UT_SKIP_ISSUE --json body -q .body | sed '/^$/d' > Known_issue.log - gh api "repos/${{ github.repository }}/issues?labels=skipped" \ - --jq '.[] | select(.pull_request == null) | "Issue #\(.number): \(.title)\n\(.body)\n"' \ - > issues.log - awk '/Cases:/ {flag=1; next} /^\|\||^$/ {flag=0} flag' issues.log | grep -Eo 'test[^[:space:]]+( \|\| [^[:space:]]+)?' | sed 's/ *|| */ /g' | sort -u > issues_temp.log - awk '$2 == "op_ut" {print $1}' issues_temp.log > issues_op_ut.log - cat issues_temp.log | awk '{print $1}' >> Known_issue.log - awk -F'::' '{print $1}' issues_op_ut.log | sort -u | paste -sd ',' >> Known_issue.log - cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./ - bash ut_result_check.sh ${{ matrix.test.name }} + mkdir -p ${{ github.workspace }}/ut_log + mkdir -p ${{ github.workspace }}/ut_log/${{ matrix.test.name }} + echo "Running ${{ matrix.test.name }}" + echo "Directory: ${{ matrix.test.directory }}" + ${{ matrix.test.additional_steps }} + cd ${{ matrix.test.directory }} + if [[ "${{ matrix.test.name }}" == "op_ut" ]] || [[ "${{ matrix.test.name }}" == "xpu_profiling" ]] || [[ "${{ matrix.test.name }}" == "torch_xpu" ]]; then + bash << "SCRIPT" + ${{ matrix.test.command_script }} + SCRIPT + else + timeout ${{ matrix.test.timeout }} ${{ matrix.test.command }} \ + 2>${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test_error.log | \ + tee ${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test.log + ${{ matrix.test.xml_post_processing || '' }} + fi + - name: UT Test Results Summary + if: ${{ matrix.test.condition }} + run: | + pip install junitparser + python torch-xpu-ops/.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true + if [ -e "ut_failure_list.csv" ];then + cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv + fi - name: Upload Inductor XPU UT Log if: ${{ matrix.test.condition }} uses: actions/upload-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }}-checked + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} path: ${{ github.workspace }}/ut_log + - name: Upload XPU UT Failure list + if: ${{ matrix.test.condition }} + uses: actions/upload-artifact@v4 + with: + name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} + path: ${{ github.workspace }}/ut_log/ut_failure_list.csv distributed: runs-on: pytorch-06 @@ -343,7 +332,6 @@ jobs: torch_xpu_ops: ${{ inputs.torch_xpu_ops }} oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} - - name: Set Ptrace_scope if: ${{ always() }} run: | @@ -378,53 +366,62 @@ jobs: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-xpu_distributed path: ${{ github.workspace }}/ut_log - distributed_summary: - needs: distributed - runs-on: ubuntu-22.04 + summary: + needs: [normal, devices, distributed] + runs-on: ubuntu-latest timeout-minutes: 30 + 
strategy: + fail-fast: false + matrix: + test: + - name: 'op_regression' + condition: ${{ contains(inputs.ut, 'op_regression') }} + - name: 'op_regression_dev1' + condition: ${{ contains(inputs.ut, 'op_regression_dev1') }} + - name: 'op_transformers' + condition: ${{ contains(inputs.ut, 'op_transformers') }} + - name: 'op_extended' + condition: ${{ contains(inputs.ut, 'op_extended') }} + - name: 'op_ut' + condition: ${{ contains(inputs.ut, 'op_ut') }} + - name: 'torch_xpu' + condition: ${{ contains(inputs.ut, 'torch_xpu') }} + - name: 'xpu_profiling' + condition: ${{ contains(inputs.ut, 'xpu_profiling') }} + - name: 'xpu_distributed' + condition: ${{ contains(inputs.ut, 'xpu_distributed') }} env: GH_TOKEN: ${{ github.token }} UT_SKIP_ISSUE: 1624 + UT_NAME: ${{ matrix.test.name }} steps: - - name: Set the UT name - run: | - echo "UT_NAME=$(echo ${{ inputs.ut }} |sed 's/,/-/g')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download XPU UT Logs + if: ${{ matrix.test.condition }} uses: actions/download-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-xpu_distributed + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }} path: ${{ github.workspace }}/ut_log - name: Check UT Results + if: ${{ matrix.test.condition }} shell: bash run: | repo="${{ github.repository }}" - function contains() { - contains_status="echo 'Start $2 ...'" - { - [[ $1 =~ (^|,)$2($|,) ]] - } || { - echo "[Warning] $2 is not suppotted type! Skipped!" - contains_status="continue" - } - } - set -xe - echo "UT_NAME=$(echo ${{ inputs.ut }} |sed 's/,/-/g')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - cd ${{ github.workspace }}/ut_log/xpu_distributed + cd ${{ github.workspace }}/ut_log/${{ matrix.test.name }} gh --repo $repo issue view $UT_SKIP_ISSUE --json body -q .body | sed '/^$/d' > Known_issue.log gh api "repos/${{ github.repository }}/issues?labels=skipped" \ - --jq '.[] | select(.pull_request == null) | "Issue #\(.number): \(.title)\n\(.body)\n"' \ - > issues.log - awk '/Cases:/ {flag=1; next} /^\|\||^$/ {flag=0} flag' issues.log | grep -Eo 'test[^[:space:]]+( \|\| [^[:space:]]+)?' | sed 's/ *|| */ /g' | sort -u > issues_temp.log + --jq '.[] | select(.pull_request == null) | "Issue #\(.number): \(.title)\n\(.body)\n"' > issues.log + awk '/Cases:/ {flag=1; next} /^\|\||^$/ {flag=0} flag' issues.log | \ + grep -Eo 'test[^[:space:]]+( \|\| [^[:space:]]+)?' 
| sed 's/ *|| */ /g' | sort -u > issues_temp.log awk '$2 == "op_ut" {print $1}' issues_temp.log > issues_op_ut.log cat issues_temp.log | awk '{print $1}' >> Known_issue.log awk -F'::' '{print $1}' issues_op_ut.log | sort -u | paste -sd ',' >> Known_issue.log cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./ - bash ut_result_check.sh 'xpu_distributed' + bash ut_result_check.sh ${{ matrix.test.name }} - name: Upload Inductor XPU UT Log - if: always() + if: ${{ matrix.test.condition }} uses: actions/upload-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-xpu_distributed-checked + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }}-checked path: ${{ github.workspace }}/ut_log diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 9e804a27ee..3a2c819e32 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -21,9 +21,8 @@ concurrency: jobs: preci-lint-check: - name: preci-lint-check if: ${{ github.repository_owner == 'intel' }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest timeout-minutes: 30 steps: - name: Checkout torch-xpu-ops @@ -52,10 +51,9 @@ jobs: bash third_party/torch-xpu-ops/.github/scripts/lintrunner.sh conditions-filter: - name: conditions-filter if: ${{ github.event.pull_request.draft == false }} needs: [preci-lint-check] - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest timeout-minutes: 10 env: GH_TOKEN: ${{ github.token }} @@ -93,7 +91,6 @@ jobs: echo "disabled_tests=${disabled_tests}" |tee "${GITHUB_OUTPUT}" linux-build: - name: linux if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all')}} needs: [conditions-filter] secrets: inherit @@ -105,7 +102,6 @@ jobs: torch_xpu_ops: cicd linux-ut: - name: linux needs: [conditions-filter, linux-build] uses: ./.github/workflows/_linux_ut.yml with: @@ -117,7 +113,6 @@ jobs: linux-e2e: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_e2e') }} - name: linux permissions: write-all needs: [conditions-filter, linux-build] uses: ./.github/workflows/_linux_e2e.yml From ec697f5398a5bbeb1609a52001f58bc2e5b4c07f Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 17:33:10 +0800 Subject: [PATCH 076/160] modify build --- .github/actions/setup-testenv/action.yml | 4 +++- .github/workflows/_linux_build.yml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index a26f543f45..1e4c100402 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -32,7 +32,9 @@ runs: - name: Setup gh run: | curl -sS https://webi.sh/gh | sh - echo "PATH=${HOME}/.local/bin:$PATH" >> ${GITHUB_ENV} + echo "PATH=${HOME}/.local/bin:${PATH}" |tee -a ${GITHUB_ENV} + source ~/.config/envman/PATH.env + gh --version - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index d24628b6d5..786e227e87 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -71,7 +71,7 @@ jobs: run: | hostname && id curl -sS https://webi.sh/gh | sh - echo "PATH=${HOME}/.local/bin:$PATH" |tee -a ${GITHUB_ENV} + echo "PATH=${HOME}/.local/bin:${PATH}" |tee -a ${GITHUB_ENV} source ~/.config/envman/PATH.env gh --version - name: Setup python-${{ inputs.python }} @@ -92,6 +92,7 @@ jobs: path: 
torch-xpu-ops - name: Build Pytorch XPU run: | + source ~/.config/envman/PATH.env if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" @@ -187,7 +188,6 @@ jobs: python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' chmod 777 . -R - - name: Upload Torch XPU Wheel if: ${{ success() }} uses: actions/upload-artifact@v4 From c1e4ca740ed2ab461d01f0de851b1cc182967193 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 23 Jul 2025 17:56:54 +0800 Subject: [PATCH 077/160] modify build --- .github/workflows/_linux_build.yml | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 786e227e87..6572447f6f 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -36,6 +36,10 @@ on: permissions: read-all +defaults: + run: + shell: bash -xe {0} + jobs: runner: runs-on: ${{ inputs.runner }} @@ -60,29 +64,22 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} env: + PATH: /tmp/xpu-build/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/github/home/.local/bin GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: /tmp/_tools timeout-minutes: 300 - defaults: - run: - shell: bash -xe {0} steps: - name: Setup gh run: | + # Cleanup workspace + find ./ |grep -v "^\./$" |xargs rm -rf hostname && id curl -sS https://webi.sh/gh | sh - echo "PATH=${HOME}/.local/bin:${PATH}" |tee -a ${GITHUB_ENV} - source ~/.config/envman/PATH.env gh --version - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 - with: - python-version: 'pypy${{ inputs.python }}' - check-latest: true - - name: Clean workspace run: | - # Cleanup workspace - find ./ |grep -v "^\./$" |xargs rm -rf + local_python=$(echo ${{ inputs.python }} |awk -F. 
'{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') + /opt/python/${local_python}/bin/python -m venv /tmp/xpu-build which python && python -V which pip && pip list pip install -U pip wheel setuptools @@ -92,7 +89,6 @@ jobs: path: torch-xpu-ops - name: Build Pytorch XPU run: | - source ~/.config/envman/PATH.env if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" From 50e40fe290c7244e468f3e27bda5a2fd3699f748 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 09:31:10 +0800 Subject: [PATCH 078/160] modify build --- .github/actions/setup-testenv/action.yml | 11 ++--- .github/workflows/_linux_build.yml | 8 +-- .github/workflows/_linux_e2e.yml | 12 ++--- .github/workflows/_linux_ut.yml | 62 ++++-------------------- 4 files changed, 24 insertions(+), 69 deletions(-) diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index 1e4c100402..a387a6021c 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -31,6 +31,9 @@ runs: steps: - name: Setup gh run: | + hostname && id + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf curl -sS https://webi.sh/gh | sh echo "PATH=${HOME}/.local/bin:${PATH}" |tee -a ${GITHUB_ENV} source ~/.config/envman/PATH.env @@ -41,15 +44,11 @@ runs: python-version: ${{ inputs.python }} - name: Check runner run: | - ls -al - find ./ |grep -v "^\./$" |xargs rm -rf - hostname && whoami && id cat /etc/os-release gcc -v && g++ -v - which python && which pip - python -V + which python && python -V + which pip && pip list pip install -U pip wheel setuptools - pip list uname -a dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' clinfo --list diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 6572447f6f..503e132f4e 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -120,7 +120,7 @@ jobs: --TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \ --TORCH_XPU_OPS_COMMIT="${TORCH_XPU_OPS_COMMIT}" \ 2>&1 |tee ${{ github.workspace }}/build_pytorch_${PYTORCH_COMMIT//\//-}.log - if [ ! -f "${{ github.workspace }}/torch-*.whl" ];then + if [ $(ls ${{ github.workspace }} |grep -c "torch-.*.whl") -eq 0 ];then echo "Build pytorch got failed" exit 1 fi @@ -140,7 +140,7 @@ jobs: )" python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} \ 2>&1 |tee ${{ github.workspace }}/build_triton_${TRITON_COMMIT_ID}.log - if [ ! -f "${{ github.workspace }}/pytorch_triton_xpu-*.whl" ];then + if [ $(ls |grep -c "pytorch_triton_xpu-.*.whl") -eq 0 ];then echo "Build triton got failed" exit 1 fi @@ -157,7 +157,7 @@ jobs: git clone --single-branch -b main https://github.com/pytorch/vision.git xpu-vision cd xpu-vision && git checkout ${TORCHVISION_COMMIT_ID} python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_vision_${TRITON_COMMIT_ID}.log - if [ ! -f "dist/*.whl" ];then + if [ $(ls dist/ |grep -c "torchvision-.*.whl") -eq 0 ];then echo "Build torchvision got failed" exit 1 fi @@ -166,7 +166,7 @@ jobs: git clone --single-branch -b main https://github.com/pytorch/audio.git xpu-audio cd xpu-audio && git checkout ${TORCHAUDIO_COMMIT_ID} python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_audio_${TRITON_COMMIT_ID}.log - if [ ! 
-f "dist/*.whl" ];then + if [ $(ls dist/ |grep -c "torchaudio-.*.whl") -eq 0 ];then echo "Build torchaudio got failed" exit 1 fi diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 2c55ce60a7..2560a26a89 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -48,6 +48,10 @@ on: permissions: read-all +defaults: + run: + shell: bash -xe {0} + jobs: runner: runs-on: ${{ inputs.runner }} @@ -77,9 +81,6 @@ jobs: GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} MODEL_ONLY_NAME: ${{ inputs.model }} - defaults: - run: - shell: bash -xe {0} steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 @@ -224,7 +225,7 @@ jobs: path: ${{ github.workspace }}/upload_files summary: - runs-on: [self-hosted, Linux, X64] + runs-on: [self-hosted, Linux] if: ${{ always() }} needs: test permissions: @@ -235,9 +236,6 @@ jobs: AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} REFERENCE_ISSUE_ID: 1645 - defaults: - run: - shell: bash -xe {0} steps: - name: Install gh run: | diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 31391ff634..699baaf234 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -82,14 +82,6 @@ jobs: command: 'pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml' log_prefix: 'op_regression' timeout: 3600 - - name: 'op_regression_dev1' - condition: ${{ contains(inputs.ut, 'op_regression_dev1') }} - directory: 'pytorch/third_party/torch-xpu-ops/test/regressions' - command: 'pytest --timeout 600 -v test_operation_on_device_1.py --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml' - log_prefix: 'op_regression_dev1' - timeout: 300 - additional_steps: | - unset ZE_AFFINITY_MASK - name: 'op_transformers' condition: ${{ contains(inputs.ut, 'op_transformers') }} directory: 'pytorch' @@ -243,19 +235,9 @@ jobs: runs-on: pvc_rolling if: ${{ contains(inputs.ut, 'op_regression_dev1') && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 5 - strategy: - matrix: - test: - - name: 'op_regression_dev1' - condition: ${{ contains(inputs.ut, 'op_regression_dev1') }} - directory: 'pytorch/third_party/torch-xpu-ops/test/regressions' - command: 'pytest --timeout 600 -v test_operation_on_device_1.py --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml' - log_prefix: 'op_regression_dev1' - timeout: 3 env: - AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} - UT_NAME: ${{ matrix.test.name }} + AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/_tools steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 @@ -268,44 +250,20 @@ jobs: oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} - name: Run XPU UT Test - if: ${{ matrix.test.condition }} - run: | - mkdir -p ${{ github.workspace }}/ut_log - mkdir -p ${{ github.workspace }}/ut_log/${{ matrix.test.name }} - echo "Running ${{ matrix.test.name }}" - echo "Directory: ${{ matrix.test.directory }}" - ${{ matrix.test.additional_steps }} - cd ${{ matrix.test.directory }} - if [[ "${{ matrix.test.name }}" == "op_ut" ]] || [[ "${{ matrix.test.name }}" == "xpu_profiling" ]] || [[ "${{ matrix.test.name }}" == "torch_xpu" ]]; then - bash << "SCRIPT" - ${{ matrix.test.command_script }} - SCRIPT - else - timeout ${{ matrix.test.timeout }} ${{ matrix.test.command }} \ - 2>${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test_error.log | \ - tee ${{ github.workspace }}/ut_log/${{ 
matrix.test.name }}/${{ matrix.test.log_prefix }}_test.log - ${{ matrix.test.xml_post_processing || '' }} - fi - - name: UT Test Results Summary - if: ${{ matrix.test.condition }} run: | - pip install junitparser - python torch-xpu-ops/.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true - if [ -e "ut_failure_list.csv" ];then - cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv - fi + mkdir -p ${{ github.workspace }}/ut_log/op_regression_dev1 + echo "Running op_regression_dev1" + cd pytorch/third_party/torch-xpu-ops/test/regressions + pytest --timeout 200 -v test_operation_on_device_1.py \ + --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml \ + 2>${{ github.workspace }}/ut_log/op_regression_dev1/op_regression_dev1_test_error.log | \ + tee ${{ github.workspace }}/ut_log/op_regression_dev1/op_regression_dev1_test.log - name: Upload Inductor XPU UT Log - if: ${{ matrix.test.condition }} + if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-op_regression_dev1 path: ${{ github.workspace }}/ut_log - - name: Upload XPU UT Failure list - if: ${{ matrix.test.condition }} - uses: actions/upload-artifact@v4 - with: - name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} - path: ${{ github.workspace }}/ut_log/ut_failure_list.csv distributed: runs-on: pytorch-06 From b3f6f0ec3179087944e92be74438ccc291dc7d45 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 09:47:06 +0800 Subject: [PATCH 079/160] update --- .github/workflows/_linux_build.yml | 3 +++ .github/workflows/_linux_e2e.yml | 3 +++ .github/workflows/_linux_op_benchmark.yml | 10 +++++++--- .github/workflows/_linux_ut.yml | 3 +++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 503e132f4e..395addbb16 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -48,6 +48,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Cleanup workspace + run: | + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Get runner diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 2560a26a89..e773bce49f 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -60,6 +60,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Cleanup workspace + run: | + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Get runner diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index d19e53de40..cf05b7928a 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -27,6 +27,10 @@ on: permissions: read-all +defaults: + run: + shell: bash -xe {0} + jobs: get_runner: runs-on: ${{ inputs.runner }} @@ -35,6 +39,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Cleanup workspace + run: | + sudo find ./ |grep 
-v "^\./$" |xargs sudo rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Get runner @@ -58,9 +65,6 @@ jobs: GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} REFERENCE_ISSUE: 1689 - defaults: - run: - shell: bash -xe {0} steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 699baaf234..7db00ef27a 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -50,6 +50,9 @@ jobs: test_user: ${{ steps.runner-info.outputs.test_user }} test_group: ${{ steps.runner-info.outputs.test_group }} steps: + - name: Cleanup workspace + run: | + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Get runner From 42da693c771ad2da06319599ba256c2d718bdf09 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 14:09:55 +0800 Subject: [PATCH 080/160] modify build --- .github/scripts/build.sh | 2 +- .github/workflows/_linux_build.yml | 26 +++++++++++++++++--------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index 4545fc3bfa..b419883740 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -48,7 +48,7 @@ python -m pip install requests python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py git submodule sync && git submodule update --init --recursive python -m pip install -r requirements.txt -python -m pip install mkl-static mkl-include +python -m pip install mkl-static==2025.1.0 mkl-include==2025.1.0 export USE_STATIC_MKL=1 export USE_XCCL=1 if [ "${XPU_ONEAPI_PATH}" == "" ];then diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 395addbb16..da4981fd0a 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -66,12 +66,20 @@ jobs: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} + options: -e HOME=/tmp/xpu-build env: - PATH: /tmp/xpu-build/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/github/home/.local/bin + PATH: /tmp/xpu-build/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin GH_TOKEN: ${{ github.token }} - AGENT_TOOLSDIRECTORY: /tmp/_tools + AGENT_TOOLSDIRECTORY: /tmp/xpu-build/_tools timeout-minutes: 300 steps: + - name: Setup python-${{ inputs.python }} + run: | + local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') + /opt/python/${local_python}/bin/python -m venv /tmp/xpu-build/.local + which python && python -V + which pip && pip list + pip install -U pip wheel setuptools - name: Setup gh run: | # Cleanup workspace @@ -79,19 +87,16 @@ jobs: hostname && id curl -sS https://webi.sh/gh | sh gh --version - - name: Setup python-${{ inputs.python }} - run: | - local_python=$(echo ${{ inputs.python }} |awk -F. 
'{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv /tmp/xpu-build - which python && python -V - which pip && pip list - pip install -U pip wheel setuptools - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: path: torch-xpu-ops - name: Build Pytorch XPU run: | + # only build pvc for CI + if [ "${{ inputs.test_type }}" == "build-cicd" ];then + export TORCH_XPU_ARCH_LIST='pvc' + fi if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" @@ -129,6 +134,9 @@ jobs: fi - name: Build Triton run: | + # gcc 13 + dnf install -y gcc-toolset-13-gcc-c++ zlib-devel + source /opt/rh/gcc-toolset-13/enable cd ./pytorch pip install cmake ninja pybind11 rm -rf pytorch_triton_xpu-*.whl From 26b56dbbbb7bcd7e42bda0bb3044ea1b01bc13ab Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 14:32:49 +0800 Subject: [PATCH 081/160] modify build --- .github/workflows/_linux_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index da4981fd0a..dfc97b4a07 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -66,9 +66,9 @@ jobs: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: -e HOME=/tmp/xpu-build env: PATH: /tmp/xpu-build/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + HOME: /tmp/xpu-build GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: /tmp/xpu-build/_tools timeout-minutes: 300 From 77d817270cf37dec1eb9b10548cefe142e5388f4 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 15:22:51 +0800 Subject: [PATCH 082/160] modify build --- .github/actions/get-runner/action.yml | 9 +- .github/actions/setup-testenv/action.yml | 203 +++++++++++------------ .github/workflows/_linux_build.yml | 8 +- 3 files changed, 113 insertions(+), 107 deletions(-) diff --git a/.github/actions/get-runner/action.yml b/.github/actions/get-runner/action.yml index 74544db7e4..6525957998 100644 --- a/.github/actions/get-runner/action.yml +++ b/.github/actions/get-runner/action.yml @@ -37,7 +37,12 @@ runs: run: | # clean docker cache docker system prune -af || true - # clean files + # clean workspace ls -al sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - sudo rm -rf ${RUNNER_TEMP} ${RUNNER_TOOL_CACHE} + cd ${RUNNER_WORKSPACE}/.. 
+ if [ "${PWD}" != "/" ];then + ls -al + sudo chmod 777 -R torch-xpu-ops _temp _actions _tool || true + sudo rm -rf _temp + fi diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index a387a6021c..403105b0e6 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -29,107 +29,106 @@ permissions: read-all runs: using: composite steps: - - name: Setup gh - run: | - hostname && id - ls -al - find ./ |grep -v "^\./$" |xargs rm -rf - curl -sS https://webi.sh/gh | sh - echo "PATH=${HOME}/.local/bin:${PATH}" |tee -a ${GITHUB_ENV} - source ~/.config/envman/PATH.env - gh --version - - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python }} - - name: Check runner - run: | - cat /etc/os-release - gcc -v && g++ -v - which python && python -V - which pip && pip list - pip install -U pip wheel setuptools - uname -a - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - clinfo --list - cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - with: - path: torch-xpu-ops - - name: Install oneAPI DLE - if: ${{ inputs.oneapi != 'installed' }} - run: | - rm -rf ~/intel ~/.intel - wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - - name: Download Pytorch wheel - if: ${{ ! contains(inputs.test_type, 'wheel') }} - uses: actions/download-artifact@v4 - with: - pattern: Torch-XPU-Wheel-* - - name: Prepare Stock Pytorch - run: | - # install pytorch - if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + - name: Cleanup workspace + run: | + hostname && id + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf + rm -rf ~/.triton /tmp/*inductor* + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Check runner + run: | + cat /etc/os-release + gcc -v && g++ -v + which python && python -V + which pip && pip list + pip install -U pip wheel setuptools + uname -a + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + clinfo --list + cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + with: + path: torch-xpu-ops + - name: Install oneAPI DLE + if: ${{ inputs.oneapi != 'installed' }} + run: | + rm -rf ~/intel ~/.intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} + source ${HOME}/intel/oneapi/setvars.sh + sycl-ls && icpx -v + - name: Download Pytorch wheel + if: ${{ ! 
contains(inputs.test_type, 'wheel') }} + uses: actions/download-artifact@v4 + with: + pattern: Torch-XPU-Wheel-* + - name: Prepare Stock Pytorch + run: | + # install pytorch + if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + else + pip install --force-reinstall ${{ github.workspace }}/*.whl + fi + TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') + if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then + PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" + else + PYTORCH_REPO="https://github.com/pytorch/pytorch.git" + fi + git clone ${PYTORCH_REPO} pytorch + cd pytorch + git checkout ${TORCH_COMMIT_ID} + pip install -r .ci/docker/requirements-ci.txt + # apply extra PRs for stock pytorch + if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 + else + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + fi + git status && git diff && git show -s + - name: Prepare Torch-xpu-ops + if: ${{ inputs.torch_xpu_ops != 'skipped' }} + run: | + cd pytorch + rm -rf third_party/torch-xpu-ops + if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" + else + TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" + if [ "${{ inputs.torch_xpu_ops }}" == "pinned" ];then + TORCH_XPU_OPS_COMMIT="$(cat third_party/xpu.txt)" else - pip install --force-reinstall ${{ github.workspace }}/*.whl + TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then - PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - else - PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - fi - git clone ${PYTORCH_REPO} pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - pip install -r .ci/docker/requirements-ci.txt - # apply extra PRs for stock pytorch - if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 - else - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - fi - git status && git diff && git show -s - - name: Prepare Torch-xpu-ops - if: ${{ inputs.torch_xpu_ops != 'skipped' }} - run: | - cd pytorch - rm -rf third_party/torch-xpu-ops - if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then - TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" - TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" - else - TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - if [ "${{ inputs.torch_xpu_ops }}" == "pinned" ];then - TORCH_XPU_OPS_COMMIT="$(cat third_party/xpu.txt)" - else - TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" - fi - fi - if [ "${{ inputs.test_type }}" == "cicd" ];then - cp -r ${{ 
github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops - else - git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops - fi - cd third_party/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_COMMIT} - git status && git diff && git show -s - - name: Torch Config - run: | - printenv - python -c "import torch; print(torch.__config__.show())" - python -c "import torch; print(torch.__config__.parallel_info())" - python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import torchvision; print(torchvision.__version__)" - python -c "import torchaudio; print(torchaudio.__version__)" - python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py - pip list |grep -E 'torch|intel' + fi + if [ "${{ inputs.test_type }}" == "cicd" ];then + cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops + else + git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops + fi + cd third_party/torch-xpu-ops + git checkout ${TORCH_XPU_OPS_COMMIT} + git status && git diff && git show -s + - name: Torch Config + run: | + printenv + python -c "import torch; print(torch.__config__.show())" + python -c "import torch; print(torch.__config__.parallel_info())" + python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" + python -c "import triton; print(triton.__version__)" + python pytorch/torch/utils/collect_env.py + pip list |grep -E 'torch|intel' diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index dfc97b4a07..035b4f136e 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -68,15 +68,15 @@ jobs: - ${{ github.workspace }}:${{ github.workspace }} env: PATH: /tmp/xpu-build/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - HOME: /tmp/xpu-build GH_TOKEN: ${{ github.token }} - AGENT_TOOLSDIRECTORY: /tmp/xpu-build/_tools + AGENT_TOOLSDIRECTORY: /tmp/xpu-build timeout-minutes: 300 steps: - name: Setup python-${{ inputs.python }} run: | + rm -rf ${AGENT_TOOLSDIRECTORY} local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv /tmp/xpu-build/.local + /opt/python/${local_python}/bin/python -m venv ${AGENT_TOOLSDIRECTORY}/.local which python && python -V which pip && pip list pip install -U pip wheel setuptools @@ -85,6 +85,8 @@ jobs: # Cleanup workspace find ./ |grep -v "^\./$" |xargs rm -rf hostname && id + # install gh + export HOME=${AGENT_TOOLSDIRECTORY} curl -sS https://webi.sh/gh | sh gh --version - name: Checkout torch-xpu-ops From e9d551a7fd60624c87556dcf9d13ebcbe582b05c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 15:29:36 +0800 Subject: [PATCH 083/160] update --- .github/workflows/_linux_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 035b4f136e..63c92205cf 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -196,7 +196,7 @@ jobs: python -c "import torchaudio; print(torchaudio.__version__)" python pytorch/torch/utils/collect_env.py pip list |grep -E 'torch|intel' - chmod 777 . 
-R + chmod 777 /__w -R - name: Upload Torch XPU Wheel if: ${{ success() }} uses: actions/upload-artifact@v4 From 9649dfd94733d1f387d95cb287ffe643c1e665cd Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 17:40:45 +0800 Subject: [PATCH 084/160] update --- .github/actions/get-runner/action.yml | 21 ++++++------ .github/actions/setup-testenv/action.yml | 2 +- .github/workflows/_linux_build.yml | 39 ++++++++++++----------- .github/workflows/_linux_e2e.yml | 13 ++++---- .github/workflows/_linux_op_benchmark.yml | 17 +++++----- .github/workflows/_linux_ut.yml | 32 +++++++++++++------ 6 files changed, 73 insertions(+), 51 deletions(-) diff --git a/.github/actions/get-runner/action.yml b/.github/actions/get-runner/action.yml index 6525957998..513c185bd3 100644 --- a/.github/actions/get-runner/action.yml +++ b/.github/actions/get-runner/action.yml @@ -4,12 +4,14 @@ on: workflow_call: outputs: - test_host: - value: ${{ steps.runner.outputs.test_host }} - test_user: - value: ${{ steps.runner.outputs.test_user }} - test_group: - value: ${{ steps.runner.outputs.test_group }} + runner_id: + value: ${{ steps.runner.outputs.runner_id }} + user_id: + value: ${{ steps.runner.outputs.user_id }} + render_id: + value: ${{ steps.runner.outputs.render_id }} + hostname: + value: ${{ steps.runner.outputs.hostname }} permissions: read-all @@ -21,9 +23,10 @@ runs: id: runner run: | # get test runner - echo "test_host=$(echo ${RUNNER_NAME} |sed 's/\-[0-9]$//')" |tee -a ${GITHUB_OUTPUT} - echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} - echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + echo "runner_id=$(echo ${RUNNER_NAME} |sed 's/\-[0-9]$//')" |tee -a ${GITHUB_OUTPUT} + echo "user_id=$(id -u)" |tee -a ${GITHUB_OUTPUT} + echo "render_id=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + echo "hostname=$(hostname)" |tee -a ${GITHUB_OUTPUT} # show host info lscpu lshw -C display diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index 403105b0e6..2df6313461 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -31,7 +31,6 @@ runs: steps: - name: Cleanup workspace run: | - hostname && id ls -al find ./ |grep -v "^\./$" |xargs rm -rf rm -rf ~/.triton /tmp/*inductor* @@ -41,6 +40,7 @@ runs: python-version: ${{ inputs.python }} - name: Check runner run: | + hostname && id cat /etc/os-release gcc -v && g++ -v which python && python -V diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 63c92205cf..53fdc621a0 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -44,9 +44,10 @@ jobs: runner: runs-on: ${{ inputs.runner }} outputs: - test_host: ${{ steps.runner-info.outputs.test_host }} - test_user: ${{ steps.runner-info.outputs.test_user }} - test_group: ${{ steps.runner-info.outputs.test_group }} + runner_id: ${{ steps.runner-info.outputs.runner_id }} + user_id: ${{ steps.runner-info.outputs.user_id }} + render_id: ${{ steps.runner-info.outputs.render_id }} + hostname: ${{ steps.runner-info.outputs.hostname }} steps: - name: Cleanup workspace run: | @@ -61,39 +62,41 @@ jobs: name: ${{ contains(inputs.test_type, 'wheel') && inputs.pytorch || 'build' }} needs: runner if: ${{ ! 
contains(inputs.test_type, 'wheel') }} - runs-on: ${{ needs.runner.outputs.test_host }} + runs-on: ${{ needs.runner.outputs.runner_id }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} env: - PATH: /tmp/xpu-build/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + PATH: /tmp/xpu-venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin GH_TOKEN: ${{ github.token }} - AGENT_TOOLSDIRECTORY: /tmp/xpu-build + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool timeout-minutes: 300 steps: + - name: Install gh-cli + run: | + cat /etc/os-release + hostname && id + # Cleanup workspace + find ./ |grep -v "^\./$" |xargs rm -rf + # install gh + dnf install -y 'dnf-command(config-manager)' + dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo + dnf install -y gh --repo gh-cli + gh --version - name: Setup python-${{ inputs.python }} run: | - rm -rf ${AGENT_TOOLSDIRECTORY} + rm -rf /tmp/xpu-venv local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv ${AGENT_TOOLSDIRECTORY}/.local + /opt/python/${local_python}/bin/python -m venv /tmp/xpu-venv which python && python -V which pip && pip list pip install -U pip wheel setuptools - - name: Setup gh - run: | - # Cleanup workspace - find ./ |grep -v "^\./$" |xargs rm -rf - hostname && id - # install gh - export HOME=${AGENT_TOOLSDIRECTORY} - curl -sS https://webi.sh/gh | sh - gh --version - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: path: torch-xpu-ops - - name: Build Pytorch XPU + - name: Build Pytorch on ${{ needs.runner.outputs.hostname }} run: | # only build pvc for CI if [ "${{ inputs.test_type }}" == "build-cicd" ];then diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index e773bce49f..26e87b8031 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -56,9 +56,10 @@ jobs: runner: runs-on: ${{ inputs.runner }} outputs: - test_host: ${{ steps.runner-info.outputs.test_host }} - test_user: ${{ steps.runner-info.outputs.test_user }} - test_group: ${{ steps.runner-info.outputs.test_group }} + runner_id: ${{ steps.runner-info.outputs.runner_id }} + user_id: ${{ steps.runner-info.outputs.user_id }} + render_id: ${{ steps.runner-info.outputs.render_id }} + hostname: ${{ steps.runner-info.outputs.hostname }} steps: - name: Cleanup workspace run: | @@ -70,7 +71,7 @@ jobs: uses: ./.github/actions/get-runner test: - runs-on: ${{ needs.runner.outputs.test_host }} + runs-on: ${{ needs.runner.outputs.runner_id }} needs: runner timeout-minutes: 3600 container: @@ -78,7 +79,7 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.runner.outputs.test_user }}:${{ needs.runner.outputs.test_group }} + -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: AGENT_TOOLSDIRECTORY: /tmp/_tools GH_TOKEN: ${{ github.token }} @@ -87,7 +88,7 @@ jobs: steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Setup Test Env + - name: Launch Test on ${{ needs.runner.outputs.hostname }} uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index cf05b7928a..352b3a81e1 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ 
b/.github/workflows/_linux_op_benchmark.yml @@ -32,12 +32,13 @@ defaults: shell: bash -xe {0} jobs: - get_runner: + runner: runs-on: ${{ inputs.runner }} outputs: - test_host: ${{ steps.runner-info.outputs.test_host }} - test_user: ${{ steps.runner-info.outputs.test_user }} - test_group: ${{ steps.runner-info.outputs.test_group }} + runner_id: ${{ steps.runner-info.outputs.runner_id }} + user_id: ${{ steps.runner-info.outputs.user_id }} + render_id: ${{ steps.runner-info.outputs.render_id }} + hostname: ${{ steps.runner-info.outputs.hostname }} steps: - name: Cleanup workspace run: | @@ -49,8 +50,8 @@ jobs: uses: ./.github/actions/get-runner op_benchmark: - needs: get_runner - runs-on: ${{ needs.get_runner.outputs.test_host }} + needs: runner + runs-on: ${{ needs.runner.outputs.runner_id }} permissions: issues: write timeout-minutes: 900 @@ -59,7 +60,7 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} + -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: AGENT_TOOLSDIRECTORY: /opt/_tools GH_TOKEN: ${{ github.token }} @@ -68,7 +69,7 @@ jobs: steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Setup Test Env + - name: Launch Test on ${{ needs.runner.outputs.hostname }} uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 7db00ef27a..3530d765ea 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -46,9 +46,10 @@ jobs: runner: runs-on: ${{ inputs.runner }} outputs: - test_host: ${{ steps.runner-info.outputs.test_host }} - test_user: ${{ steps.runner-info.outputs.test_user }} - test_group: ${{ steps.runner-info.outputs.test_group }} + runner_id: ${{ steps.runner-info.outputs.runner_id }} + user_id: ${{ steps.runner-info.outputs.user_id }} + render_id: ${{ steps.runner-info.outputs.render_id }} + hostname: ${{ steps.runner-info.outputs.hostname }} steps: - name: Cleanup workspace run: | @@ -61,7 +62,7 @@ jobs: normal: needs: runner - runs-on: ${{ needs.runner.outputs.test_host }} + runs-on: ${{ needs.runner.outputs.runner_id }} if: ${{ inputs.ut != 'xpu_distributed' && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 300 container: @@ -69,7 +70,7 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g - -u ${{ needs.runner.outputs.test_user }}:${{ needs.runner.outputs.test_group }} + -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} -e ZE_AFFINITY_MASK env: AGENT_TOOLSDIRECTORY: /tmp/_tools @@ -186,7 +187,7 @@ jobs: steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Setup Test Env + - name: Launch Test on ${{ needs.runner.outputs.hostname }} uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} @@ -242,9 +243,19 @@ jobs: GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/_tools steps: + - name: Cleanup workspace + id: cleanup + run: | + cat /etc/os-release + echo "hostname=$(hostname)" |tee -a ${GITHUB_OUTPUT} + ls -al + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + sudo find /tmp/ |grep -v "^/tmp/$" |xargs sudo rm -rf + sudo rm -rf ~/.triton ~/.torch + xpu-smi discovery - name: Checkout 
torch-xpu-ops uses: actions/checkout@v4 - - name: Setup Test Env + - name: Launch Test on ${{ steps.cleanup.outputs.hostname }} uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} @@ -276,8 +287,11 @@ jobs: GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/_tools steps: - - name: Check runner + - name: Cleanup workspace + id: cleanup run: | + cat /etc/os-release + echo "hostname=$(hostname)" |tee -a ${GITHUB_OUTPUT} ls -al sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf sudo find /tmp/ |grep -v "^/tmp/$" |xargs sudo rm -rf @@ -285,7 +299,7 @@ jobs: xpu-smi topology -m - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - - name: Setup Test Env + - name: Launch Test on ${{ steps.cleanup.outputs.hostname }} uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} From 84a513213bb146723781da6377dad8c51c16be3d Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 17:43:09 +0800 Subject: [PATCH 085/160] update --- .github/workflows/_linux_e2e.yml | 2 +- .github/workflows/_linux_op_benchmark.yml | 2 +- .github/workflows/_linux_ut.yml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 26e87b8031..af909adee4 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -81,7 +81,7 @@ jobs: options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: - AGENT_TOOLSDIRECTORY: /tmp/_tools + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} MODEL_ONLY_NAME: ${{ inputs.model }} diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 352b3a81e1..a3fa6e32d3 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -62,7 +62,7 @@ jobs: options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: - AGENT_TOOLSDIRECTORY: /opt/_tools + AGENT_TOOLSDIRECTORY: /opt/xpu-tool GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} REFERENCE_ISSUE: 1689 diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 3530d765ea..f1cf802fbd 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -73,7 +73,7 @@ jobs: -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} -e ZE_AFFINITY_MASK env: - AGENT_TOOLSDIRECTORY: /tmp/_tools + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} strategy: @@ -241,7 +241,7 @@ jobs: timeout-minutes: 5 env: GH_TOKEN: ${{ github.token }} - AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/_tools + AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/xpu-tool steps: - name: Cleanup workspace id: cleanup @@ -285,7 +285,7 @@ jobs: timeout-minutes: 60 env: GH_TOKEN: ${{ github.token }} - AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/_tools + AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/xpu-tool steps: - name: Cleanup workspace id: cleanup From ee18a1c4f7515a969d34f894a956c317ede96a43 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 17:49:57 +0800 Subject: [PATCH 086/160] update --- 
.github/actions/setup-testenv/action.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index 2df6313461..ba429457b2 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -30,6 +30,7 @@ runs: using: composite steps: - name: Cleanup workspace + shell: bash -xe {0} run: | ls -al find ./ |grep -v "^\./$" |xargs rm -rf @@ -39,6 +40,7 @@ runs: with: python-version: ${{ inputs.python }} - name: Check runner + shell: bash -xe {0} run: | hostname && id cat /etc/os-release @@ -55,6 +57,7 @@ runs: with: path: torch-xpu-ops - name: Install oneAPI DLE + shell: bash -xe {0} if: ${{ inputs.oneapi != 'installed' }} run: | rm -rf ~/intel ~/.intel @@ -69,6 +72,7 @@ runs: with: pattern: Torch-XPU-Wheel-* - name: Prepare Stock Pytorch + shell: bash -xe {0} run: | # install pytorch if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then @@ -98,6 +102,7 @@ runs: fi git status && git diff && git show -s - name: Prepare Torch-xpu-ops + shell: bash -xe {0} if: ${{ inputs.torch_xpu_ops != 'skipped' }} run: | cd pytorch @@ -122,6 +127,7 @@ runs: git checkout ${TORCH_XPU_OPS_COMMIT} git status && git diff && git show -s - name: Torch Config + shell: bash -xe {0} run: | printenv python -c "import torch; print(torch.__config__.show())" From 43fee42afdac60c3bcd03a85a7c33df060a5e086 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 17:53:05 +0800 Subject: [PATCH 087/160] update --- .github/workflows/_linux_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index af909adee4..79d07bf925 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -237,7 +237,7 @@ jobs: container: image: ubuntu:latest env: - AGENT_TOOLSDIRECTORY: /tmp/_tools + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool GH_TOKEN: ${{ github.token }} REFERENCE_ISSUE_ID: 1645 steps: From e8f1c0dbbe74ca6a83864eb789b62ec91742744c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 24 Jul 2025 17:57:50 +0800 Subject: [PATCH 088/160] update --- .github/workflows/nightly_ondemand.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 3d6e8b9fe3..d4c12de348 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -106,7 +106,7 @@ jobs: Linux-Nightly-Ondemand-Build: needs: [Conditions-Filter] - name: linux + name: linux-build secrets: inherit uses: ./.github/workflows/_linux_build.yml with: @@ -120,7 +120,7 @@ jobs: Linux-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'p') }} - name: linux + name: linux-ut needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_ut.yml with: @@ -134,7 +134,7 @@ jobs: Linux-Nightly-Ondemand-E2E-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} - name: linux + name: linux-e2e permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_e2e.yml @@ -152,7 +152,7 @@ jobs: Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling: if: ${{ github.event_name == 'schedule' }} - name: linux-nightly-ondemand-rolling / Op_microbench + name: linux-microbench permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: 
./.github/workflows/_linux_op_benchmark.yml @@ -165,7 +165,7 @@ jobs: Windows-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' }} - name: Windows-nightly-ondemand + name: windows uses: ./.github/workflows/_windows_ut.yml with: ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }} From 517b08121eec2efefc5420a77a7cc4c1cd9019d6 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 09:23:59 +0800 Subject: [PATCH 089/160] update --- .github/actions/setup-testenv/action.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index ba429457b2..a056168ab3 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -75,15 +75,16 @@ runs: shell: bash -xe {0} run: | # install pytorch - if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" |wc -l) -ne 0 ];then + if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" -c) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" |wc -l) -ne 0 ];then + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" -c) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" |wc -l) -ne 0 ];then + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" -c) -ne 0 ];then pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu else - pip install --force-reinstall ${{ github.workspace }}/*.whl + pip install --force-reinstall $(find ${{ github.workspace }}/ -name "*torch*.whl") fi + pip list |grep torch TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" From ddecdf96ae1d32ccbbf5c146a587d596bb3e2c7b Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 09:58:07 +0800 Subject: [PATCH 090/160] update --- .github/actions/setup-testenv/action.yml | 7 +------ .github/workflows/_linux_e2e.yml | 4 ++++ .github/workflows/_linux_op_benchmark.yml | 3 +++ .github/workflows/_linux_ut.yml | 6 ++++++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index a056168ab3..ae5068924d 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -29,12 +29,6 @@ permissions: read-all runs: using: composite steps: - - name: Cleanup workspace - shell: bash -xe {0} - run: | - ls -al - find ./ |grep -v "^\./$" |xargs rm -rf - rm -rf ~/.triton /tmp/*inductor* - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: @@ -52,6 +46,7 @@ runs: dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' clinfo --list cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c + rm -rf ~/.triton /tmp/*inductor* - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 79d07bf925..4088b30f9e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -86,6 +86,9 @@ jobs: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} MODEL_ONLY_NAME: ${{ inputs.model 
}} steps: + - name: Cleanup workspace + run: | + find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Launch Test on ${{ needs.runner.outputs.hostname }} @@ -245,6 +248,7 @@ jobs: run: | apt-get update apt-get install gh rsync ca-certificates -y + find ./ |grep -v "^\./$" |xargs rm -rf - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index a3fa6e32d3..bd6c1adc70 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -67,6 +67,9 @@ jobs: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} REFERENCE_ISSUE: 1689 steps: + - name: Cleanup workspace + run: | + find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Launch Test on ${{ needs.runner.outputs.hostname }} diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index f1cf802fbd..172d0e793b 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -185,6 +185,9 @@ jobs: env: UT_NAME: ${{ matrix.test.name }} steps: + - name: Cleanup workspace + run: | + find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Launch Test on ${{ needs.runner.outputs.hostname }} @@ -370,6 +373,9 @@ jobs: UT_SKIP_ISSUE: 1624 UT_NAME: ${{ matrix.test.name }} steps: + - name: Cleanup workspace + run: | + find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download XPU UT Logs From d1bf4cf827c28c4fc5a57f086e46837aca1f765b Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 10:37:33 +0800 Subject: [PATCH 091/160] modify ut --- .github/actions/linux-ut/action.yml | 138 ++++++++++++++++++ .github/scripts/check-ut.py | 4 +- .github/scripts/ut_result_check.sh | 2 +- .github/workflows/_linux_ut.yml | 193 +++---------------------- .github/workflows/_windows_ut.yml | 2 +- .github/workflows/nightly_ondemand.yml | 4 +- .github/workflows/pull.yml | 2 +- 7 files changed, 167 insertions(+), 178 deletions(-) create mode 100644 .github/actions/linux-ut/action.yml diff --git a/.github/actions/linux-ut/action.yml b/.github/actions/linux-ut/action.yml new file mode 100644 index 0000000000..4ef45eb2fe --- /dev/null +++ b/.github/actions/linux-ut/action.yml @@ -0,0 +1,138 @@ +name: Linux Unit Test + +on: + workflow_call: + inputs: + test_type: + required: true + type: string + description: Test scope + +permissions: read-all + +runs: + using: composite + steps: + - name: op_regression + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_regression' }} + run: | + cd pytorch/third_party/torch-xpu-ops/test/regressions + pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml + - name: op_transformers + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_transformers' }} + run: | + export PYTORCH_TEST_WITH_SLOW=1 + cd pytorch + pytest --timeout 600 -v test/test_transformers.py -k xpu \ + --junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml + - name: op_extended + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_extended' }} + run: | + export PYTORCH_TEST_WITH_SLOW=1 + cd pytorch/third_party/torch-xpu-ops/test/xpu/extended + timeout 3600 python run_test_with_skip.py + cp op_extended.xml $GITHUB_WORKSPACE/ut_log + - name: op_ut + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_ut' }} + run: | + export 
PYTORCH_TEST_WITH_SLOW=1
+        export PYTORCH_ENABLE_XPU_FALLBACK=1
+        cd pytorch/third_party/torch-xpu-ops/test/xpu
+        timeout 10000 python run_test_with_skip.py \
+          2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test_error.log | \
+          tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test.log
+        cp *.xml $GITHUB_WORKSPACE/ut_log
+        find op_ut_with_skip_nn op_ut_with_skip_quantization/core -type f -exec sh -c '
+          dir_path=$(dirname "$1");
+          case "$dir_path" in
+            *"op_ut_with_skip_quantization/core"*)
+              dir_name="op_ut_with_skip_quantization_core";;
+            *)
+              dir_name=$(basename "$dir_path");;
+          esac;
+          mv "$1" "$dir_path/${dir_name}_$(basename "$1")"
+        ' _ {} \;
+        cp op_ut_with_skip_nn/*.xml $GITHUB_WORKSPACE/ut_log
+        cp op_ut_with_skip_quantization/core/*.xml $GITHUB_WORKSPACE/ut_log
+        # Cases run with an on-demand white list, since some suites are too
+        # slow to go through all operators on CPU. So add cases on-demand
+        # when XPU implementation is done.
+        # test_foreach, test_decomp
+        # Run with only
+        timeout 10000 python run_test_with_only.py \
+          2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test_error.log | \
+          tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test.log
+        cp op_ut_with_only.xml $GITHUB_WORKSPACE/ut_log
+    - name: torch_xpu
+      shell: bash -xe {0}
+      if: ${{ inputs.test_type == 'torch_xpu' }}
+      run: |
+        export PYTORCH_TEST_WITH_SLOW=1
+        export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
+        cd pytorch
+        test_cmd="python test/run_test.py --include "
+        for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done
+        for test in $(ls test/xpu | grep test); do test_cmd="${test_cmd} xpu/$test"; done
+        if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi
+        eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \
+          tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log
+    - name: xpu_profiling
+      shell: bash -xe {0}
+      if: ${{ inputs.test_type == 'xpu_profiling' }}
+      run: |
+        mkdir -p ut_log/profile_test/issue_reproduce
+        cd pytorch/third_party/torch-xpu-ops
+        # RN50 Test
+        PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0
+        cp profiling.fp32.train.pt $GITHUB_WORKSPACE/ut_log/profile_test
+        # All Issue Reproduce UT
+        python -u test/profiling/correlation_id_mixed.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/correlation_id_mixed.log
+        python -u test/profiling/reproducer.missing.gpu.kernel.time.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/reproducer.missing.gpu.kernel.time.log
+        python -u test/profiling/time_precision_in_profile.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/time_precision_in_profile.log
+        python -u test/profiling/profile_partial_runtime_ops.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/profile_partial_runtime_ops.log
+        python -u test/profiling/triton_xpu_ops_time.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log
+        # All xpu ut under test/profiler
+        cd ../pytorch/test/profiler
+        python -m pytest --timeout 600 -vs test_cpp_thread.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/test_cpp_thread.log
+        python -m pytest --timeout 600 -vs test_execution_trace.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/test_execution_trace.log
+        python -m pytest --timeout 600 -vs test_memory_profiler.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/test_memory_profiler.log
+        python -m pytest --timeout 600 -vs test_profiler_tree.py | \
+          tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log
+
+    - name: op_dev1
+      shell: bash -xe {0}
+      if: ${{ inputs.test_type == 'op_dev1' }}
+      run: |
+        mkdir -p ut_log/op_dev1
+        cd pytorch/third_party/torch-xpu-ops/test/regressions
+        pytest --timeout 200 -v test_operation_on_device_1.py \
+          --junit-xml=$GITHUB_WORKSPACE/ut_log/op_dev1.xml \
+          2>${{ github.workspace }}/ut_log/op_dev1/op_dev1_test_error.log | \
+          tee ${{ github.workspace }}/ut_log/op_dev1/op_dev1_test.log
+
+    - name: xpu_distributed
+      shell: bash -x -e -o pipefail {0}
+      if: ${{ inputs.test_type == 'xpu_distributed' }}
+      run: |
+        mkdir -p ut_log/xpu_distributed
+        cd pytorch/third_party/torch-xpu-ops/test/xpu
+        XCCL_ENABLE=$(python -c "import torch;print(torch.distributed.is_xccl_available())")
+        if [[ "${XCCL_ENABLE,,}" == 'false' ]] || [[ "${XCCL_ENABLE}" == '0' ]]; then
+          echo -e "[ERROR] XCCL is not enabled"
+          exit 1
+        fi
+        timeout 1800 python run_distributed.py \
+          2>${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \
+          tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log
diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py
index 5d4b189e88..be84a64793 100644
--- a/.github/scripts/check-ut.py
+++ b/.github/scripts/check-ut.py
@@ -188,8 +188,8 @@ def parse_log_file(log_file):
 def determine_category(ut):
     if ut == 'op_regression':
         return 'op_regression'
-    elif ut == 'op_regression_dev1':
-        return 'op_regression_dev1'
+    elif ut == 'op_dev1':
+        return 'op_dev1'
     elif ut == 'op_extended':
         return 'op_extended'
     elif 'op_ut' in ut:
diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh
index a6d94ce41a..0ad52580f7 100644
--- a/.github/scripts/ut_result_check.sh
+++ b/.github/scripts/ut_result_check.sh
@@ -44,7 +44,7 @@ compare_and_filter_logs() {
   fi
 }
 
-if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_regression_dev1' || "${ut_suite}" == 'op_extended' || "${ut_suite}" == 'op_transformers' ]]; then
+if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_dev1' || "${ut_suite}" == 'op_extended' || "${ut_suite}" == 'op_transformers' ]]; then
   grep -E "FAILED" "${ut_suite}"_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_failed.log
   grep -E "have failures" "${ut_suite}"_test.log | awk '{print $1}' >> ./"${ut_suite}"_failed.log
   grep "PASSED" "${ut_suite}"_test.log | awk '{print $1}' > ./"${ut_suite}"_passed.log
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index 31391ff634..699baaf234 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -30,7 +30,7 @@ on:
       ut:
         required: true
         type: string
-        description: UT scope. 
`op_regression,op_dev1,op_transformers,op_extended,op_ut,torch_xpu` Delimiter is comma disabled_tests: type: string default: '' @@ -79,111 +79,7 @@ jobs: strategy: fail-fast: false matrix: - test: - - name: 'op_regression' - condition: ${{ contains(inputs.ut, 'op_regression') }} - directory: 'pytorch/third_party/torch-xpu-ops/test/regressions' - command: 'pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml' - log_prefix: 'op_regression' - timeout: 3600 - - name: 'op_transformers' - condition: ${{ contains(inputs.ut, 'op_transformers') }} - directory: 'pytorch' - command: 'pytest --timeout 600 -v test/test_transformers.py -k xpu --junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml' - log_prefix: 'op_transformers' - timeout: 3600 - additional_steps: | - export PYTORCH_TEST_WITH_SLOW=1 - - name: 'op_extended' - condition: ${{ contains(inputs.ut, 'op_extended') }} - directory: 'pytorch/third_party/torch-xpu-ops/test/xpu/extended/' - command: 'python run_test_with_skip.py' - log_prefix: 'op_extended' - timeout: 3600 - additional_steps: | - export PYTORCH_TEST_WITH_SLOW=1 - xml_post_processing: | - cp op_extended.xml $GITHUB_WORKSPACE/ut_log - - name: 'op_ut' - condition: ${{ contains(inputs.ut, 'op_ut') }} - directory: 'pytorch/third_party/torch-xpu-ops/test/xpu' - log_prefix: 'op_ut' - command_script: | - export PYTORCH_ENABLE_XPU_FALLBACK=1 - export PYTORCH_TEST_WITH_SLOW=1 - timeout 10000 python run_test_with_skip.py \ - 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test.log - cp *.xml $GITHUB_WORKSPACE/ut_log - find op_ut_with_skip_nn op_ut_with_skip_quantization/core -type f -exec sh -c ' - dir_path=$(dirname "$1"); - case "$dir_path" in - *"op_ut_with_skip_quantization/core"*) - dir_name="op_ut_with_skip_quantization_core";; - *) - dir_name=$(basename "$dir_path");; - esac; - mv "$1" "$dir_path/${dir_name}_$(basename "$1")" - ' _ {} \; - cp op_ut_with_skip_nn/*.xml $GITHUB_WORKSPACE/ut_log - cp op_ut_with_skip_quantization/core/*.xml $GITHUB_WORKSPACE/ut_log - # Cases run with a on-demand white list, since some suites are too - # slow to go through all operators on CPU. So add cases on-demand - # when XPU implementatoin is done. 
- # test_foreach, test_decomp - # Run with only - timeout 10000 python run_test_with_only.py \ - 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test.log - cp op_ut_with_only.xml $GITHUB_WORKSPACE/ut_log - - name: 'torch_xpu' - condition: ${{ contains(inputs.ut, 'torch_xpu') }} - directory: 'pytorch' - command_script: | - export PYTORCH_TEST_WITH_SLOW=1 - export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" - test_cmd="python test/run_test.py --include " - for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done - for test in $(ls test/xpu | grep test); do test_cmd="${test_cmd} xpu/$test"; done - if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi - eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log - log_prefix: 'torch_xpu' - timeout: 10000 - - name: 'xpu_profiling' - condition: ${{ contains(inputs.ut, 'xpu_profiling') }} - command_script: | - cd torch-xpu-ops - # RN50 Test - PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0 - cp profiling.fp32.train.pt $GITHUB_WORKSPACE/ut_log/profile_test - - # All Issue Reproduce UT - python -u test/profiling/correlation_id_mixed.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/correlation_id_mixed.log - python -u test/profiling/reproducer.missing.gpu.kernel.time.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/reproducer.missing.gpu.kernel.time.log - python -u test/profiling/time_precision_in_profile.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/time_precision_in_profile.log - python -u test/profiling/profile_partial_runtime_ops.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/profile_partial_runtime_ops.log - python -u test/profiling/triton_xpu_ops_time.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log - - # All xpu ut under test/profiler - cd ../pytorch/test/profiler - python -m pytest --timeout 600 -vs test_cpp_thread.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_cpp_thread.log - python -m pytest --timeout 600 -vs test_execution_trace.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_execution_trace.log - python -m pytest --timeout 600 -vs test_memory_profiler.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_memory_profiler.log - python -m pytest --timeout 600 -vs test_profiler_tree.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log - additional_steps: | - mkdir -p ut_log/profile_test/issue_reproduce - env: - UT_NAME: ${{ matrix.test.name }} + test: [op_regression, op_transformers, op_extended, op_ut, torch_xpu, xpu_profiling] steps: - name: Cleanup workspace run: | @@ -199,29 +95,13 @@ jobs: oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} - name: Run XPU UT Test - if: ${{ matrix.test.condition }} - run: | - mkdir -p ${{ github.workspace }}/ut_log - mkdir -p ${{ github.workspace }}/ut_log/${{ matrix.test.name }} - echo "Running ${{ matrix.test.name }}" - echo "Directory: ${{ matrix.test.directory }}" - ${{ matrix.test.additional_steps }} - cd ${{ matrix.test.directory }} - if [[ "${{ matrix.test.name }}" == "op_ut" ]] || [[ "${{ matrix.test.name }}" == "xpu_profiling" ]] || [[ "${{ matrix.test.name }}" == "torch_xpu" ]]; then - bash << "SCRIPT" - ${{ matrix.test.command_script }} - SCRIPT - else - timeout ${{ matrix.test.timeout 
}} ${{ matrix.test.command }} \ - 2>${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test_error.log | \ - tee ${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test.log - ${{ matrix.test.xml_post_processing || '' }} - fi + uses: ./.github/actions/linux-ut + with: + test_type: ${{ matrix.test }} - name: UT Test Results Summary - if: ${{ matrix.test.condition }} run: | pip install junitparser - python torch-xpu-ops/.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true + python ./.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true if [ -e "ut_failure_list.csv" ];then cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv fi @@ -229,18 +109,18 @@ jobs: if: ${{ matrix.test.condition }} uses: actions/upload-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} path: ${{ github.workspace }}/ut_log - name: Upload XPU UT Failure list if: ${{ matrix.test.condition }} uses: actions/upload-artifact@v4 with: - name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} + name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} path: ${{ github.workspace }}/ut_log/ut_failure_list.csv devices: runs-on: pvc_rolling - if: ${{ contains(inputs.ut, 'op_regression_dev1') && !contains(inputs.disabled_tests, 'disable_ut') }} + if: ${{ contains(inputs.ut, 'op_dev1') && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 5 env: GH_TOKEN: ${{ github.token }} @@ -267,19 +147,14 @@ jobs: oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} - name: Run XPU UT Test - run: | - mkdir -p ${{ github.workspace }}/ut_log/op_regression_dev1 - echo "Running op_regression_dev1" - cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 200 -v test_operation_on_device_1.py \ - --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml \ - 2>${{ github.workspace }}/ut_log/op_regression_dev1/op_regression_dev1_test_error.log | \ - tee ${{ github.workspace }}/ut_log/op_regression_dev1/op_regression_dev1_test.log + uses: ./.github/actions/linux-ut + with: + test_type: op_dev1 - name: Upload Inductor XPU UT Log if: ${{ ! 
cancelled() }} uses: actions/upload-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-op_regression_dev1 + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-op_dev1 path: ${{ github.workspace }}/ut_log distributed: @@ -319,18 +194,9 @@ jobs: cat ptrace_scope.bk echo "0" |sudo tee /proc/sys/kernel/yama/ptrace_scope - name: Run Torch XPU Distributed UT - run: | - set -x -e -o pipefail - mkdir -p ut_log/xpu_distributed - cd ../pytorch/third_party/torch-xpu-ops/test/xpu - XCCL_ENABLE=$(python -c "import torch;print(torch.distributed.is_xccl_available())") - if [[ "${XCCL_ENABLE,,}" == 'false' ]] || [[ "${XCCL_ENABLE}" == '0' ]]; then - echo -e "[ERROR] XCCL is not enabled" - exit 1 - fi - timeout 1800 python run_distributed.py \ - 2>${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \ - tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log + uses: ./.github/actions/linux-ut + with: + test_type: xpu_distributed - name: Reset Ptrace_scope if: ${{ always() }} run: | @@ -351,41 +217,26 @@ jobs: strategy: fail-fast: false matrix: - test: - - name: 'op_regression' - condition: ${{ contains(inputs.ut, 'op_regression') }} - - name: 'op_regression_dev1' - condition: ${{ contains(inputs.ut, 'op_regression_dev1') }} - - name: 'op_transformers' - condition: ${{ contains(inputs.ut, 'op_transformers') }} - - name: 'op_extended' - condition: ${{ contains(inputs.ut, 'op_extended') }} - - name: 'op_ut' - condition: ${{ contains(inputs.ut, 'op_ut') }} - - name: 'torch_xpu' - condition: ${{ contains(inputs.ut, 'torch_xpu') }} - - name: 'xpu_profiling' - condition: ${{ contains(inputs.ut, 'xpu_profiling') }} - - name: 'xpu_distributed' - condition: ${{ contains(inputs.ut, 'xpu_distributed') }} + test: [op_regression, op_transformers, op_extended, op_ut, torch_xpu, xpu_profiling, op_dev1, xpu_distributed] env: GH_TOKEN: ${{ github.token }} UT_SKIP_ISSUE: 1624 - UT_NAME: ${{ matrix.test.name }} steps: - name: Cleanup workspace + if: ${{ contains(inputs.ut, matrix.test) }} run: | find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/checkout@v4 - name: Download XPU UT Logs - if: ${{ matrix.test.condition }} + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/download-artifact@v4 with: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }} path: ${{ github.workspace }}/ut_log - name: Check UT Results - if: ${{ matrix.test.condition }} + if: ${{ contains(inputs.ut, matrix.test) }} shell: bash run: | repo="${{ github.repository }}" @@ -401,7 +252,7 @@ jobs: cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./ bash ut_result_check.sh ${{ matrix.test.name }} - name: Upload Inductor XPU UT Log - if: ${{ matrix.test.condition }} + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/upload-artifact@v4 with: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }}-checked diff --git a/.github/workflows/_windows_ut.yml b/.github/workflows/_windows_ut.yml index 9ca7f7eb8d..9cb27a30f1 100644 --- a/.github/workflows/_windows_ut.yml +++ b/.github/workflows/_windows_ut.yml @@ -17,7 +17,7 @@ on: required: true type: string default: '' - description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu` Delimiter is comma + description: UT scope. 
`op_regression,op_dev1,op_extended,op_ut,torch_xpu` Delimiter is comma python: required: false type: string diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index d4c12de348..c0f6553d89 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -29,7 +29,7 @@ on: ut: type: string default: '' - description: UT scope. `op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed`. Delimiter is comma + description: UT scope. `op_regression,op_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed`. Delimiter is comma suite: type: string default: '' @@ -130,7 +130,7 @@ jobs: torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} - ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} + ut: ${{ github.event_name == 'schedule' && 'op_regression,op_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} Linux-Nightly-Ondemand-E2E-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 3a2c819e32..b2f098efaf 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -108,7 +108,7 @@ jobs: runner: linux.idc.xpu test_type: build-cicd pytorch: main - ut: op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_distributed + ut: op_regression,op_dev1,op_transformers,op_extended,op_ut,xpu_distributed disabled_tests: ${{ needs.conditions-filter.outputs.disabled_tests }} linux-e2e: From d99668c06ca52149b2cb27b83727e305b5a35b9a Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 10:42:15 +0800 Subject: [PATCH 092/160] update --- .github/workflows/_linux_ut.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 6f04ee221f..e51886fa48 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -82,11 +82,14 @@ jobs: test: [op_regression, op_transformers, op_extended, op_ut, torch_xpu, xpu_profiling] steps: - name: Cleanup workspace + if: ${{ contains(inputs.ut, matrix.test) }} run: | find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/checkout@v4 - name: Launch Test on ${{ needs.runner.outputs.hostname }} + if: ${{ contains(inputs.ut, matrix.test) }} uses: ./.github/actions/setup-testenv with: test_type: ${{ inputs.test_type }} @@ -95,10 +98,12 @@ jobs: oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} - name: Run XPU UT Test + if: ${{ contains(inputs.ut, matrix.test) }} uses: ./.github/actions/linux-ut with: test_type: ${{ matrix.test }} - name: UT Test Results Summary + if: ${{ contains(inputs.ut, matrix.test) }} run: | pip install junitparser python ./.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true @@ -106,13 +111,13 @@ jobs: cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv fi - name: Upload Inductor XPU UT Log - if: ${{ matrix.test.condition }} + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/upload-artifact@v4 with: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} 
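          # Note: contains() in the gates above is a plain substring match on the
          # comma-separated `ut` string, e.g.
          #   contains('op_regression_dev1,op_ut', 'op_regression') -> true
          # which is presumably why the device-1 scope is renamed from
          # op_regression_dev1 to the non-overlapping op_dev1 throughout these
          # workflows, so requesting one suite cannot accidentally enable another.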
path: ${{ github.workspace }}/ut_log - name: Upload XPU UT Failure list - if: ${{ matrix.test.condition }} + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/upload-artifact@v4 with: name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} From d0d1ceb0731bed8cc0c171d9ad3299cf07b8e79b Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 10:52:01 +0800 Subject: [PATCH 093/160] update --- .github/actions/get-runner/action.yml | 1 - .github/workflows/_linux_e2e.yml | 2 +- .github/workflows/nightly_ondemand.yml | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/actions/get-runner/action.yml b/.github/actions/get-runner/action.yml index 513c185bd3..b772aadfd7 100644 --- a/.github/actions/get-runner/action.yml +++ b/.github/actions/get-runner/action.yml @@ -35,7 +35,6 @@ runs: cat /etc/os-release uname -a - name: Cleanup host - if: ${{ always() }} shell: bash -xe {0} run: | # clean docker cache diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 4088b30f9e..f7d58b5c17 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -233,7 +233,7 @@ jobs: summary: runs-on: [self-hosted, Linux] - if: ${{ always() }} + if: ${{ ! cancelled() }} needs: test permissions: issues: write diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index c0f6553d89..1f00b63263 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -29,7 +29,7 @@ on: ut: type: string default: '' - description: UT scope. `op_regression,op_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed`. Delimiter is comma + description: UT scope. `op_regression,op_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed,microbench,windows`. 
Delimiter is comma suite: type: string default: '' @@ -151,7 +151,7 @@ jobs: model: ${{ github.event_name == 'schedule' && '' || inputs.model }} Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling: - if: ${{ github.event_name == 'schedule' }} + if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'microbench') }} name: linux-microbench permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] @@ -164,7 +164,7 @@ jobs: python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} Windows-Nightly-Ondemand-UT-Tests: - if: ${{ github.event_name == 'schedule' }} + if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'windows') }} name: windows uses: ./.github/workflows/_windows_ut.yml with: From 1f265381d1f1f89e355c8db8c73eebfc55c6ea0d Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 10:58:32 +0800 Subject: [PATCH 094/160] update --- .github/actions/get-runner/action.yml | 3 -- .../action.yml | 2 +- .github/actions/linux-ut/action.yml | 12 ++--- .github/actions/setup-testenv/action.yml | 46 +++++++++---------- .github/workflows/_linux_e2e.yml | 20 ++++---- 5 files changed, 38 insertions(+), 45 deletions(-) rename .github/actions/{inductor-xpu-e2e-test => linux-e2e}/action.yml (99%) diff --git a/.github/actions/get-runner/action.yml b/.github/actions/get-runner/action.yml index b772aadfd7..c55ca37cc6 100644 --- a/.github/actions/get-runner/action.yml +++ b/.github/actions/get-runner/action.yml @@ -1,8 +1,5 @@ name: Get Runner Infos -on: - workflow_call: - outputs: runner_id: value: ${{ steps.runner.outputs.runner_id }} diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/linux-e2e/action.yml similarity index 99% rename from .github/actions/inductor-xpu-e2e-test/action.yml rename to .github/actions/linux-e2e/action.yml index d269ce6d12..559b3b307b 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/linux-e2e/action.yml @@ -1,4 +1,4 @@ -name: inductor-xpu-e2e-test +name: Linux E2E Test inputs: env_prepare: diff --git a/.github/actions/linux-ut/action.yml b/.github/actions/linux-ut/action.yml index 4ef45eb2fe..af01261071 100644 --- a/.github/actions/linux-ut/action.yml +++ b/.github/actions/linux-ut/action.yml @@ -1,12 +1,10 @@ name: Linux Unit Test -on: - workflow_call: - inputs: - test_type: - required: true - type: string - description: Test scope +inputs: + test_type: + required: true + type: string + description: Test scope permissions: read-all diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml index ae5068924d..de5bdf5753 100644 --- a/.github/actions/setup-testenv/action.yml +++ b/.github/actions/setup-testenv/action.yml @@ -1,28 +1,26 @@ -name: Get Runner Infos +name: Setup Test Environment -on: - workflow_call: - inputs: - test_type: - required: true - type: string - description: Test scope - pytorch: - type: string - default: 'main' - description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' - torch_xpu_ops: - type: string - default: 'main' - description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin - oneapi: - type: string - default: 'installed' - description: Installed oneAPI DLE on host by default, fill offline.sh url if needed - python: - type: string - default: '3.10' - description: Python version +inputs: + test_type: + required: true + type: string + description: Test scope + pytorch: + type: string + default: 'main' + 
description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: + type: string + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + type: string + default: '3.10' + description: Python version permissions: read-all diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index f7d58b5c17..b8733741e5 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -103,7 +103,7 @@ jobs: # CICD launch - name: Nightly Huggingface BF16 & FP16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: huggingface @@ -112,7 +112,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: torchbench @@ -121,7 +121,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: timm_models @@ -132,7 +132,7 @@ jobs: # Nihglty launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: huggingface @@ -141,7 +141,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: torchbench @@ -150,7 +150,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: timm_models @@ -168,7 +168,7 @@ jobs: # Weekly launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: huggingface @@ -177,7 +177,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: torchbench @@ -186,7 +186,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: timm_models @@ -204,7 +204,7 @@ jobs: # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ contains(inputs.test_type, 'ondemand') && inputs.suite != 'pt2e' }} - uses: ./.github/actions/inductor-xpu-e2e-test + uses: ./.github/actions/linux-e2e with: env_prepare: true suite: ${{ inputs.suite }} From d06b8db4a4d88d0a25e70ef26f6fb3355b1a1736 Mon Sep 17 00:00:00 
2001 From: mengfei25 Date: Fri, 25 Jul 2025 11:09:36 +0800 Subject: [PATCH 095/160] update --- .github/actions/linux-e2e/action.yml | 143 ---------------------- .github/actions/linux-ut/action.yml | 136 -------------------- .github/actions/setup-testenv/action.yml | 134 -------------------- .github/workflows/_linux_e2e.yml | 22 ++-- .github/workflows/_linux_op_benchmark.yml | 2 +- .github/workflows/_linux_ut.yml | 12 +- 6 files changed, 18 insertions(+), 431 deletions(-) delete mode 100644 .github/actions/linux-e2e/action.yml delete mode 100644 .github/actions/linux-ut/action.yml delete mode 100644 .github/actions/setup-testenv/action.yml diff --git a/.github/actions/linux-e2e/action.yml b/.github/actions/linux-e2e/action.yml deleted file mode 100644 index 559b3b307b..0000000000 --- a/.github/actions/linux-e2e/action.yml +++ /dev/null @@ -1,143 +0,0 @@ -name: Linux E2E Test - -inputs: - env_prepare: - required: false - description: If set to any value, will prepare suite test env - suite: - required: true - type: string - default: 'huggingface' - description: Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma - dt: - required: true - type: string - default: 'float32' - description: Data precision of the test.float32,bfloat16,float16,amp_bf16,amp_fp16. Delimiter is comma - mode: - required: true - type: string - default: 'inference' - description: inference,training. Delimiter is comma - scenario: - required: true - type: string - default: 'accuracy' - description: accuracy,performance. Delimiter is comma - -runs: - using: composite - steps: - - name: Prepare ENV - if: ${{ inputs.env_prepare }} - shell: bash -xe {0} - run: | - if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then - python -c "import torch, torchvision, torchaudio" - cd ./pytorch - TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt) - git clone https://github.com/pytorch/benchmark.git xpu-benchmark - cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID - # remove deps which will reinstall torch - pip install --no-deps accelerate - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) - pip install -U transformers==4.44.2 - sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt - git status && git diff - pip install -r requirements.txt - python install.py --continue_on_fail - # deps for torchrec_dlrm - pip install pyre_extensions - pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu - pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec - fi - if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then - pip install -U transformers==4.44.2 - fi - if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then - # install timm without dependencies - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 - # install timm dependencies without torch and torchvision - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) - fi - pip list |grep -E 'intel|torch' - - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - shell: bash -xe {0} - run: | - cp ./.github/scripts/inductor_xpu_test.sh ./pytorch - cd ./pytorch - # check param - function contains() { - contains_status="echo 'Start $2 
...'" - { - [[ $1 =~ (^|,)$2($|,) ]] - } || { - echo "[Warning] $2 is not suppotted type! Skipped!" - contains_status="continue" - } - } - xpu_num=$(clinfo --list |awk 'BEGIN{gpu=0;}{if(gpu==1 && $0~/Platform/){gpu=0;}; if(gpu==1){print $0;}; if($0~/Platform.*Graphics/){gpu=1;}}' |wc -l) - cores_per_instance="$(lscpu |grep -E 'Core\(s\) per socket:|Socket\(s\):' |awk -v i="${xpu_num}" 'BEGIN{sum=1}{sum*=$NF}END{print sum/i}')" - export OMP_NUM_THREADS=${cores_per_instance} - for suite in $(echo ${{ inputs.suite }} |sed 's/,/ /g') - do - if [ "${suite}" == "pt2e" ];then - continue - fi - contains "huggingface,timm_models,torchbench" $suite - $contains_status - for dt in $(echo ${{ inputs.dt }} |sed 's/,/ /g') - do - contains "float32,bfloat16,float16,amp_bf16,amp_fp16" $dt - $contains_status - for mode in $(echo ${{ inputs.mode }} |sed 's/,/ /g') - do - contains "inference,training" $mode - $contains_status - for scenario in $(echo ${{ inputs.scenario }} |sed 's/,/ /g') - do - contains "accuracy,performance" $scenario - $contains_status - if [ "${MODEL_ONLY_NAME}" == "" ];then - for xpu_id in $(seq 0 $[ ${xpu_num} - 1 ]) - do - cpu_list="$(echo "${cores_per_instance} ${xpu_id}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')" - numactl --localalloc --physcpubind=${cpu_list} bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${xpu_id} & - done - else - for test_model in $(echo ${MODEL_ONLY_NAME} |sed 's/,/ /g') - do - numactl --localalloc bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${test_model} - done - fi - wait - # summarize pass rate - LOG_DIR="inductor_log/${suite}/${dt}" - LOG_NAME=inductor_${suite}_${dt}_${mode}_xpu_${scenario}_all.log - rm -f ${LOG_DIR}/${LOG_NAME} - find ${LOG_DIR}/ -name "inductor_${suite}_${dt}_${mode}_xpu_${scenario}_card*.log" |xargs cat >> ${LOG_DIR}/${LOG_NAME} 2>&1 - done - done - done - done - - - name: Summary E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - shell: bash -xe {0} - run: | - cd ./pytorch - rm -f inductor_log/summary_accuracy.csv - for var in $(find inductor_log/ -name "inductor_*_xpu_accuracy.csv") - do - sed -i "s/$/,$(basename $var)/" $var - cat $var >> inductor_log/summary_accuracy.csv - done - cd ${{ github.workspace }} - cp ./.github/scripts/inductor_summary.py ./pytorch - cd ./pytorch - pip install styleFrame scipy pandas - dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g') - mode=$(echo ${{ inputs.mode }} |sed 's/,/ /g') - suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g') - scenario=$(echo ${{ inputs.scenario }} |sed 's/,/ /g') - python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario} diff --git a/.github/actions/linux-ut/action.yml b/.github/actions/linux-ut/action.yml deleted file mode 100644 index af01261071..0000000000 --- a/.github/actions/linux-ut/action.yml +++ /dev/null @@ -1,136 +0,0 @@ -name: Linux Unit Test - -inputs: - test_type: - required: true - type: string - description: Test scope - -permissions: read-all - -runs: - using: composite - steps: - - name: op_regression - shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_regression' }} - run: | - cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml - - name: op_transformers - shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_transformers' }} - run: | - export PYTORCH_TEST_WITH_SLOW=1 - cd pytorch - pytest --timeout 600 -v test/test_transformers.py -k xpu \ - 
--junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml - - name: op_extended - shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_extended' }} - run: | - export PYTORCH_TEST_WITH_SLOW=1 - cd pytorch/third_party/torch-xpu-ops/test/xpu/extended - timeout 3600 python run_test_with_skip.py - cp op_extended.xml $GITHUB_WORKSPACE/ut_log - - name: op_ut - shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_ut' }} - run: | - export PYTORCH_TEST_WITH_SLOW=1 - export PYTORCH_ENABLE_XPU_FALLBACK=1 - cd pytorch/third_party/torch-xpu-ops/test/xpu - timeout 10000 python run_test_with_skip.py \ - 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test.log - cp *.xml $GITHUB_WORKSPACE/ut_log - find op_ut_with_skip_nn op_ut_with_skip_quantization/core -type f -exec sh -c ' - dir_path=$(dirname "$1"); - case "$dir_path" in - *"op_ut_with_skip_quantization/core"*) - dir_name="op_ut_with_skip_quantization_core";; - *) - dir_name=$(basename "$dir_path");; - esac; - mv "$1" "$dir_path/${dir_name}_$(basename "$1")" - ' _ {} \; - cp op_ut_with_skip_nn/*.xml $GITHUB_WORKSPACE/ut_log - cp op_ut_with_skip_quantization/core/*.xml $GITHUB_WORKSPACE/ut_log - # Cases run with a on-demand white list, since some suites are too - # slow to go through all operators on CPU. So add cases on-demand - # when XPU implementatoin is done. - # test_foreach, test_decomp - # Run with only - timeout 10000 python run_test_with_only.py \ - 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test.log - cp op_ut_with_only.xml $GITHUB_WORKSPACE/ut_log - - name: torch_xpu - shell: bash -xe {0} - if: ${{ inputs.test_type == 'torch_xpu' }} - run: | - export PYTORCH_TEST_WITH_SLOW=1 - export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" - cd pytorch - test_cmd="python test/run_test.py --include " - for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done - for test in $(ls test/xpu | grep test); do test_cmd="${test_cmd} xpu/$test"; done - if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi - eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log - - name: xpu_profiling - shell: bash -xe {0} - if: ${{ inputs.test_type == 'xpu_profiling' }} - run: | - mkdir -p ut_log/profile_test/issue_reproduce - cd pytorch/third_party/torch-xpu-ops - # RN50 Test - PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0 - cp profiling.fp32.train.pt $GITHUB_WORKSPACE/ut_log/profile_test - # All Issue Reproduce UT - python -u test/profiling/correlation_id_mixed.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/correlation_id_mixed.log - python -u test/profiling/reproducer.missing.gpu.kernel.time.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/reproducer.missing.gpu.kernel.time.log - python -u test/profiling/time_precision_in_profile.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/time_precision_in_profile.log - python -u test/profiling/profile_partial_runtime_ops.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/profile_partial_runtime_ops.log - python -u test/profiling/triton_xpu_ops_time.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log - # All xpu ut under test/profiler - cd ../pytorch/test/profiler - python -m pytest --timeout 600 -vs 
test_cpp_thread.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_cpp_thread.log - python -m pytest --timeout 600 -vs test_execution_trace.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_execution_trace.log - python -m pytest --timeout 600 -vs test_memory_profiler.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_memory_profiler.log - python -m pytest --timeout 600 -vs test_profiler_tree.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log - - - name: op_dev1 - shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_dev1' }} - run: | - mkdir -p ut_log/op_dev1 - cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 200 -v test_operation_on_device_1.py \ - --junit-xml=$GITHUB_WORKSPACE/ut_log/op_dev1.xml \ - 2>${{ github.workspace }}/ut_log/op_dev1/op_dev1_test_error.log | \ - tee ${{ github.workspace }}/ut_log/op_dev1/op_dev1_test.log - - - name: xpu_distributed - shell: bash -x -e -o pipefail {0} - if: ${{ inputs.test_type == 'xpu_distributed' }} - run: | - mkdir -p ut_log/xpu_distributed - cd pytorch/third_party/torch-xpu-ops/test/xpu - XCCL_ENABLE=$(python -c "import torch;print(torch.distributed.is_xccl_available())") - if [[ "${XCCL_ENABLE,,}" == 'false' ]] || [[ "${XCCL_ENABLE}" == '0' ]]; then - echo -e "[ERROR] XCCL is not enabled" - exit 1 - fi - timeout 1800 python run_distributed.py \ - 2>${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \ - tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log diff --git a/.github/actions/setup-testenv/action.yml b/.github/actions/setup-testenv/action.yml deleted file mode 100644 index de5bdf5753..0000000000 --- a/.github/actions/setup-testenv/action.yml +++ /dev/null @@ -1,134 +0,0 @@ -name: Setup Test Environment - -inputs: - test_type: - required: true - type: string - description: Test scope - pytorch: - type: string - default: 'main' - description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' - torch_xpu_ops: - type: string - default: 'main' - description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin - oneapi: - type: string - default: 'installed' - description: Installed oneAPI DLE on host by default, fill offline.sh url if needed - python: - type: string - default: '3.10' - description: Python version - -permissions: read-all - -runs: - using: composite - steps: - - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python }} - - name: Check runner - shell: bash -xe {0} - run: | - hostname && id - cat /etc/os-release - gcc -v && g++ -v - which python && python -V - which pip && pip list - pip install -U pip wheel setuptools - uname -a - dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' - clinfo --list - cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c - rm -rf ~/.triton /tmp/*inductor* - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - with: - path: torch-xpu-ops - - name: Install oneAPI DLE - shell: bash -xe {0} - if: ${{ inputs.oneapi != 'installed' }} - run: | - rm -rf ~/intel ~/.intel - wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} - source ${HOME}/intel/oneapi/setvars.sh - sycl-ls && icpx -v - - name: Download Pytorch wheel - if: ${{ ! 
contains(inputs.test_type, 'wheel') }} - uses: actions/download-artifact@v4 - with: - pattern: Torch-XPU-Wheel-* - - name: Prepare Stock Pytorch - shell: bash -xe {0} - run: | - # install pytorch - if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" -c) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" -c) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu - elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" -c) -ne 0 ];then - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - else - pip install --force-reinstall $(find ${{ github.workspace }}/ -name "*torch*.whl") - fi - pip list |grep torch - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then - PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - else - PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - fi - git clone ${PYTORCH_REPO} pytorch - cd pytorch - git checkout ${TORCH_COMMIT_ID} - pip install -r .ci/docker/requirements-ci.txt - # apply extra PRs for stock pytorch - if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 - else - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - fi - git status && git diff && git show -s - - name: Prepare Torch-xpu-ops - shell: bash -xe {0} - if: ${{ inputs.torch_xpu_ops != 'skipped' }} - run: | - cd pytorch - rm -rf third_party/torch-xpu-ops - if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then - TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" - TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" - else - TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - if [ "${{ inputs.torch_xpu_ops }}" == "pinned" ];then - TORCH_XPU_OPS_COMMIT="$(cat third_party/xpu.txt)" - else - TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" - fi - fi - if [ "${{ inputs.test_type }}" == "cicd" ];then - cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops - else - git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops - fi - cd third_party/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_COMMIT} - git status && git diff && git show -s - - name: Torch Config - shell: bash -xe {0} - run: | - printenv - python -c "import torch; print(torch.__config__.show())" - python -c "import torch; print(torch.__config__.parallel_info())" - python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import torchvision; print(torchvision.__version__)" - python -c "import torchaudio; print(torchaudio.__version__)" - python -c "import triton; print(triton.__version__)" - python pytorch/torch/utils/collect_env.py - pip list |grep -E 'torch|intel' diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index b8733741e5..763dca0ca1 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -92,7 +92,7 @@ jobs: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Launch Test on ${{ needs.runner.outputs.hostname }} - uses: ./.github/actions/setup-testenv + uses: ./.github/actions/linux-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} @@ -103,7 +103,7 @@ jobs: # CICD 
launch - name: Nightly Huggingface BF16 & FP16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: huggingface @@ -112,7 +112,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: torchbench @@ -121,7 +121,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: timm_models @@ -132,7 +132,7 @@ jobs: # Nihglty launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: huggingface @@ -141,7 +141,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: torchbench @@ -150,7 +150,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'nightly') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: timm_models @@ -168,7 +168,7 @@ jobs: # Weekly launch - name: Nightly Huggingface Full Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: huggingface @@ -177,7 +177,7 @@ jobs: scenario: accuracy,performance - name: Nightly Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: torchbench @@ -186,7 +186,7 @@ jobs: scenario: accuracy,performance - name: Nightly Timm_models FP16 Training Test if: ${{ contains(inputs.test_type, 'weekly') }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: timm_models @@ -204,7 +204,7 @@ jobs: # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) if: ${{ contains(inputs.test_type, 'ondemand') && inputs.suite != 'pt2e' }} - uses: ./.github/actions/linux-e2e + uses: ./.github/actions/linux-e2etest with: env_prepare: true suite: ${{ inputs.suite }} diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index bd6c1adc70..6524c5418f 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -73,7 +73,7 @@ jobs: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Launch Test on ${{ needs.runner.outputs.hostname }} - uses: ./.github/actions/setup-testenv + uses: ./.github/actions/linux-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index e51886fa48..029a029498 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -90,7 +90,7 @@ jobs: uses: actions/checkout@v4 - name: Launch Test on ${{ needs.runner.outputs.hostname }} if: ${{ contains(inputs.ut, 
matrix.test) }} - uses: ./.github/actions/setup-testenv + uses: ./.github/actions/linux-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} @@ -99,7 +99,7 @@ jobs: python: ${{ inputs.python }} - name: Run XPU UT Test if: ${{ contains(inputs.ut, matrix.test) }} - uses: ./.github/actions/linux-ut + uses: ./.github/actions/linux-uttest with: test_type: ${{ matrix.test }} - name: UT Test Results Summary @@ -144,7 +144,7 @@ jobs: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Launch Test on ${{ steps.cleanup.outputs.hostname }} - uses: ./.github/actions/setup-testenv + uses: ./.github/actions/linux-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} @@ -152,7 +152,7 @@ jobs: oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} - name: Run XPU UT Test - uses: ./.github/actions/linux-ut + uses: ./.github/actions/linux-uttest with: test_type: op_dev1 - name: Upload Inductor XPU UT Log @@ -183,7 +183,7 @@ jobs: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Launch Test on ${{ steps.cleanup.outputs.hostname }} - uses: ./.github/actions/setup-testenv + uses: ./.github/actions/linux-testenv with: test_type: ${{ inputs.test_type }} pytorch: ${{ inputs.pytorch }} @@ -199,7 +199,7 @@ jobs: cat ptrace_scope.bk echo "0" |sudo tee /proc/sys/kernel/yama/ptrace_scope - name: Run Torch XPU Distributed UT - uses: ./.github/actions/linux-ut + uses: ./.github/actions/linux-uttest with: test_type: xpu_distributed - name: Reset Ptrace_scope From 70577e1927a707d9551eaaecc19e307068b29bb5 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 11:10:23 +0800 Subject: [PATCH 096/160] update --- .github/actions/linux-e2etest/action.yml | 143 +++++++++++++++++++++++ .github/actions/linux-testenv/action.yml | 135 +++++++++++++++++++++ .github/actions/linux-uttest/action.yml | 136 +++++++++++++++++++++ 3 files changed, 414 insertions(+) create mode 100644 .github/actions/linux-e2etest/action.yml create mode 100644 .github/actions/linux-testenv/action.yml create mode 100644 .github/actions/linux-uttest/action.yml diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml new file mode 100644 index 0000000000..559b3b307b --- /dev/null +++ b/.github/actions/linux-e2etest/action.yml @@ -0,0 +1,143 @@ +name: Linux E2E Test + +inputs: + env_prepare: + required: false + description: If set to any value, will prepare suite test env + suite: + required: true + type: string + default: 'huggingface' + description: Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma + dt: + required: true + type: string + default: 'float32' + description: Data precision of the test.float32,bfloat16,float16,amp_bf16,amp_fp16. Delimiter is comma + mode: + required: true + type: string + default: 'inference' + description: inference,training. Delimiter is comma + scenario: + required: true + type: string + default: 'accuracy' + description: accuracy,performance. 
Delimiter is comma + +runs: + using: composite + steps: + - name: Prepare ENV + if: ${{ inputs.env_prepare }} + shell: bash -xe {0} + run: | + if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then + python -c "import torch, torchvision, torchaudio" + cd ./pytorch + TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt) + git clone https://github.com/pytorch/benchmark.git xpu-benchmark + cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID + # remove deps which will reinstall torch + pip install --no-deps accelerate + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) + pip install -U transformers==4.44.2 + sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt + git status && git diff + pip install -r requirements.txt + python install.py --continue_on_fail + # deps for torchrec_dlrm + pip install pyre_extensions + pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu + pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec + fi + if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then + pip install -U transformers==4.44.2 + fi + if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then + # install timm without dependencies + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 + # install timm dependencies without torch and torchvision + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) + fi + pip list |grep -E 'intel|torch' + - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + shell: bash -xe {0} + run: | + cp ./.github/scripts/inductor_xpu_test.sh ./pytorch + cd ./pytorch + # check param + function contains() { + contains_status="echo 'Start $2 ...'" + { + [[ $1 =~ (^|,)$2($|,) ]] + } || { + echo "[Warning] $2 is not suppotted type! Skipped!" 
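+                # NOTE: callers expand $contains_status right after each contains()
+                # call, so "continue" here skips the unsupported suite/dtype/mode/
+                # scenario in the surrounding loop, while a supported value simply
+                # prints "Start <value> ...".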
+ contains_status="continue" + } + } + xpu_num=$(clinfo --list |awk 'BEGIN{gpu=0;}{if(gpu==1 && $0~/Platform/){gpu=0;}; if(gpu==1){print $0;}; if($0~/Platform.*Graphics/){gpu=1;}}' |wc -l) + cores_per_instance="$(lscpu |grep -E 'Core\(s\) per socket:|Socket\(s\):' |awk -v i="${xpu_num}" 'BEGIN{sum=1}{sum*=$NF}END{print sum/i}')" + export OMP_NUM_THREADS=${cores_per_instance} + for suite in $(echo ${{ inputs.suite }} |sed 's/,/ /g') + do + if [ "${suite}" == "pt2e" ];then + continue + fi + contains "huggingface,timm_models,torchbench" $suite + $contains_status + for dt in $(echo ${{ inputs.dt }} |sed 's/,/ /g') + do + contains "float32,bfloat16,float16,amp_bf16,amp_fp16" $dt + $contains_status + for mode in $(echo ${{ inputs.mode }} |sed 's/,/ /g') + do + contains "inference,training" $mode + $contains_status + for scenario in $(echo ${{ inputs.scenario }} |sed 's/,/ /g') + do + contains "accuracy,performance" $scenario + $contains_status + if [ "${MODEL_ONLY_NAME}" == "" ];then + for xpu_id in $(seq 0 $[ ${xpu_num} - 1 ]) + do + cpu_list="$(echo "${cores_per_instance} ${xpu_id}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')" + numactl --localalloc --physcpubind=${cpu_list} bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${xpu_id} & + done + else + for test_model in $(echo ${MODEL_ONLY_NAME} |sed 's/,/ /g') + do + numactl --localalloc bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${test_model} + done + fi + wait + # summarize pass rate + LOG_DIR="inductor_log/${suite}/${dt}" + LOG_NAME=inductor_${suite}_${dt}_${mode}_xpu_${scenario}_all.log + rm -f ${LOG_DIR}/${LOG_NAME} + find ${LOG_DIR}/ -name "inductor_${suite}_${dt}_${mode}_xpu_${scenario}_card*.log" |xargs cat >> ${LOG_DIR}/${LOG_NAME} 2>&1 + done + done + done + done + + - name: Summary E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + shell: bash -xe {0} + run: | + cd ./pytorch + rm -f inductor_log/summary_accuracy.csv + for var in $(find inductor_log/ -name "inductor_*_xpu_accuracy.csv") + do + sed -i "s/$/,$(basename $var)/" $var + cat $var >> inductor_log/summary_accuracy.csv + done + cd ${{ github.workspace }} + cp ./.github/scripts/inductor_summary.py ./pytorch + cd ./pytorch + pip install styleFrame scipy pandas + dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g') + mode=$(echo ${{ inputs.mode }} |sed 's/,/ /g') + suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g') + scenario=$(echo ${{ inputs.scenario }} |sed 's/,/ /g') + python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario} diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml new file mode 100644 index 0000000000..20e2bf15bc --- /dev/null +++ b/.github/actions/linux-testenv/action.yml @@ -0,0 +1,135 @@ +name: Setup Test Environment + +inputs: + test_type: + required: true + type: string + description: Test scope + pytorch: + type: string + default: 'main' + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: + type: string + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + type: string + default: '3.10' + description: Python version + +permissions: read-all + +runs: + using: composite + steps: + - name: Setup python-${{ 
inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Check runner + shell: bash -xe {0} + run: | + hostname && id + cat /etc/os-release + gcc -v && g++ -v + which python && python -V + which pip && pip list + pip install -U pip wheel setuptools + uname -a + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + clinfo --list + cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c + rm -rf ~/.triton /tmp/*inductor* + pip install pandas psutil scipy requests pytest-timeout + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + with: + path: torch-xpu-ops + - name: Install oneAPI DLE + shell: bash -xe {0} + if: ${{ inputs.oneapi != 'installed' }} + run: | + rm -rf ~/intel ~/.intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi + echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV} + source ${HOME}/intel/oneapi/setvars.sh + sycl-ls && icpx -v + - name: Download Pytorch wheel + if: ${{ ! contains(inputs.test_type, 'wheel') }} + uses: actions/download-artifact@v4 + with: + pattern: Torch-XPU-Wheel-* + - name: Prepare Stock Pytorch + shell: bash -xe {0} + run: | + # install pytorch + if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" -c) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "test_wheel" -c) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/test/xpu + elif [ $(echo "${{ inputs.pytorch }}" |grep -w "nightly_wheel" -c) -ne 0 ];then + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + else + pip install --force-reinstall $(find ${{ github.workspace }}/ -name "*torch*.whl") + fi + pip list |grep torch + TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') + if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then + PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" + else + PYTORCH_REPO="https://github.com/pytorch/pytorch.git" + fi + git clone ${PYTORCH_REPO} pytorch + cd pytorch + git checkout ${TORCH_COMMIT_ID} + pip install -r .ci/docker/requirements-ci.txt + # apply extra PRs for stock pytorch + if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 + else + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + fi + git status && git diff && git show -s + - name: Prepare Torch-xpu-ops + shell: bash -xe {0} + if: ${{ inputs.torch_xpu_ops != 'skipped' }} + run: | + cd pytorch + rm -rf third_party/torch-xpu-ops + if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" + else + TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" + if [ "${{ inputs.torch_xpu_ops }}" == "pinned" ];then + TORCH_XPU_OPS_COMMIT="$(cat third_party/xpu.txt)" + else + TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" + fi + fi + if [ "${{ inputs.test_type }}" == "cicd" ];then + cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops + else + git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops + fi + cd third_party/torch-xpu-ops + git checkout ${TORCH_XPU_OPS_COMMIT} + git status && git 
diff && git show -s + - name: Torch Config + shell: bash -xe {0} + run: | + printenv + python -c "import torch; print(torch.__config__.show())" + python -c "import torch; print(torch.__config__.parallel_info())" + python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" + python -c "import triton; print(triton.__version__)" + python pytorch/torch/utils/collect_env.py + pip list |grep -E 'torch|intel' diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml new file mode 100644 index 0000000000..af01261071 --- /dev/null +++ b/.github/actions/linux-uttest/action.yml @@ -0,0 +1,136 @@ +name: Linux Unit Test + +inputs: + test_type: + required: true + type: string + description: Test scope + +permissions: read-all + +runs: + using: composite + steps: + - name: op_regression + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_regression' }} + run: | + cd pytorch/third_party/torch-xpu-ops/test/regressions + pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml + - name: op_transformers + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_transformers' }} + run: | + export PYTORCH_TEST_WITH_SLOW=1 + cd pytorch + pytest --timeout 600 -v test/test_transformers.py -k xpu \ + --junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml + - name: op_extended + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_extended' }} + run: | + export PYTORCH_TEST_WITH_SLOW=1 + cd pytorch/third_party/torch-xpu-ops/test/xpu/extended + timeout 3600 python run_test_with_skip.py + cp op_extended.xml $GITHUB_WORKSPACE/ut_log + - name: op_ut + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_ut' }} + run: | + export PYTORCH_TEST_WITH_SLOW=1 + export PYTORCH_ENABLE_XPU_FALLBACK=1 + cd pytorch/third_party/torch-xpu-ops/test/xpu + timeout 10000 python run_test_with_skip.py \ + 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test_error.log | \ + tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test.log + cp *.xml $GITHUB_WORKSPACE/ut_log + find op_ut_with_skip_nn op_ut_with_skip_quantization/core -type f -exec sh -c ' + dir_path=$(dirname "$1"); + case "$dir_path" in + *"op_ut_with_skip_quantization/core"*) + dir_name="op_ut_with_skip_quantization_core";; + *) + dir_name=$(basename "$dir_path");; + esac; + mv "$1" "$dir_path/${dir_name}_$(basename "$1")" + ' _ {} \; + cp op_ut_with_skip_nn/*.xml $GITHUB_WORKSPACE/ut_log + cp op_ut_with_skip_quantization/core/*.xml $GITHUB_WORKSPACE/ut_log + # Cases run with a on-demand white list, since some suites are too + # slow to go through all operators on CPU. So add cases on-demand + # when XPU implementatoin is done. 
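+      # A minimal sketch (paths and flags assumed, for local debugging only) of
+      # smoke-checking one allow-listed suite before wiring it into
+      # run_test_with_only.py:
+      #   python -m pytest --timeout 600 -v ../../../../test/test_foreach.py -k xpu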
+ # test_foreach, test_decomp + # Run with only + timeout 10000 python run_test_with_only.py \ + 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test_error.log | \ + tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test.log + cp op_ut_with_only.xml $GITHUB_WORKSPACE/ut_log + - name: torch_xpu + shell: bash -xe {0} + if: ${{ inputs.test_type == 'torch_xpu' }} + run: | + export PYTORCH_TEST_WITH_SLOW=1 + export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" + cd pytorch + test_cmd="python test/run_test.py --include " + for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done + for test in $(ls test/xpu | grep test); do test_cmd="${test_cmd} xpu/$test"; done + if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi + eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \ + tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log + - name: xpu_profiling + shell: bash -xe {0} + if: ${{ inputs.test_type == 'xpu_profiling' }} + run: | + mkdir -p ut_log/profile_test/issue_reproduce + cd pytorch/third_party/torch-xpu-ops + # RN50 Test + PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0 + cp profiling.fp32.train.pt $GITHUB_WORKSPACE/ut_log/profile_test + # All Issue Reproduce UT + python -u test/profiling/correlation_id_mixed.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/correlation_id_mixed.log + python -u test/profiling/reproducer.missing.gpu.kernel.time.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/reproducer.missing.gpu.kernel.time.log + python -u test/profiling/time_precision_in_profile.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/time_precision_in_profile.log + python -u test/profiling/profile_partial_runtime_ops.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/profile_partial_runtime_ops.log + python -u test/profiling/triton_xpu_ops_time.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log + # All xpu ut under test/profiler + cd ../pytorch/test/profiler + python -m pytest --timeout 600 -vs test_cpp_thread.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/test_cpp_thread.log + python -m pytest --timeout 600 -vs test_execution_trace.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/test_execution_trace.log + python -m pytest --timeout 600 -vs test_memory_profiler.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/test_memory_profiler.log + python -m pytest --timeout 600 -vs test_profiler_tree.py | \ + tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log + + - name: op_dev1 + shell: bash -xe {0} + if: ${{ inputs.test_type == 'op_dev1' }} + run: | + mkdir -p ut_log/op_dev1 + cd pytorch/third_party/torch-xpu-ops/test/regressions + pytest --timeout 200 -v test_operation_on_device_1.py \ + --junit-xml=$GITHUB_WORKSPACE/ut_log/op_dev1.xml \ + 2>${{ github.workspace }}/ut_log/op_dev1/op_dev1_test_error.log | \ + tee ${{ github.workspace }}/ut_log/op_dev1/op_dev1_test.log + + - name: xpu_distributed + shell: bash -x -e -o pipefail {0} + if: ${{ inputs.test_type == 'xpu_distributed' }} + run: | + mkdir -p ut_log/xpu_distributed + cd pytorch/third_party/torch-xpu-ops/test/xpu + XCCL_ENABLE=$(python -c "import torch;print(torch.distributed.is_xccl_available())") + if [[ "${XCCL_ENABLE,,}" == 'false' ]] || [[ "${XCCL_ENABLE}" == '0' ]]; then + echo -e "[ERROR] XCCL is not enabled" + exit 1 + fi + timeout 1800 python run_distributed.py \ + 2>${{ 
github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \ + tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log From 2467e9e03a939d7c8d2305f683dad5458598afc3 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 11:12:19 +0800 Subject: [PATCH 097/160] update --- .github/workflows/_linux_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 53fdc621a0..afbae93cd8 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -59,7 +59,7 @@ jobs: uses: ./.github/actions/get-runner build: - name: ${{ contains(inputs.test_type, 'wheel') && inputs.pytorch || 'build' }} + name: ${{ inputs.pytorch }} needs: runner if: ${{ ! contains(inputs.test_type, 'wheel') }} runs-on: ${{ needs.runner.outputs.runner_id }} From 96ff039d997483bc0e2d93161843dedc1b024af0 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 11:44:30 +0800 Subject: [PATCH 098/160] update --- .github/workflows/_linux_e2e.yml | 13 ++++++++----- .github/workflows/_linux_op_benchmark.yml | 2 +- .github/workflows/_linux_ut.yml | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 763dca0ca1..984dec240b 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -78,7 +78,7 @@ jobs: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g + options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: AGENT_TOOLSDIRECTORY: /tmp/xpu-tool @@ -244,15 +244,17 @@ jobs: GH_TOKEN: ${{ github.token }} REFERENCE_ISSUE_ID: 1645 steps: + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} - name: Install gh run: | apt-get update apt-get install gh rsync ca-certificates -y find ./ |grep -v "^\./$" |xargs rm -rf - - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python }} + python -m venv /tmp/myvenv + echo "PATH=/tmp/myvenv/bin:$PATH" >> ${GITHUB_ENV} - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download Target Artifact @@ -279,6 +281,7 @@ jobs: - name: Get summary if: ${{ ! 
cancelled() }} run: | + export HOME=/tmp/ pip install pandas requests if [ "${{ inputs.suite }}" != 'pt2e' ];then bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 6524c5418f..1dbcfed652 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -59,7 +59,7 @@ jobs: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g + options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: AGENT_TOOLSDIRECTORY: /opt/xpu-tool diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 029a029498..39f219df6c 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -69,7 +69,7 @@ jobs: image: mengfeili/intel-pvc-driver:1146-1136 volumes: - ${{ github.workspace }}:${{ github.workspace }} - options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g + options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} -e ZE_AFFINITY_MASK env: From da12ea01e975e518741e02fd321ca20efc65c0e3 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 13:35:29 +0800 Subject: [PATCH 099/160] update --- .github/workflows/_linux_ut.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 39f219df6c..dce5d44753 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -238,14 +238,14 @@ jobs: if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/download-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }} + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} path: ${{ github.workspace }}/ut_log - name: Check UT Results if: ${{ contains(inputs.ut, matrix.test) }} shell: bash run: | repo="${{ github.repository }}" - cd ${{ github.workspace }}/ut_log/${{ matrix.test.name }} + cd ${{ github.workspace }}/ut_log/${{ matrix.test }} gh --repo $repo issue view $UT_SKIP_ISSUE --json body -q .body | sed '/^$/d' > Known_issue.log gh api "repos/${{ github.repository }}/issues?labels=skipped" \ --jq '.[] | select(.pull_request == null) | "Issue #\(.number): \(.title)\n\(.body)\n"' > issues.log @@ -255,10 +255,10 @@ jobs: cat issues_temp.log | awk '{print $1}' >> Known_issue.log awk -F'::' '{print $1}' issues_op_ut.log | sort -u | paste -sd ',' >> Known_issue.log cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./ - bash ut_result_check.sh ${{ matrix.test.name }} + bash ut_result_check.sh ${{ matrix.test }} - name: Upload Inductor XPU UT Log if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/upload-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test.name }}-checked + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }}-checked path: ${{ github.workspace 
}}/ut_log From 4f6ecfdb01b015c16d59e6ebe79079ca25bb60f1 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 13:45:17 +0800 Subject: [PATCH 100/160] modify ut --- .github/actions/linux-uttest/action.yml | 58 ++++++++++++------------- .github/scripts/check-ut.py | 16 +++---- .github/scripts/ut_result_check.sh | 18 ++++---- .github/workflows/_linux_ut.yml | 18 ++++---- .github/workflows/_windows_ut.yml | 4 +- .github/workflows/nightly_ondemand.yml | 6 +-- .github/workflows/pull.yml | 4 +- 7 files changed, 62 insertions(+), 62 deletions(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index af01261071..8693ab96a0 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -11,60 +11,60 @@ permissions: read-all runs: using: composite steps: - - name: op_regression + - name: ut_regression shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_regression' }} + if: ${{ inputs.test_type == 'ut_regression' }} run: | cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml - - name: op_transformers + pytest --timeout 600 -v --junit-xml=../../ut_log/ut_regression.xml + - name: ut_transformers shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_transformers' }} + if: ${{ inputs.test_type == 'ut_transformers' }} run: | export PYTORCH_TEST_WITH_SLOW=1 cd pytorch pytest --timeout 600 -v test/test_transformers.py -k xpu \ - --junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml - - name: op_extended + --junit-xml=$GITHUB_WORKSPACE/ut_log/ut_transformers.xml + - name: ut_extended shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_extended' }} + if: ${{ inputs.test_type == 'ut_extended' }} run: | export PYTORCH_TEST_WITH_SLOW=1 cd pytorch/third_party/torch-xpu-ops/test/xpu/extended timeout 3600 python run_test_with_skip.py - cp op_extended.xml $GITHUB_WORKSPACE/ut_log - - name: op_ut + cp ut_extended.xml $GITHUB_WORKSPACE/ut_log + - name: ut_op shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_ut' }} + if: ${{ inputs.test_type == 'ut_op' }} run: | export PYTORCH_TEST_WITH_SLOW=1 export PYTORCH_ENABLE_XPU_FALLBACK=1 cd pytorch/third_party/torch-xpu-ops/test/xpu timeout 10000 python run_test_with_skip.py \ - 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test.log + 2>$GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_skip_test_error.log | \ + tee $GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_skip_test.log cp *.xml $GITHUB_WORKSPACE/ut_log - find op_ut_with_skip_nn op_ut_with_skip_quantization/core -type f -exec sh -c ' + find ut_op_with_skip_nn ut_op_with_skip_quantization/core -type f -exec sh -c ' dir_path=$(dirname "$1"); case "$dir_path" in - *"op_ut_with_skip_quantization/core"*) - dir_name="op_ut_with_skip_quantization_core";; + *"ut_op_with_skip_quantization/core"*) + dir_name="ut_op_with_skip_quantization_core";; *) dir_name=$(basename "$dir_path");; esac; mv "$1" "$dir_path/${dir_name}_$(basename "$1")" ' _ {} \; - cp op_ut_with_skip_nn/*.xml $GITHUB_WORKSPACE/ut_log - cp op_ut_with_skip_quantization/core/*.xml $GITHUB_WORKSPACE/ut_log + cp ut_op_with_skip_nn/*.xml $GITHUB_WORKSPACE/ut_log + cp ut_op_with_skip_quantization/core/*.xml $GITHUB_WORKSPACE/ut_log # Cases run with a on-demand white list, since some suites are too # slow to go through all operators on CPU. So add cases on-demand # when XPU implementatoin is done. 
# test_foreach, test_decomp # Run with only timeout 10000 python run_test_with_only.py \ - 2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_only_test.log - cp op_ut_with_only.xml $GITHUB_WORKSPACE/ut_log + 2>$GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_only_test_error.log | \ + tee $GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_only_test.log + cp ut_op_with_only.xml $GITHUB_WORKSPACE/ut_log - name: torch_xpu shell: bash -xe {0} if: ${{ inputs.test_type == 'torch_xpu' }} @@ -78,9 +78,9 @@ runs: if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \ tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log - - name: xpu_profiling + - name: ut_profiling shell: bash -xe {0} - if: ${{ inputs.test_type == 'xpu_profiling' }} + if: ${{ inputs.test_type == 'ut_profiling' }} run: | mkdir -p ut_log/profile_test/issue_reproduce cd pytorch/third_party/torch-xpu-ops @@ -109,16 +109,16 @@ runs: python -m pytest --timeout 600 -vs test_profiler_tree.py | \ tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log - - name: op_dev1 + - name: xpu_dev1 shell: bash -xe {0} - if: ${{ inputs.test_type == 'op_dev1' }} + if: ${{ inputs.test_type == 'xpu_dev1' }} run: | - mkdir -p ut_log/op_dev1 + mkdir -p ut_log/xpu_dev1 cd pytorch/third_party/torch-xpu-ops/test/regressions pytest --timeout 200 -v test_operation_on_device_1.py \ - --junit-xml=$GITHUB_WORKSPACE/ut_log/op_dev1.xml \ - 2>${{ github.workspace }}/ut_log/op_dev1/op_dev1_test_error.log | \ - tee ${{ github.workspace }}/ut_log/op_dev1/op_dev1_test.log + --junit-xml=$GITHUB_WORKSPACE/ut_log/xpu_dev1.xml \ + 2>${{ github.workspace }}/ut_log/xpu_dev1/xpu_dev1_test_error.log | \ + tee ${{ github.workspace }}/ut_log/xpu_dev1/xpu_dev1_test.log - name: xpu_distributed shell: bash -x -e -o pipefail {0} diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py index be84a64793..3364efa61c 100644 --- a/.github/scripts/check-ut.py +++ b/.github/scripts/check-ut.py @@ -186,14 +186,14 @@ def parse_log_file(log_file): return summary def determine_category(ut): - if ut == 'op_regression': - return 'op_regression' - elif ut == 'op_dev1': - return 'op_dev1' - elif ut == 'op_extended': - return 'op_extended' - elif 'op_ut' in ut: - return 'op_ut' + if ut == 'ut_regression': + return 'ut_regression' + elif ut == 'xpu_dev1': + return 'xpu_dev1' + elif ut == 'ut_extended': + return 'ut_extended' + elif 'ut_op' in ut: + return 'ut_op' else: return 'unknown' diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh index 0ad52580f7..bd7ccd490a 100644 --- a/.github/scripts/ut_result_check.sh +++ b/.github/scripts/ut_result_check.sh @@ -1,5 +1,5 @@ #!/bin/bash -ut_suite="${1:-op_regression}" # op_regression / op_extended / op_ut / torch_xpu +ut_suite="${1:-ut_regression}" # ut_regression / ut_extended / ut_op / torch_xpu # usage # compare_and_filter_logs [output.log] @@ -44,7 +44,7 @@ compare_and_filter_logs() { fi } -if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_dev1' || "${ut_suite}" == 'op_extended' || "${ut_suite}" == 'op_transformers' ]]; then +if [[ "${ut_suite}" == 'ut_regression' || "${ut_suite}" == 'xpu_dev1' || "${ut_suite}" == 'ut_extended' || "${ut_suite}" == 'ut_transformers' ]]; then grep -E "FAILED" "${ut_suite}"_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_failed.log grep -E "have failures" "${ut_suite}"_test.log | awk 
'{print $1}' >> ./"${ut_suite}"_failed.log grep "PASSED" "${ut_suite}"_test.log | awk '{print $1}' > ./"${ut_suite}"_passed.log @@ -66,11 +66,11 @@ if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_dev1' || "${ut_su echo -e "[PASS] UT ${ut_suite} test Pass" fi fi -if [[ "${ut_suite}" == 'op_ut' ]]; then - grep -E "FAILED" op_ut_with_skip_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_with_skip_test_failed.log - grep -E "have failures" op_ut_with_skip_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_skip_test_failed.log - grep -E "FAILED" op_ut_with_only_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_with_only_test_failed.log - grep -E "have failures" op_ut_with_only_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_only_test_failed.log +if [[ "${ut_suite}" == 'ut_op' ]]; then + grep -E "FAILED" ut_op_with_skip_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_with_skip_test_failed.log + grep -E "have failures" ut_op_with_skip_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_skip_test_failed.log + grep -E "FAILED" ut_op_with_only_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_with_only_test_failed.log + grep -E "have failures" ut_op_with_only_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_only_test_failed.log compare_and_filter_logs "${ut_suite}"_with_skip_test_failed.log Known_issue.log if [[ -f "${ut_suite}_with_skip_test_failed_filtered.log" ]]; then num_failed_with_skip=$(wc -l < "./${ut_suite}_with_skip_test_failed_filtered.log") @@ -92,8 +92,8 @@ if [[ "${ut_suite}" == 'op_ut' ]]; then echo -e "=========================================================================" cat "./${ut_suite}_with_only_test_failed.log" ((num_failed=num_failed_with_skip+num_failed_with_only)) - grep "PASSED" op_ut_with_skip_test.log | awk '{print $1}' > ./"${ut_suite}"_with_skip_test_passed.log - grep "PASSED" op_ut_with_only_test.log | awk '{print $1}' > ./"${ut_suite}"_with_only_test_passed.log + grep "PASSED" ut_op_with_skip_test.log | awk '{print $1}' > ./"${ut_suite}"_with_skip_test_passed.log + grep "PASSED" ut_op_with_only_test.log | awk '{print $1}' > ./"${ut_suite}"_with_only_test_passed.log num_passed_with_skip=$(wc -l < "./${ut_suite}_with_skip_test_passed.log") num_passed_with_only=$(wc -l < "./${ut_suite}_with_only_test_passed.log") ((num_passed=num_passed_with_skip+num_passed_with_only)) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index dce5d44753..f1940bd088 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -30,7 +30,7 @@ on: ut: required: true type: string - description: UT scope. `op_regression,op_dev1,op_transformers,op_extended,op_ut,torch_xpu` Delimiter is comma + description: UT scope. 
`ut_regression,xpu_dev1,ut_transformers,ut_extended,ut_op,torch_xpu` Delimiter is comma disabled_tests: type: string default: '' @@ -63,7 +63,7 @@ jobs: normal: needs: runner runs-on: ${{ needs.runner.outputs.runner_id }} - if: ${{ inputs.ut != 'xpu_distributed' && !contains(inputs.disabled_tests, 'disable_ut') }} + if: ${{ contains(inputs.ut, 'p') && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 300 container: image: mengfeili/intel-pvc-driver:1146-1136 @@ -79,7 +79,7 @@ jobs: strategy: fail-fast: false matrix: - test: [op_regression, op_transformers, op_extended, op_ut, torch_xpu, xpu_profiling] + test: [ut_regression, ut_transformers, ut_extended, ut_op, torch_xpu, ut_profiling] steps: - name: Cleanup workspace if: ${{ contains(inputs.ut, matrix.test) }} @@ -125,7 +125,7 @@ jobs: devices: runs-on: pvc_rolling - if: ${{ contains(inputs.ut, 'op_dev1') && !contains(inputs.disabled_tests, 'disable_ut') }} + if: ${{ contains(inputs.ut, 'xpu_dev1') && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 5 env: GH_TOKEN: ${{ github.token }} @@ -154,12 +154,12 @@ jobs: - name: Run XPU UT Test uses: ./.github/actions/linux-uttest with: - test_type: op_dev1 + test_type: xpu_dev1 - name: Upload Inductor XPU UT Log if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: - name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-op_dev1 + name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-xpu_dev1 path: ${{ github.workspace }}/ut_log distributed: @@ -222,7 +222,7 @@ jobs: strategy: fail-fast: false matrix: - test: [op_regression, op_transformers, op_extended, op_ut, torch_xpu, xpu_profiling, op_dev1, xpu_distributed] + test: [ut_regression, ut_transformers, ut_extended, ut_op, torch_xpu, ut_profiling, xpu_dev1, xpu_distributed] env: GH_TOKEN: ${{ github.token }} UT_SKIP_ISSUE: 1624 @@ -251,9 +251,9 @@ jobs: --jq '.[] | select(.pull_request == null) | "Issue #\(.number): \(.title)\n\(.body)\n"' > issues.log awk '/Cases:/ {flag=1; next} /^\|\||^$/ {flag=0} flag' issues.log | \ grep -Eo 'test[^[:space:]]+( \|\| [^[:space:]]+)?' | sed 's/ *|| */ /g' | sort -u > issues_temp.log - awk '$2 == "op_ut" {print $1}' issues_temp.log > issues_op_ut.log + awk '$2 == "ut_op" {print $1}' issues_temp.log > issues_ut_op.log cat issues_temp.log | awk '{print $1}' >> Known_issue.log - awk -F'::' '{print $1}' issues_op_ut.log | sort -u | paste -sd ',' >> Known_issue.log + awk -F'::' '{print $1}' issues_ut_op.log | sort -u | paste -sd ',' >> Known_issue.log cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./ bash ut_result_check.sh ${{ matrix.test }} - name: Upload Inductor XPU UT Log diff --git a/.github/workflows/_windows_ut.yml b/.github/workflows/_windows_ut.yml index 9cb27a30f1..f30e469749 100644 --- a/.github/workflows/_windows_ut.yml +++ b/.github/workflows/_windows_ut.yml @@ -17,7 +17,7 @@ on: required: true type: string default: '' - description: UT scope. `op_regression,op_dev1,op_extended,op_ut,torch_xpu` Delimiter is comma + description: UT scope. 
`ut_regression,xpu_dev1,ut_extended,ut_op,torch_xpu` Delimiter is comma python: required: false type: string @@ -157,7 +157,7 @@ jobs: path: 'C:\actions-runner\_work\torch-xpu-ops\pytorch\dist' - name: Run XPU OP Extended UT - if: contains(inputs.ut, 'op_extended') || github.event_name == 'schedule' + if: contains(inputs.ut, 'ut_extended') || github.event_name == 'schedule' shell: cmd run: | call "C:\ProgramData\miniforge3\Scripts\activate.bat" diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 1f00b63263..f0a452ae33 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -29,7 +29,7 @@ on: ut: type: string default: '' - description: UT scope. `op_regression,op_dev1,op_transformers,op_extended,op_ut,xpu_profiling,xpu_distributed,microbench,windows`. Delimiter is comma + description: UT scope. `ut_regression,ut_transformers,ut_extended,ut_op,ut_profiling,xpu_dev1,xpu_distributed,microbench,windows`. Delimiter is comma suite: type: string default: '' @@ -130,7 +130,7 @@ jobs: torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} - ut: ${{ github.event_name == 'schedule' && 'op_regression,op_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} + ut: ${{ github.event_name == 'schedule' && 'ut_regression,xpu_dev1,ut_transformers,ut_extended,ut_op' || inputs.ut }} Linux-Nightly-Ondemand-E2E-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} @@ -168,7 +168,7 @@ jobs: name: windows uses: ./.github/workflows/_windows_ut.yml with: - ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }} + ut: ${{ github.event_name == 'schedule' && 'ut_extended,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} src_changed: false has_label: true diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index b2f098efaf..0a7def8e9e 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -108,7 +108,7 @@ jobs: runner: linux.idc.xpu test_type: build-cicd pytorch: main - ut: op_regression,op_dev1,op_transformers,op_extended,op_ut,xpu_distributed + ut: ut_regression,xpu_dev1,ut_transformers,ut_extended,ut_op,xpu_distributed disabled_tests: ${{ needs.conditions-filter.outputs.disabled_tests }} linux-e2e: @@ -127,7 +127,7 @@ jobs: needs: [conditions-filter] uses: ./.github/workflows/_windows_ut.yml with: - ut: op_extended,torch_xpu + ut: ut_extended,torch_xpu runner: Windows_CI src_changed: ${{ needs.conditions-filter.outputs.src_changed }} has_label: ${{ needs.conditions-filter.outputs.has_label }} From ba97507afa546c37d033fa4f670687c394d1d886 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 13:54:26 +0800 Subject: [PATCH 101/160] modify ut --- .github/actions/linux-uttest/action.yml | 8 ++++---- .github/scripts/ut_result_check.sh | 8 ++++---- .github/workflows/_linux_ut.yml | 8 ++++---- .github/workflows/_windows_ut.yml | 4 ++-- .github/workflows/nightly_ondemand.yml | 6 +++--- .github/workflows/pull.yml | 4 ++-- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 8693ab96a0..43dcc5a3fa 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -65,9 +65,9 @@ runs: 
2>$GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_only_test_error.log | \ tee $GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_only_test.log cp ut_op_with_only.xml $GITHUB_WORKSPACE/ut_log - - name: torch_xpu + - name: ut_torch shell: bash -xe {0} - if: ${{ inputs.test_type == 'torch_xpu' }} + if: ${{ inputs.test_type == 'ut_torch' }} run: | export PYTORCH_TEST_WITH_SLOW=1 export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" @@ -76,8 +76,8 @@ runs: for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done for test in $(ls test/xpu | grep test); do test_cmd="${test_cmd} xpu/$test"; done if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi - eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log + eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/ut_torch/torch_xpu_test_error.log | \ + tee $GITHUB_WORKSPACE/ut_log/ut_torch/torch_xpu_test.log - name: ut_profiling shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_profiling' }} diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh index bd7ccd490a..7e370f813d 100644 --- a/.github/scripts/ut_result_check.sh +++ b/.github/scripts/ut_result_check.sh @@ -1,5 +1,5 @@ #!/bin/bash -ut_suite="${1:-ut_regression}" # ut_regression / ut_extended / ut_op / torch_xpu +ut_suite="${1:-ut_regression}" # ut_regression / ut_extended / ut_op / ut_torch # usage # compare_and_filter_logs [output.log] @@ -104,13 +104,13 @@ if [[ "${ut_suite}" == 'ut_op' ]]; then echo -e "[PASS] UT ${ut_suite} test Pass" fi fi -if [[ "${ut_suite}" == 'torch_xpu' ]]; then +if [[ "${ut_suite}" == 'ut_torch' ]]; then echo "Pytorch XPU binary UT checking" cd ../../pytorch || exit for xpu_case in build/bin/*{xpu,sycl}*; do if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then case_name=$(basename "$xpu_case") - cd ../ut_log/torch_xpu || exit + cd ../ut_log/ut_torch || exit grep -E "FAILED|have failures" binary_ut_"${ut_suite}"_"${case_name}"_test.log | awk '{print $2}' > ./binary_ut_"${ut_suite}"_"${case_name}"_failed.log wc -l < "./binary_ut_${ut_suite}_${case_name}_failed.log" | tee -a ./binary_ut_"${ut_suite}"_failed_summary.log grep -E "PASSED|Pass" binary_ut_"${ut_suite}"_"${case_name}"_test.log | awk '{print $2}' > ./binary_ut_"${ut_suite}"_"${case_name}"_passed.log @@ -121,7 +121,7 @@ if [[ "${ut_suite}" == 'torch_xpu' ]]; then echo -e "=========================================================================" echo -e "Show Failed cases in ${ut_suite}" echo -e "=========================================================================" - cd ../ut_log/torch_xpu || exit + cd ../ut_log/ut_torch || exit cat "./binary_ut_${ut_suite}_${case_name}_failed.log" num_failed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_"${ut_suite}"_failed_summary.log) num_passed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_"${ut_suite}"_passed_summary.log) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index f1940bd088..0427b0a47b 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -30,7 +30,7 @@ on: ut: required: true type: string - description: UT scope. `ut_regression,xpu_dev1,ut_transformers,ut_extended,ut_op,torch_xpu` Delimiter is comma + description: UT scope. 
`ut_regression,ut_transformers,ut_extended,ut_op,ut_torch,xpu_dev1` Delimiter is comma disabled_tests: type: string default: '' @@ -63,7 +63,7 @@ jobs: normal: needs: runner runs-on: ${{ needs.runner.outputs.runner_id }} - if: ${{ contains(inputs.ut, 'p') && !contains(inputs.disabled_tests, 'disable_ut') }} + if: ${{ contains(inputs.ut, 'ut_') && !contains(inputs.disabled_tests, 'disable_ut') }} timeout-minutes: 300 container: image: mengfeili/intel-pvc-driver:1146-1136 @@ -79,7 +79,7 @@ jobs: strategy: fail-fast: false matrix: - test: [ut_regression, ut_transformers, ut_extended, ut_op, torch_xpu, ut_profiling] + test: [ut_regression, ut_transformers, ut_extended, ut_op, ut_torch, ut_profiling] steps: - name: Cleanup workspace if: ${{ contains(inputs.ut, matrix.test) }} @@ -222,7 +222,7 @@ jobs: strategy: fail-fast: false matrix: - test: [ut_regression, ut_transformers, ut_extended, ut_op, torch_xpu, ut_profiling, xpu_dev1, xpu_distributed] + test: [ut_regression, ut_transformers, ut_extended, ut_op, ut_torch, ut_profiling, xpu_dev1, xpu_distributed] env: GH_TOKEN: ${{ github.token }} UT_SKIP_ISSUE: 1624 diff --git a/.github/workflows/_windows_ut.yml b/.github/workflows/_windows_ut.yml index f30e469749..3c211ccfc2 100644 --- a/.github/workflows/_windows_ut.yml +++ b/.github/workflows/_windows_ut.yml @@ -17,7 +17,7 @@ on: required: true type: string default: '' - description: UT scope. `ut_regression,xpu_dev1,ut_extended,ut_op,torch_xpu` Delimiter is comma + description: UT scope. `ut_regression,xpu_dev1,ut_extended,ut_op,ut_torch` Delimiter is comma python: required: false type: string @@ -169,7 +169,7 @@ jobs: python run_test_with_skip_mtl.py - name: Run Test XPU UT - if: contains(inputs.ut, 'torch_xpu') || github.event_name == 'schedule' + if: contains(inputs.ut, 'ut_torch') || github.event_name == 'schedule' shell: cmd run: | call "C:\ProgramData\miniforge3\Scripts\activate.bat" diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index f0a452ae33..43f74faa50 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -29,7 +29,7 @@ on: ut: type: string default: '' - description: UT scope. `ut_regression,ut_transformers,ut_extended,ut_op,ut_profiling,xpu_dev1,xpu_distributed,microbench,windows`. Delimiter is comma + description: UT scope. `ut_regression,ut_transformers,ut_extended,ut_op,ut_profiling,ut_torch,xpu_dev1,xpu_distributed,microbench,windows`. 
Delimiter is comma suite: type: string default: '' @@ -119,7 +119,7 @@ jobs: python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} Linux-Nightly-Ondemand-UT-Tests: - if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'p') }} + if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'ut_') || contains(inputs.ut, 'xpu_') }} name: linux-ut needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_ut.yml @@ -168,7 +168,7 @@ jobs: name: windows uses: ./.github/workflows/_windows_ut.yml with: - ut: ${{ github.event_name == 'schedule' && 'ut_extended,torch_xpu' || inputs.ut }} + ut: ${{ github.event_name == 'schedule' && 'ut_extended,ut_torch' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} src_changed: false has_label: true diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 0a7def8e9e..f009d10cb9 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -108,7 +108,7 @@ jobs: runner: linux.idc.xpu test_type: build-cicd pytorch: main - ut: ut_regression,xpu_dev1,ut_transformers,ut_extended,ut_op,xpu_distributed + ut: ut_regression,ut_transformers,ut_extended,ut_op,xpu_dev1,xpu_distributed disabled_tests: ${{ needs.conditions-filter.outputs.disabled_tests }} linux-e2e: @@ -127,7 +127,7 @@ jobs: needs: [conditions-filter] uses: ./.github/workflows/_windows_ut.yml with: - ut: ut_extended,torch_xpu + ut: ut_extended,ut_torch runner: Windows_CI src_changed: ${{ needs.conditions-filter.outputs.src_changed }} has_label: ${{ needs.conditions-filter.outputs.has_label }} From 98964410b82a1f432ad4d20f4e83f01c62816130 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 14:30:11 +0800 Subject: [PATCH 102/160] fix pip warnings --- .github/workflows/_linux_build.yml | 8 +++++--- .github/workflows/_linux_e2e.yml | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index afbae93cd8..e9115348bb 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -71,6 +71,8 @@ jobs: PATH: /tmp/xpu-venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: /tmp/xpu-tool + PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache + PIP_ROOT_USER_ACTION: ignore timeout-minutes: 300 steps: - name: Install gh-cli @@ -120,10 +122,10 @@ jobs: source /opt/rh/gcc-toolset-11/enable # oneAPI DLE if [ "${{ inputs.oneapi }}" != "installed" ];then - rm -rf ${HOME}/intel ${HOME}/.intel + rm -rf ${HOME}/intel ${HOME}/.intel /opt/intel wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi - export XPU_ONEAPI_PATH="${HOME}/intel/oneapi" + bash oneapi.sh -a -s --eula accept --action install --install-dir /opt/intel/oneapi + export XPU_ONEAPI_PATH="/opt/intel/oneapi" fi source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 984dec240b..6d202ade4b 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -240,9 +240,11 @@ jobs: container: image: ubuntu:latest env: - AGENT_TOOLSDIRECTORY: /tmp/xpu-tool GH_TOKEN: ${{ github.token }} REFERENCE_ISSUE_ID: 1645 + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool + PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache + PIP_ROOT_USER_ACTION: 
ignore steps: - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 @@ -281,7 +283,6 @@ jobs: - name: Get summary if: ${{ ! cancelled() }} run: | - export HOME=/tmp/ pip install pandas requests if [ "${{ inputs.suite }}" != 'pt2e' ];then bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} From 50467eea4e1a1dd0993df29e67d89fa2e77af5a9 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 14:33:20 +0800 Subject: [PATCH 103/160] modify ut logs path --- .github/workflows/_linux_ut.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 0427b0a47b..039f5547f3 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -115,7 +115,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} - path: ${{ github.workspace }}/ut_log + path: ${{ github.workspace }}/ut_log/${{ matrix.test }} - name: Upload XPU UT Failure list if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/upload-artifact@v4 From 5c62bc925fb2e937631095e3db71b3cd40de3ad5 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 14:34:39 +0800 Subject: [PATCH 104/160] modify ut logs path --- .github/workflows/_linux_ut.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 039f5547f3..bf5f1e877e 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -245,6 +245,7 @@ jobs: shell: bash run: | repo="${{ github.repository }}" + ls -al ${{ github.workspace }}/ut_log cd ${{ github.workspace }}/ut_log/${{ matrix.test }} gh --repo $repo issue view $UT_SKIP_ISSUE --json body -q .body | sed '/^$/d' > Known_issue.log gh api "repos/${{ github.repository }}/issues?labels=skipped" \ From 8b33c216239ba1d7b5edec1eaccbf3f91812b07d Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 15:20:42 +0800 Subject: [PATCH 105/160] set run name for nightly and on-demand tests --- .github/workflows/nightly_ondemand.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 43f74faa50..e68bade8b7 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -53,6 +53,8 @@ on: permissions: read-all +run-name: ${{ (contains(github.event.schedule, '13') && 'Nightly') || (contains(github.event.schedule, '16') && 'Weekly') || 'On-demand' }} / ${{ (contains(github.event.schedule, '10') && 'Source Code') || (contains(github.event.schedule, '30') && 'CD Wheel') || inputs.pytorch }} + jobs: Conditions-Filter: name: conditions-filter From f08c528527be4855bcb29745381759a9f6a6fa6c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 15:25:21 +0800 Subject: [PATCH 106/160] modify ut logs path --- .github/workflows/_linux_ut.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index bf5f1e877e..a86c514c88 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -107,8 +107,8 @@ jobs: run: | pip install junitparser python ./.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true - if [ -e "ut_failure_list.csv" ];then - cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv + if [ -e 
ut_failure_list.csv ];then + cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv || true fi - name: Upload Inductor XPU UT Log if: ${{ contains(inputs.ut, matrix.test) }} From 55bd5dc4e3885ceb5aea8a89c27de120e1396095 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 15:36:36 +0800 Subject: [PATCH 107/160] ut summray always --- .github/workflows/_linux_build.yml | 7 +++---- .github/workflows/_linux_e2e.yml | 6 +++--- .github/workflows/_linux_ut.yml | 1 + 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index e9115348bb..ebb6b6fb46 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -68,11 +68,10 @@ jobs: volumes: - ${{ github.workspace }}:${{ github.workspace }} env: - PATH: /tmp/xpu-venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: /tmp/xpu-tool PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache - PIP_ROOT_USER_ACTION: ignore timeout-minutes: 300 steps: - name: Install gh-cli @@ -88,9 +87,9 @@ jobs: gh --version - name: Setup python-${{ inputs.python }} run: | - rm -rf /tmp/xpu-venv + rm -rf /tmp/xpu-tool/myvenv local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv /tmp/xpu-venv + /opt/python/${local_python}/bin/python -m venv /tmp/xpu-tool/myvenv which python && python -V which pip && pip list pip install -U pip wheel setuptools diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 6d202ade4b..3f932b60e4 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -244,7 +244,7 @@ jobs: REFERENCE_ISSUE_ID: 1645 AGENT_TOOLSDIRECTORY: /tmp/xpu-tool PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache - PIP_ROOT_USER_ACTION: ignore + PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin steps: - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 @@ -255,8 +255,8 @@ jobs: apt-get update apt-get install gh rsync ca-certificates -y find ./ |grep -v "^\./$" |xargs rm -rf - python -m venv /tmp/myvenv - echo "PATH=/tmp/myvenv/bin:$PATH" >> ${GITHUB_ENV} + rm -rf /tmp/xpu-tool/myvenv + python -m venv /tmp/xpu-tool/myvenv - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download Target Artifact diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index a86c514c88..6d6527ea17 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -217,6 +217,7 @@ jobs: summary: needs: [normal, devices, distributed] + if: ${{ ! 
cancelled() }} runs-on: ubuntu-latest timeout-minutes: 30 strategy: From dbd3a27c52989b530470c811ad177beca1178da8 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 16:11:18 +0800 Subject: [PATCH 108/160] fix ut logs path --- .github/actions/linux-uttest/action.yml | 63 ++++++++++++++----------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 43dcc5a3fa..48afe04125 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -15,24 +15,33 @@ runs: shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_regression' }} run: | + mkdir -p ut_log/ut_regression cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 600 -v --junit-xml=../../ut_log/ut_regression.xml + pytest --timeout 600 -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ + 2> ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test_error.log | \ + tee ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test.log - name: ut_transformers shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_transformers' }} run: | export PYTORCH_TEST_WITH_SLOW=1 + mkdir -p ut_log/ut_transformers cd pytorch pytest --timeout 600 -v test/test_transformers.py -k xpu \ - --junit-xml=$GITHUB_WORKSPACE/ut_log/ut_transformers.xml + --junit-xml=${{ github.workspace }}/ut_log/ut_transformers.xml \ + 2> ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test_error.log | \ + tee ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test.log - name: ut_extended shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_extended' }} run: | export PYTORCH_TEST_WITH_SLOW=1 + mkdir -p ut_log/ut_extended cd pytorch/third_party/torch-xpu-ops/test/xpu/extended - timeout 3600 python run_test_with_skip.py - cp ut_extended.xml $GITHUB_WORKSPACE/ut_log + timeout 3600 python run_test_with_skip.py \ + 2> ${{ github.workspace }}/ut_log/ut_extended/ut_extended_test_error.log | \ + tee ${{ github.workspace }}/ut_log/ut_extended/ut_extended_test.log + cp ut_extended.xml ${{ github.workspace }}/ut_log - name: ut_op shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_op' }} @@ -41,9 +50,9 @@ runs: export PYTORCH_ENABLE_XPU_FALLBACK=1 cd pytorch/third_party/torch-xpu-ops/test/xpu timeout 10000 python run_test_with_skip.py \ - 2>$GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_skip_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_skip_test.log - cp *.xml $GITHUB_WORKSPACE/ut_log + 2> ${{ github.workspace }}/ut_log/ut_op/ut_op_with_skip_test_error.log | \ + tee ${{ github.workspace }}/ut_log/ut_op/ut_op_with_skip_test.log + cp *.xml ${{ github.workspace }}/ut_log find ut_op_with_skip_nn ut_op_with_skip_quantization/core -type f -exec sh -c ' dir_path=$(dirname "$1"); case "$dir_path" in @@ -54,17 +63,17 @@ runs: esac; mv "$1" "$dir_path/${dir_name}_$(basename "$1")" ' _ {} \; - cp ut_op_with_skip_nn/*.xml $GITHUB_WORKSPACE/ut_log - cp ut_op_with_skip_quantization/core/*.xml $GITHUB_WORKSPACE/ut_log + cp ut_op_with_skip_nn/*.xml ${{ github.workspace }}/ut_log + cp ut_op_with_skip_quantization/core/*.xml ${{ github.workspace }}/ut_log # Cases run with a on-demand white list, since some suites are too # slow to go through all operators on CPU. So add cases on-demand # when XPU implementatoin is done. 
# test_foreach, test_decomp # Run with only timeout 10000 python run_test_with_only.py \ - 2>$GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_only_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/ut_op/ut_op_with_only_test.log - cp ut_op_with_only.xml $GITHUB_WORKSPACE/ut_log + 2> ${{ github.workspace }}/ut_log/ut_op/ut_op_with_only_test_error.log | \ + tee ${{ github.workspace }}/ut_log/ut_op/ut_op_with_only_test.log + cp ut_op_with_only.xml ${{ github.workspace }}/ut_log - name: ut_torch shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_torch' }} @@ -76,8 +85,8 @@ runs: for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done for test in $(ls test/xpu | grep test); do test_cmd="${test_cmd} xpu/$test"; done if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi - eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/ut_torch/torch_xpu_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/ut_torch/torch_xpu_test.log + eval $test_cmd 2> ${{ github.workspace }}/ut_log/ut_torch/torch_xpu_test_error.log | \ + tee ${{ github.workspace }}/ut_log/ut_torch/torch_xpu_test.log - name: ut_profiling shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_profiling' }} @@ -86,28 +95,28 @@ runs: cd pytorch/third_party/torch-xpu-ops # RN50 Test PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0 - cp profiling.fp32.train.pt $GITHUB_WORKSPACE/ut_log/profile_test + cp profiling.fp32.train.pt ${{ github.workspace }}/ut_log/profile_test # All Issue Reproduce UT python -u test/profiling/correlation_id_mixed.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/correlation_id_mixed.log + tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/correlation_id_mixed.log python -u test/profiling/reproducer.missing.gpu.kernel.time.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/reproducer.missing.gpu.kernel.time.log + tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/reproducer.missing.gpu.kernel.time.log python -u test/profiling/time_precision_in_profile.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/time_precision_in_profile.log + tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/time_precision_in_profile.log python -u test/profiling/profile_partial_runtime_ops.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/profile_partial_runtime_ops.log + tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/profile_partial_runtime_ops.log python -u test/profiling/triton_xpu_ops_time.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log + tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log # All xpu ut under test/profiler cd ../pytorch/test/profiler python -m pytest --timeout 600 -vs test_cpp_thread.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_cpp_thread.log + tee ${{ github.workspace }}/ut_log/profile_test/test_cpp_thread.log python -m pytest --timeout 600 -vs test_execution_trace.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_execution_trace.log + tee ${{ github.workspace }}/ut_log/profile_test/test_execution_trace.log python -m pytest --timeout 600 -vs test_memory_profiler.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_memory_profiler.log + tee ${{ github.workspace }}/ut_log/profile_test/test_memory_profiler.log python -m pytest --timeout 600 -vs test_profiler_tree.py | \ - tee $GITHUB_WORKSPACE/ut_log/profile_test/test_profiler_tree.log 
+ tee ${{ github.workspace }}/ut_log/profile_test/test_profiler_tree.log - name: xpu_dev1 shell: bash -xe {0} @@ -116,8 +125,8 @@ runs: mkdir -p ut_log/xpu_dev1 cd pytorch/third_party/torch-xpu-ops/test/regressions pytest --timeout 200 -v test_operation_on_device_1.py \ - --junit-xml=$GITHUB_WORKSPACE/ut_log/xpu_dev1.xml \ - 2>${{ github.workspace }}/ut_log/xpu_dev1/xpu_dev1_test_error.log | \ + --junit-xml=${{ github.workspace }}/ut_log/xpu_dev1.xml \ + 2> ${{ github.workspace }}/ut_log/xpu_dev1/xpu_dev1_test_error.log | \ tee ${{ github.workspace }}/ut_log/xpu_dev1/xpu_dev1_test.log - name: xpu_distributed @@ -132,5 +141,5 @@ runs: exit 1 fi timeout 1800 python run_distributed.py \ - 2>${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \ + 2> ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \ tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log From 2e7680d6d12cb3bd9854dc82d57f6784d88f159e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 16:35:51 +0800 Subject: [PATCH 109/160] fix e2e summary permission --- .github/workflows/_linux_e2e.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 3f932b60e4..ccc71b4bb6 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -312,3 +312,7 @@ jobs: echo "Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}" |tee -a new_body.txt fi gh --repo ${GITHUB_REPOSITORY} issue edit ${REFERENCE_ISSUE_ID} --body-file new_body.txt + - name: Set permissions + if: ${{ always() }} + run: | + chmod 777 /__w -R From 0a78df1851ef5fce946cb448fe6eeafe68161e49 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 17:00:13 +0800 Subject: [PATCH 110/160] fix ut log path --- .github/workflows/_linux_ut.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 6d6527ea17..359ea9199f 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -115,7 +115,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} - path: ${{ github.workspace }}/ut_log/${{ matrix.test }} + path: ${{ github.workspace }}/ut_log - name: Upload XPU UT Failure list if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/upload-artifact@v4 From 074992f241e031942b11ace8640011c97cf649c8 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 17:03:46 +0800 Subject: [PATCH 111/160] update --- .github/workflows/_linux_e2e.yml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index ccc71b4bb6..ab9a6d2b69 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -232,19 +232,16 @@ jobs: path: ${{ github.workspace }}/upload_files summary: - runs-on: [self-hosted, Linux] + runs-on: ubuntu-latest if: ${{ ! 
cancelled() }} needs: test permissions: issues: write - container: - image: ubuntu:latest - env: - GH_TOKEN: ${{ github.token }} - REFERENCE_ISSUE_ID: 1645 - AGENT_TOOLSDIRECTORY: /tmp/xpu-tool - PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache - PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + env: + GH_TOKEN: ${{ github.token }} + REFERENCE_ISSUE_ID: 1645 + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool + PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin steps: - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 From b11510f06bdcef67506610a59225436e93065b76 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 17:04:40 +0800 Subject: [PATCH 112/160] update --- .github/workflows/_linux_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index ab9a6d2b69..6f91780e3d 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -312,4 +312,4 @@ jobs: - name: Set permissions if: ${{ always() }} run: | - chmod 777 /__w -R + find ./ |grep -v "^\./$" |xargs rm -rf From a18995b6618d868056c551ed4f71b0dd594ce4a9 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 18:28:43 +0800 Subject: [PATCH 113/160] modify e2e summary --- .github/workflows/_linux_e2e.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 6f91780e3d..b9c813c067 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -241,19 +241,16 @@ jobs: GH_TOKEN: ${{ github.token }} REFERENCE_ISSUE_ID: 1645 AGENT_TOOLSDIRECTORY: /tmp/xpu-tool - PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin steps: - name: Setup python-${{ inputs.python }} uses: actions/setup-python@v5 with: python-version: ${{ inputs.python }} - - name: Install gh + - name: Install gh-cli run: | - apt-get update - apt-get install gh rsync ca-certificates -y + sudo apt-get update + sudo apt-get install gh rsync ca-certificates -y find ./ |grep -v "^\./$" |xargs rm -rf - rm -rf /tmp/xpu-tool/myvenv - python -m venv /tmp/xpu-tool/myvenv - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Download Target Artifact From 754202d4838ec597d8ebd7b4659e3de06f453ccf Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 18:32:18 +0800 Subject: [PATCH 114/160] modify e2e summary --- .github/actions/linux-uttest/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 48afe04125..8a741bf1d1 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -48,6 +48,7 @@ runs: run: | export PYTORCH_TEST_WITH_SLOW=1 export PYTORCH_ENABLE_XPU_FALLBACK=1 + mkdir -p ut_log/ut_op cd pytorch/third_party/torch-xpu-ops/test/xpu timeout 10000 python run_test_with_skip.py \ 2> ${{ github.workspace }}/ut_log/ut_op/ut_op_with_skip_test_error.log | \ @@ -80,6 +81,7 @@ runs: run: | export PYTORCH_TEST_WITH_SLOW=1 export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" + mkdir -p ut_log/ut_torch cd pytorch test_cmd="python test/run_test.py --include " for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done From 27c5cff7996a5cccccf09e3c841786800d998560 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Fri, 25 Jul 2025 20:52:16 +0800 Subject: 
[PATCH 115/160] modify e2e summary

---
 .github/actions/linux-e2etest/action.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml
index 559b3b307b..1404414b3e 100644
--- a/.github/actions/linux-e2etest/action.yml
+++ b/.github/actions/linux-e2etest/action.yml
@@ -61,6 +61,7 @@ runs:
         # install timm dependencies without torch and torchvision
         pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch)
       fi
+      pip install -U numpy==1.26.4
       pip list |grep -E 'intel|torch'
   - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
     shell: bash -xe {0}

From 92d7ff1237486d0410762a44c10ac8149627a89a Mon Sep 17 00:00:00 2001
From: mengfei25
Date: Fri, 25 Jul 2025 20:55:49 +0800
Subject: [PATCH 116/160] update

---
 .github/workflows/_linux_e2e.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml
index b9c813c067..bb256881d3 100644
--- a/.github/workflows/_linux_e2e.yml
+++ b/.github/workflows/_linux_e2e.yml
@@ -296,7 +296,7 @@ jobs:
           fi
         fi
     - name: Upload Reference Run ID
-      if: ${{ ! (contains(inputs.test_type, 'ondemand') && contains(inputs.test_type, 'cicd')) && github.repository_owner == 'intel' }}
+      if: ${{ ! (contains(inputs.test_type, 'ondemand') || contains(inputs.test_type, 'cicd')) && github.repository_owner == 'intel' }}
      run: |
        gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee new_body.txt 2>&1
        has_or_not="$(grep -c 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt)"

From 56520ca1ae843fe819b912394118ebe1206c81d9 Mon Sep 17 00:00:00 2001
From: mengfei25
Date: Mon, 4 Aug 2025 09:40:38 +0800
Subject: [PATCH 117/160] update

---
 .github/actions/linux-uttest/action.yml | 10 +++++-----
 .github/workflows/_linux_op_benchmark.yml | 4 ++--
 test/xpu/extended/run_test_with_skip.py | 2 +-
 test/xpu/run_test_with_only.py | 6 +++---
 test/xpu/xpu_test_utils.py | 6 +++---
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml
index 8a741bf1d1..7b6ad49cfe 100644
--- a/.github/actions/linux-uttest/action.yml
+++ b/.github/actions/linux-uttest/action.yml
@@ -17,7 +17,7 @@ runs:
       run: |
        mkdir -p ut_log/ut_regression
        cd pytorch/third_party/torch-xpu-ops/test/regressions
-        pytest --timeout 600 -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \
+        pytest --timeout 600 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \
          2> ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test.log
     - name: ut_transformers
@@ -27,7 +27,7 @@ runs:
        export PYTORCH_TEST_WITH_SLOW=1
        mkdir -p ut_log/ut_transformers
        cd pytorch
-        pytest --timeout 600 -v test/test_transformers.py -k xpu \
+        pytest --timeout 600 --timeout_method=thread -v test/test_transformers.py -k xpu \
          --junit-xml=${{ github.workspace }}/ut_log/ut_transformers.xml \
          2> ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test.log
@@ -38,7 +38,7 @@ runs:
        export PYTORCH_TEST_WITH_SLOW=1
        mkdir -p ut_log/ut_extended
        cd pytorch/third_party/torch-xpu-ops/test/xpu/extended
-        timeout 3600 python run_test_with_skip.py \
+        python run_test_with_skip.py \
          2> ${{ github.workspace }}/ut_log/ut_extended/ut_extended_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_extended/ut_extended_test.log
        cp ut_extended.xml ${{ github.workspace }}/ut_log
@@ -50,7 +50,7 @@ runs:
        export PYTORCH_ENABLE_XPU_FALLBACK=1
        mkdir -p ut_log/ut_op
        cd pytorch/third_party/torch-xpu-ops/test/xpu
-        timeout 10000 python run_test_with_skip.py \
+        python run_test_with_skip.py \
          2> ${{ github.workspace }}/ut_log/ut_op/ut_op_with_skip_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_op/ut_op_with_skip_test.log
        cp *.xml ${{ github.workspace }}/ut_log
@@ -71,7 +71,7 @@ runs:
        # when XPU implementatoin is done.
        # test_foreach, test_decomp
        # Run with only
-        timeout 10000 python run_test_with_only.py \
+        python run_test_with_only.py \
          2> ${{ github.workspace }}/ut_log/ut_op/ut_op_with_only_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_op/ut_op_with_only_test.log
        cp ut_op_with_only.xml ${{ github.workspace }}/ut_log
diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml
index f0166cb6f3..c23d0e0278 100644
--- a/.github/workflows/_linux_op_benchmark.yml
+++ b/.github/workflows/_linux_op_benchmark.yml
@@ -103,8 +103,8 @@ jobs:
           path: ${{ github.workspace }}/op_benchmark
   op_benchmark_test_results_check:
-    needs: op_benchmark_test
-    runs-on: ubuntu-22.04
+    needs: op_benchmark
+    runs-on: ubuntu-latest
     env:
       GH_TOKEN: ${{ github.token }}
       reference_issue: 1689
diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index 01a608ae6d..01fc294823 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -17,7 +17,7 @@
 skip_options += '"'
 os.environ["PYTORCH_TEST_WITH_SLOW"] = "1"
-test_command = "pytest --timeout 600 -v --timeout_method=thread --junit-xml=./op_extended.xml test_ops_xpu.py"
+test_command = "pytest --timeout 600 -v --timeout_method=thread --junit-xml=./ut_extended.xml test_ops_xpu.py"
 test_command += skip_options
 res = os.system(test_command)
 sys.exit(res)
diff --git a/test/xpu/run_test_with_only.py b/test/xpu/run_test_with_only.py
index 9d70896b11..642cb699eb 100644
--- a/test/xpu/run_test_with_only.py
+++ b/test/xpu/run_test_with_only.py
@@ -15,7 +15,7 @@ def launch_test(test_case, skip_list=None, exe_list=None):
         skip_options += '"'
         test_command = (
             "pytest --timeout 600 -v "
-            + "--junit-xml=./op_ut_with_only.xml "
+            + "--junit-xml=./ut_op_with_only.xml "
             + test_case
             + skip_options
         )
@@ -28,14 +28,14 @@ def launch_test(test_case, skip_list=None, exe_list=None):
         exe_options += '"'
         test_command = (
             "pytest --timeout 600 -v "
-            + "--junit-xml=./op_ut_with_only.xml "
+            + "--junit-xml=./ut_op_with_only.xml "
             + test_case
             + exe_options
         )
         return os.system(test_command)
     else:
         test_command = (
-            "pytest --timeout 600 -v --junit-xml=./op_ut_with_only.xml " + test_case
+            "pytest --timeout 600 -v --junit-xml=./ut_op_with_only.xml " + test_case
         )
         return os.system(test_command)
diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py
index df524100b3..25e239f750 100644
--- a/test/xpu/xpu_test_utils.py
+++ b/test/xpu/xpu_test_utils.py
@@ -1170,7 +1170,7 @@ def launch_test(test_case, skip_list=None, exe_list=None):
             skip_options += skip_option
         skip_options += '"'
         test_command = (
-            f"pytest --timeout 600 -v --junit-xml=./op_ut_with_skip_{test_case}.xml "
+            f"pytest --timeout 600 -v --junit-xml=./ut_op_with_skip_{test_case}.xml "
             + test_case
         )
         test_command += skip_options
@@ -1181,13 +1181,13 @@ def launch_test(test_case, skip_list=None, exe_list=None):
             exe_options += exe_option
         exe_options += '"'
         test_command = (
-            f"pytest --timeout 600 -v --junit-xml=./op_ut_with_skip_{test_case}.xml "
+            f"pytest --timeout 600 -v --junit-xml=./ut_op_with_skip_{test_case}.xml "
             + test_case
         )
         test_command += exe_options
     else:
         test_command = (
-            f"pytest --timeout 600 -v --junit-xml=./op_ut_with_skip_{test_case}.xml "
+            f"pytest --timeout 600 -v --junit-xml=./ut_op_with_skip_{test_case}.xml "
             + test_case
         )
         return os.system(test_command)

From 9117a0c7be1ee40a5ce1bb4ce764ec629d0b2480 Mon Sep 17 00:00:00 2001
From: mengfei25
Date: Mon, 4 Aug 2025 10:22:47 +0800
Subject: [PATCH 118/160] update

---
 .github/workflows/_linux_e2e.yml | 8 ++++----
 .github/workflows/_linux_op_benchmark.yml | 5 +++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml
index bb256881d3..f089020f23 100644
--- a/.github/workflows/_linux_e2e.yml
+++ b/.github/workflows/_linux_e2e.yml
@@ -242,15 +242,15 @@ jobs:
       REFERENCE_ISSUE_ID: 1645
       AGENT_TOOLSDIRECTORY: /tmp/xpu-tool
     steps:
-      - name: Setup python-${{ inputs.python }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ inputs.python }}
      - name: Install gh-cli
        run: |
          sudo apt-get update
          sudo apt-get install gh rsync ca-certificates -y
          find ./ |grep -v "^\./$" |xargs rm -rf
+      - name: Setup python-${{ inputs.python }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ inputs.python }}
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      - name: Download Target Artifact
diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml
index c23d0e0278..4034c5f385 100644
--- a/.github/workflows/_linux_op_benchmark.yml
+++ b/.github/workflows/_linux_op_benchmark.yml
@@ -109,6 +109,11 @@ jobs:
      GH_TOKEN: ${{ github.token }}
      reference_issue: 1689
    steps:
+      - name: Install gh-cli
+        run: |
+          sudo apt-get update
+          sudo apt-get install gh rsync ca-certificates -y
+          find ./ |grep -v "^\./$" |xargs rm -rf
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      - name: Setup python-${{ inputs.python }}

From 587aa953614f20186fd71004887e4152920ae919 Mon Sep 17 00:00:00 2001
From: mengfei25
Date: Mon, 4 Aug 2025 15:50:10 +0800
Subject: [PATCH 119/160] update

---
 .github/actions/linux-uttest/action.yml | 8 ++++++--
 .github/workflows/_linux_ut.yml | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml
index 7b6ad49cfe..4d4dfd61c2 100644
--- a/.github/actions/linux-uttest/action.yml
+++ b/.github/actions/linux-uttest/action.yml
@@ -41,7 +41,8 @@ runs:
        python run_test_with_skip.py \
          2> ${{ github.workspace }}/ut_log/ut_extended/ut_extended_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_extended/ut_extended_test.log
-        cp ut_extended.xml ${{ github.workspace }}/ut_log
+        ls -al
+        cp *.xml ${{ github.workspace }}/ut_log
    - name: ut_op
      shell: bash -xe {0}
      if: ${{ inputs.test_type == 'ut_op' }}
@@ -53,6 +54,7 @@ runs:
        python run_test_with_skip.py \
          2> ${{ github.workspace }}/ut_log/ut_op/ut_op_with_skip_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_op/ut_op_with_skip_test.log
+        ls -al
        cp *.xml ${{ github.workspace }}/ut_log
        find ut_op_with_skip_nn ut_op_with_skip_quantization/core -type f -exec sh -c '
          dir_path=$(dirname "$1");
@@ -64,6 +66,7 @@ runs:
          esac;
          mv "$1" "$dir_path/${dir_name}_$(basename "$1")"
        ' _ {} \;
+        ls -al ut_op_with_skip_nn ut_op_with_skip_quantization/core
        cp ut_op_with_skip_nn/*.xml ${{ github.workspace }}/ut_log
        cp ut_op_with_skip_quantization/core/*.xml ${{ github.workspace }}/ut_log
        # Cases run with a on-demand white list, since some suites are too
@@ -74,7 +77,8 @@ runs:
        python run_test_with_only.py \
          2> ${{ github.workspace }}/ut_log/ut_op/ut_op_with_only_test_error.log | \
          tee ${{ github.workspace }}/ut_log/ut_op/ut_op_with_only_test.log
-        cp ut_op_with_only.xml ${{ github.workspace }}/ut_log
+        ls -al
+        cp *.xml ${{ github.workspace }}/ut_log
    - name: ut_torch
      shell: bash -xe {0}
      if: ${{ inputs.test_type == 'ut_torch' }}
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index af9126825b..03d690de11 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -127,7 +127,7 @@ jobs:
  devices:
    runs-on: pvc_rolling
    if: ${{ contains(inputs.ut, 'xpu_dev1') && !contains(inputs.disabled_tests, 'disable_ut') }}
-    timeout-minutes: 5
+    timeout-minutes: 30
    env:
      GH_TOKEN: ${{ github.token }}
      AGENT_TOOLSDIRECTORY: ${{ github.workspace }}/xpu-tool

From 3b0b94d13fd1fc96983df33231c7bc7a84d3ece7 Mon Sep 17 00:00:00 2001
From: mengfei25
Date: Mon, 4 Aug 2025 15:52:24 +0800
Subject: [PATCH 120/160] update

---
 .github/actions/linux-uttest/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml
index 4d4dfd61c2..17512a722a 100644
--- a/.github/actions/linux-uttest/action.yml
+++ b/.github/actions/linux-uttest/action.yml
@@ -114,7 +114,7 @@ runs:
        python -u test/profiling/triton_xpu_ops_time.py | \
          tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log
        # All xpu ut under test/profiler
-        cd ../pytorch/test/profiler
+        cd ../../test/profiler
        python -m pytest --timeout 600 -vs test_cpp_thread.py | \
          tee ${{ github.workspace }}/ut_log/profile_test/test_cpp_thread.log
        python -m pytest --timeout 600 -vs test_execution_trace.py | \

From e47b3e45b2927fbf48299a223c9766fbf0cd8aaa Mon Sep 17 00:00:00 2001
From: mengfei25
Date: Tue, 5 Aug 2025 13:34:41 +0800
Subject: [PATCH 121/160] update

---
 .github/workflows/_linux_ut.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index 03d690de11..29cd114ef6 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -104,7 +104,6 @@ jobs:
          test_type: ${{ matrix.test }}
      - name: UT Test Results Summary
        if: ${{ contains(inputs.ut, matrix.test) }}
-        run: |
          pip install junitparser
          python ./.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true
          if [ -e 'ut_failure_list.csv' ];then
            cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv || true
          fi
      - name: Upload Inductor XPU UT Log
-        if: ${{ contains(inputs.ut, matrix.test) }}
+        if: ${{ ! cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }}
          path: ${{ github.workspace }}/ut_log
+          if-no-files-found: ignore
      - name: Upload XPU UT Failure list
-        if: ${{ !
cancelled() }} uses: actions/upload-artifact@v4 with: name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ matrix.test }} path: ${{ github.workspace }}/ut_log/ut_failure_list.csv + if-no-files-found: ignore devices: runs-on: pvc_rolling From 51578bd58ff9c03285604dbb7d76d9960ebad7a0 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 6 Aug 2025 10:04:33 +0800 Subject: [PATCH 122/160] enable pytest to survive crashing tests and potentially complete the remaining tests --- .github/actions/linux-testenv/action.yml | 2 +- .github/actions/linux-uttest/action.yml | 12 ++++++------ test/xpu/extended/run_test_with_skip.py | 2 +- test/xpu/run_test_with_only.py | 6 +++--- test/xpu/xpu_test_utils.py | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml index 20e2bf15bc..d9907ed651 100644 --- a/.github/actions/linux-testenv/action.yml +++ b/.github/actions/linux-testenv/action.yml @@ -45,7 +45,7 @@ runs: clinfo --list cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c rm -rf ~/.triton /tmp/*inductor* - pip install pandas psutil scipy requests pytest-timeout + pip install pandas psutil scipy requests pytest-timeout pytest-xdist - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 17512a722a..f903c29fa7 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -17,7 +17,7 @@ runs: run: | mkdir -p ut_log/ut_regression cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 600 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ + pytest --timeout 600 -n 4 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ 2> ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test_error.log | \ tee ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test.log - name: ut_transformers @@ -27,7 +27,7 @@ runs: export PYTORCH_TEST_WITH_SLOW=1 mkdir -p ut_log/ut_transformers cd pytorch - pytest --timeout 600 --timeout_method=thread -v test/test_transformers.py -k xpu \ + pytest --timeout 600 -n 4 --timeout_method=thread -v test/test_transformers.py -k xpu \ --junit-xml=${{ github.workspace }}/ut_log/ut_transformers.xml \ 2> ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test_error.log | \ tee ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test.log @@ -115,13 +115,13 @@ runs: tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log # All xpu ut under test/profiler cd ../../test/profiler - python -m pytest --timeout 600 -vs test_cpp_thread.py | \ + python -m pytest --timeout 600 -n 4 -vs test_cpp_thread.py | \ tee ${{ github.workspace }}/ut_log/profile_test/test_cpp_thread.log - python -m pytest --timeout 600 -vs test_execution_trace.py | \ + python -m pytest --timeout 600 -n 4 -vs test_execution_trace.py | \ tee ${{ github.workspace }}/ut_log/profile_test/test_execution_trace.log - python -m pytest --timeout 600 -vs test_memory_profiler.py | \ + python -m pytest --timeout 600 -n 4 -vs test_memory_profiler.py | \ tee ${{ github.workspace }}/ut_log/profile_test/test_memory_profiler.log - python -m pytest --timeout 600 -vs test_profiler_tree.py | \ + python -m pytest --timeout 600 -n 4 -vs test_profiler_tree.py | \ tee ${{ github.workspace 
}}/ut_log/profile_test/test_profiler_tree.log - name: xpu_dev1 diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py index 01fc294823..17a8bbeb7a 100644 --- a/test/xpu/extended/run_test_with_skip.py +++ b/test/xpu/extended/run_test_with_skip.py @@ -17,7 +17,7 @@ skip_options += '"' os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" -test_command = "pytest --timeout 600 -v --timeout_method=thread --junit-xml=./ut_extended.xml test_ops_xpu.py" +test_command = "pytest --timeout 600 -n 4 -v --timeout_method=thread --junit-xml=./ut_extended.xml test_ops_xpu.py" test_command += skip_options res = os.system(test_command) sys.exit(res) diff --git a/test/xpu/run_test_with_only.py b/test/xpu/run_test_with_only.py index 642cb699eb..f7b8f097fd 100644 --- a/test/xpu/run_test_with_only.py +++ b/test/xpu/run_test_with_only.py @@ -14,7 +14,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - "pytest --timeout 600 -v " + "pytest --timeout 600 -n 4 -v " + "--junit-xml=./ut_op_with_only.xml " + test_case + skip_options @@ -27,7 +27,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - "pytest --timeout 600 -v " + "pytest --timeout 600 -n 4 -v " + "--junit-xml=./ut_op_with_only.xml " + test_case + exe_options @@ -35,7 +35,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): return os.system(test_command) else: test_command = ( - "pytest --timeout 600 -v --junit-xml=./ut_op_with_only.xml " + test_case + "pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_only.xml " + test_case ) return os.system(test_command) diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index 25e239f750..967bc192a6 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -1170,7 +1170,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - f"pytest --timeout 600 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f"pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) test_command += skip_options @@ -1181,13 +1181,13 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - f"pytest --timeout 600 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f"pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) test_command += exe_options else: test_command = ( - f"pytest --timeout 600 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f"pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) return os.system(test_command) From 75c99ffbb2fa190c21a8522f71acf5a0e078f914 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 6 Aug 2025 10:19:46 +0800 Subject: [PATCH 123/160] update --- .github/actions/linux-testenv/action.yml | 7 ++++--- .github/workflows/pull.yml | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml index 20e2bf15bc..2199e0aa64 100644 --- a/.github/actions/linux-testenv/action.yml +++ b/.github/actions/linux-testenv/action.yml @@ -113,13 +113,14 @@ runs: TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi fi - if [ "${{ inputs.test_type }}" == "cicd" ];then + if [ "${{ inputs.torch_xpu_ops }}" == "cicd" ];then cp -r ${{ github.workspace }}/torch-xpu-ops 
third_party/torch-xpu-ops + cd third_party/torch-xpu-ops else git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops + cd third_party/torch-xpu-ops + git checkout ${TORCH_XPU_OPS_COMMIT} fi - cd third_party/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_COMMIT} git status && git diff && git show -s - name: Torch Config shell: bash -xe {0} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index f009d10cb9..d8533f4091 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -108,6 +108,7 @@ jobs: runner: linux.idc.xpu test_type: build-cicd pytorch: main + torch_xpu_ops: cicd ut: ut_regression,ut_transformers,ut_extended,ut_op,xpu_dev1,xpu_distributed disabled_tests: ${{ needs.conditions-filter.outputs.disabled_tests }} @@ -120,6 +121,7 @@ jobs: runner: pvc_rolling test_type: build-cicd pytorch: main + torch_xpu_ops: cicd windows: name: windows From dcc44334f66d25d2b122c564796f6f0bc1338d24 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 6 Aug 2025 10:22:49 +0800 Subject: [PATCH 124/160] fix lint issue --- test/xpu/run_test_with_only.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/xpu/run_test_with_only.py b/test/xpu/run_test_with_only.py index f7b8f097fd..52bbcc1ced 100644 --- a/test/xpu/run_test_with_only.py +++ b/test/xpu/run_test_with_only.py @@ -35,7 +35,8 @@ def launch_test(test_case, skip_list=None, exe_list=None): return os.system(test_command) else: test_command = ( - "pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_only.xml " + test_case + "pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_only.xml " + + test_case ) return os.system(test_command) From e244cb17878def59180ec81277811c4dcff7f05d Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 6 Aug 2025 14:31:14 +0800 Subject: [PATCH 125/160] Update pull.yml --- .github/workflows/pull.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index d8533f4091..7e385d8c98 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -121,7 +121,6 @@ jobs: runner: pvc_rolling test_type: build-cicd pytorch: main - torch_xpu_ops: cicd windows: name: windows From 47cbdf5bba6fbc99383e5f4fa1f38b4b55b6f8f0 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 7 Aug 2025 10:20:04 +0800 Subject: [PATCH 126/160] modify pt2e --- .github/actions/linux-e2etest/action.yml | 1 - .github/actions/pt2e/action.yml | 13 ------------- 2 files changed, 14 deletions(-) diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml index 1404414b3e..559b3b307b 100644 --- a/.github/actions/linux-e2etest/action.yml +++ b/.github/actions/linux-e2etest/action.yml @@ -61,7 +61,6 @@ runs: # install timm dependencies without torch and torchvision pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) fi - pip install -U numpy==1.26.4 pip list |grep -E 'intel|torch' - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) shell: bash -xe {0} diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index 65fde6a03b..6fdf926a2a 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -39,18 +39,6 @@ runs: fi # deps if [[ ${{ inputs.scenario }} == *"performance"* ]]; then - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - rm -rf pt2e-audio - git clone --single-branch -b main https://github.com/pytorch/audio pt2e-audio 
- cd pt2e-audio && git checkout $TORCHAUDIO_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl - cd ../ - rm -rf pt2e-vision - git clone --single-branch -b main https://github.com/pytorch/vision pt2e-vision - cd pt2e-vision && git checkout $TORCHVISION_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl - cd ../ - fi # torchbench python -c "import torch, torchvision, torchaudio" cd pt2e-performance @@ -68,7 +56,6 @@ runs: pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec fi - pip install numpy==1.26.4 # dataset if [ ! -d ${HOME}/datasets/imagenet ];then rm -rf ${HOME}/datasets/imagenet From de15a3fe580fa034546e97305e26cddc3a4680ca Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 7 Aug 2025 10:30:45 +0800 Subject: [PATCH 127/160] update --- .github/scripts/build.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index b419883740..41c46c99b1 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -57,17 +57,17 @@ if [ "${XPU_ONEAPI_PATH}" == "" ];then intel-cmplr-lib-ur==2025.1.1 | \ intel-cmplr-lic-rt==2025.1.1 | \ intel-sycl-rt==2025.1.1 | \ - oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + impi-rt==2021.15.0 | \ + dpcpp-cpp-rt==2025.1.1 | \ + oneccl-devel==2021.15.2 | \ + oneccl==2021.15.2 | \ + mkl==2025.1.0 | \ onemkl-sycl-blas==2025.1.0 | \ onemkl-sycl-dft==2025.1.0 | \ onemkl-sycl-lapack==2025.1.0 | \ onemkl-sycl-rng==2025.1.0 | \ onemkl-sycl-sparse==2025.1.0 | \ - dpcpp-cpp-rt==2025.1.1 | \ intel-opencl-rt==2025.1.1 | \ - mkl==2025.1.0 | \ intel-openmp==2025.1.1 | \ tbb==2022.1.0 | \ tcmlib==1.3.0 | \ From 8445b8bc65b2e16e3e2349b20d344aa2bb04a542 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 10:28:39 +0800 Subject: [PATCH 128/160] e2e test matrix tests --- .github/actions/linux-e2etest/action.yml | 2 +- .github/workflows/_linux_e2e.yml | 14 +++++++------- .github/workflows/pull.yml | 6 ++++++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml index 559b3b307b..7fc921330e 100644 --- a/.github/actions/linux-e2etest/action.yml +++ b/.github/actions/linux-e2etest/action.yml @@ -35,7 +35,7 @@ runs: if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then python -c "import torch, torchvision, torchaudio" cd ./pytorch - TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt) + TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt 2> /dev/null || cat .ci/docker/ci_commit_pins/torchbench.txt) git clone https://github.com/pytorch/benchmark.git xpu-benchmark cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID # remove deps which will reinstall torch diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index f089020f23..4456a2a075 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -102,7 +102,7 @@ jobs: # CICD launch - name: Nightly Huggingface BF16 & FP16 Training Test - if: ${{ contains(inputs.test_type, 'cicd') }} + if: ${{ contains(inputs.test_type, 'cicd') && contains(inputs.suite, 
'huggingface') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true @@ -110,21 +110,21 @@ jobs: dt: bfloat16,float16 mode: training scenario: accuracy,performance - - name: Nightly Torchbench BF16 Training Test - if: ${{ contains(inputs.test_type, 'cicd') }} + - name: Nightly Timm_models BF16 Training Test + if: ${{ contains(inputs.test_type, 'cicd') && contains(inputs.suite, 'timm_models') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true - suite: torchbench + suite: timm_models dt: bfloat16 mode: training scenario: accuracy,performance - - name: Nightly Timm_models BF16 Training Test - if: ${{ contains(inputs.test_type, 'cicd') }} + - name: Nightly Torchbench BF16 Training Test + if: ${{ contains(inputs.test_type, 'cicd') && contains(inputs.suite, 'torchbench') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true - suite: timm_models + suite: torchbench dt: bfloat16 mode: training scenario: accuracy,performance diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 7e385d8c98..15923ebc6d 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -116,11 +116,17 @@ jobs: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_e2e') }} permissions: write-all needs: [conditions-filter, linux-build] + name: ${{ matrix.suite }} + strategy: + fail-fast: false + matrix: + suite: [huggingface, timm_models, torchbench] uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: build-cicd pytorch: main + suite: ${{ matrix.suite }} windows: name: windows From 8fe34c522c0cf0107adef1724bd7f81636f63ef7 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 15:01:33 +0800 Subject: [PATCH 129/160] modify e2e summary --- .github/workflows/_linux_e2e.yml | 82 +------------------ .github/workflows/_linux_e2e_summary.yml | 100 +++++++++++++++++++++++ .github/workflows/nightly_ondemand.yml | 12 ++- .github/workflows/pull.yml | 7 +- 4 files changed, 118 insertions(+), 83 deletions(-) create mode 100644 .github/workflows/_linux_e2e_summary.yml diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 4456a2a075..cdbc7cbd0e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -228,85 +228,5 @@ jobs: if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: - name: Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} + name: Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}-${{ inputs.suite }} path: ${{ github.workspace }}/upload_files - - summary: - runs-on: ubuntu-latest - if: ${{ ! 
cancelled() }} - needs: test - permissions: - issues: write - env: - GH_TOKEN: ${{ github.token }} - REFERENCE_ISSUE_ID: 1645 - AGENT_TOOLSDIRECTORY: /tmp/xpu-tool - steps: - - name: Install gh-cli - run: | - sudo apt-get update - sudo apt-get install gh rsync ca-certificates -y - find ./ |grep -v "^\./$" |xargs rm -rf - - name: Setup python-${{ inputs.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python }} - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Download Target Artifact - run: | - mkdir target/ - cd target/ - target_dir="Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}" - gh --repo ${GITHUB_REPOSITORY} run download ${GITHUB_RUN_ID} -n "${target_dir}" - - name: Download Baseline Artifact - run: | - mkdir baseline/ - artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/;s/cicd/weekly/')" - gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee body.txt - REFERENCE_RUN_ID="$(cat body.txt |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo '')" - if [ "${REFERENCE_RUN_ID}" != "" ];then - gh --repo intel/torch-xpu-ops run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" - baseline_dir="$(find . -name 'Inductor-*-XPU-E2E-*' -type d)" - if [ -d "${baseline_dir}" ];then - rsync -avzq --delete ${baseline_dir}/ baseline/ - ls -al baseline/ - rm -rf ${baseline_dir}/ - fi - fi - - name: Get summary - if: ${{ ! cancelled() }} - run: | - pip install pandas requests - if [ "${{ inputs.suite }}" != 'pt2e' ];then - bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! Please check them" - exit ${exit_label} - fi - fi - pt2e_summary_csv="$(find ./target/ -name "summary.csv")" - if [ -f "${pt2e_summary_csv}" ];then - cat ${pt2e_summary_csv} - failed_num=$(grep -c ',failed' ${pt2e_summary_csv}) - if [ ${failed_num} -ne 0 ];then - echo "[Warning] PT2E has failures!" - fi - fi - - name: Upload Reference Run ID - if: ${{ ! 
(contains(inputs.test_type, 'ondemand') || contains(inputs.test_type, 'cicd')) && github.repository_owner == 'intel' }} - run: | - gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee new_body.txt 2>&1 - has_or_not="$(grep -c 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt)" - if [ ${has_or_not} -ne 0 ];then - sed -i "s/Inductor-${{ inputs.test_type }}-LTS2:.*/Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}/" new_body.txt - else - echo "Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}" |tee -a new_body.txt - fi - gh --repo ${GITHUB_REPOSITORY} issue edit ${REFERENCE_ISSUE_ID} --body-file new_body.txt - - name: Set permissions - if: ${{ always() }} - run: | - find ./ |grep -v "^\./$" |xargs rm -rf diff --git a/.github/workflows/_linux_e2e_summary.yml b/.github/workflows/_linux_e2e_summary.yml new file mode 100644 index 0000000000..98438c9c78 --- /dev/null +++ b/.github/workflows/_linux_e2e_summary.yml @@ -0,0 +1,100 @@ +name: Linux E2E Test + +on: + workflow_call: + inputs: + test_type: + type: string + default: 'build-from-source' + description: Build from source or install nightly wheel + python: + type: string + default: '3.10' + description: Python version + suite: + type: string + default: 'huggingface' + description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma + +permissions: read-all + +defaults: + run: + shell: bash -xe {0} + +jobs: + summary: + runs-on: ubuntu-latest + if: ${{ ! cancelled() }} + permissions: + issues: write + env: + GH_TOKEN: ${{ github.token }} + REFERENCE_ISSUE_ID: 1645 + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool + steps: + - name: Install gh-cli + run: | + sudo apt-get update + sudo apt-get install gh rsync ca-certificates -y + find ./ |grep -v "^\./$" |xargs rm -rf + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + - name: Download Target Artifact + run: | + mkdir target/ + cd target/ + target_dir="Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-*" + gh --repo ${GITHUB_REPOSITORY} run download ${GITHUB_RUN_ID} -p "${target_dir}" + mv Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-*/* . + - name: Download Baseline Artifact + run: | + mkdir baseline/ + cd baseline/ + artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/;s/cicd/weekly/')" + gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee body.txt + REFERENCE_RUN_ID="$(cat body.txt |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo '')" + if [ "${REFERENCE_RUN_ID}" != "" ];then + gh --repo intel/torch-xpu-ops run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" + mv Inductor-*-XPU-E2E-*/* . + fi + - name: Get summary + if: ${{ ! cancelled() }} + run: | + pip install pandas requests + if [ "${{ inputs.suite }}" != 'pt2e' ];then + bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} + exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) + if [ ${exit_label} -ne 0 ];then + grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 + echo "There are ${exit_label} cases that need look into!!! 
Please check them" + exit ${exit_label} + fi + fi + pt2e_summary_csv="$(find ./target/ -name "summary.csv")" + if [ -f "${pt2e_summary_csv}" ];then + cat ${pt2e_summary_csv} + failed_num=$(grep -c ',failed' ${pt2e_summary_csv}) + if [ ${failed_num} -ne 0 ];then + echo "[Warning] PT2E has failures!" + fi + fi + - name: Upload Reference Run ID + if: ${{ ! (contains(inputs.test_type, 'ondemand') || contains(inputs.test_type, 'cicd')) && github.repository_owner == 'intel' }} + run: | + gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee new_body.txt 2>&1 + has_or_not="$(grep -c 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt)" + if [ ${has_or_not} -ne 0 ];then + sed -i "s/Inductor-${{ inputs.test_type }}-LTS2:.*/Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}/" new_body.txt + else + echo "Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}" |tee -a new_body.txt + fi + gh --repo ${GITHUB_REPOSITORY} issue edit ${REFERENCE_ISSUE_ID} --body-file new_body.txt + - name: Set permissions + if: ${{ always() }} + run: | + find ./ |grep -v "^\./$" |xargs rm -rf diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index e68bade8b7..9158fe1361 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -137,7 +137,6 @@ jobs: Linux-Nightly-Ondemand-E2E-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} name: linux-e2e - permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_e2e.yml with: @@ -151,6 +150,17 @@ jobs: mode: ${{ github.event_name == 'schedule' && 'inference' || inputs.mode }} scenario: ${{ github.event_name == 'schedule' && 'accuracy' || inputs.scenario }} model: ${{ github.event_name == 'schedule' && '' || inputs.model }} + Linux-Nightly-Ondemand-E2E-Tests-Summary: + if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} + name: linux-e2e-summary + permissions: write-all + needs: [Conditions-Filter, Linux-Nightly-Ondemand-E2E-Tests] + uses: ./.github/workflows/_linux_e2e_summary.yml + with: + runner: pvc_rolling + test_type: ${{ needs.Conditions-Filter.outputs.test_type }} + python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} + suite: ${{ github.event_name == 'schedule' && 'huggingface' || inputs.suite }} Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'microbench') }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 15923ebc6d..1720132a53 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -114,7 +114,6 @@ jobs: linux-e2e: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_e2e') }} - permissions: write-all needs: [conditions-filter, linux-build] name: ${{ matrix.suite }} strategy: @@ -127,6 +126,12 @@ jobs: test_type: build-cicd pytorch: main suite: ${{ matrix.suite }} + linux-e2e-summary: + permissions: write-all + needs: [linux-e2e] + uses: ./.github/workflows/_linux_e2e_summary.yml + with: + test_type: build-cicd windows: name: windows From bfc98daa851712678fc0928332a3fcb0915f70eb Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 15:12:06 +0800 Subject: [PATCH 130/160] update --- .github/workflows/nightly_ondemand.yml | 2 +- .github/workflows/pull.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly_ondemand.yml 
b/.github/workflows/nightly_ondemand.yml index 9158fe1361..7cab7199c7 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -152,7 +152,7 @@ jobs: model: ${{ github.event_name == 'schedule' && '' || inputs.model }} Linux-Nightly-Ondemand-E2E-Tests-Summary: if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} - name: linux-e2e-summary + name: linux-e2e permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-E2E-Tests] uses: ./.github/workflows/_linux_e2e_summary.yml diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 1720132a53..a142f5b800 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -127,6 +127,7 @@ jobs: pytorch: main suite: ${{ matrix.suite }} linux-e2e-summary: + name: linux-e2e permissions: write-all needs: [linux-e2e] uses: ./.github/workflows/_linux_e2e_summary.yml From 80641265967681d42b99e4754cf1029cd49e8f73 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 15:25:11 +0800 Subject: [PATCH 131/160] update --- .github/workflows/nightly_ondemand.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 7cab7199c7..cbd9f65c45 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -157,7 +157,6 @@ jobs: needs: [Conditions-Filter, Linux-Nightly-Ondemand-E2E-Tests] uses: ./.github/workflows/_linux_e2e_summary.yml with: - runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} suite: ${{ github.event_name == 'schedule' && 'huggingface' || inputs.suite }} From 1ea6a628828355ec55bd3afb87e10f054f786b49 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 15:46:41 +0800 Subject: [PATCH 132/160] update --- .github/workflows/_linux_e2e.yml | 66 ++++++++++++++++---------- .github/workflows/nightly_ondemand.yml | 9 ++-- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index cdbc7cbd0e..d585b9f375 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -131,7 +131,7 @@ jobs: # Nihglty launch - name: Nightly Huggingface Full Test - if: ${{ contains(inputs.test_type, 'nightly') }} + if: ${{ contains(inputs.test_type, 'nightly') && contains(inputs.suite, 'huggingface') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true @@ -139,26 +139,26 @@ jobs: dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training scenario: accuracy,performance - - name: Nightly Torchbench BF16 Training Test - if: ${{ contains(inputs.test_type, 'nightly') }} + - name: Nightly Timm_models FP16 Training Test + if: ${{ contains(inputs.test_type, 'nightly') && contains(inputs.suite, 'timm_models') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true - suite: torchbench - dt: bfloat16 + suite: timm_models + dt: float16 mode: training scenario: accuracy,performance - - name: Nightly Timm_models FP16 Training Test - if: ${{ contains(inputs.test_type, 'nightly') }} + - name: Nightly Torchbench BF16 Training Test + if: ${{ contains(inputs.test_type, 'nightly') && contains(inputs.suite, 'torchbench') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true - suite: timm_models - dt: float16 + suite: torchbench + dt: bfloat16 mode: training scenario: accuracy,performance - name: Nightly PT2E Full Test - if: 
${{ contains(inputs.test_type, 'nightly') }} + if: ${{ contains(inputs.test_type, 'nightly') && contains(inputs.suite, 'pt2e') }} uses: ./.github/actions/pt2e with: env_prepare: true @@ -166,8 +166,8 @@ jobs: scenario: accuracy,performance # Weekly launch - - name: Nightly Huggingface Full Test - if: ${{ contains(inputs.test_type, 'weekly') }} + - name: Weekly Huggingface Full Test + if: ${{ contains(inputs.test_type, 'weekly') && contains(inputs.suite, 'huggingface') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true @@ -175,26 +175,26 @@ jobs: dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training scenario: accuracy,performance - - name: Nightly Torchbench BF16 Training Test - if: ${{ contains(inputs.test_type, 'weekly') }} + - name: Weekly Timm_models Full Test + if: ${{ contains(inputs.test_type, 'weekly') && contains(inputs.suite, 'timm_models') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true - suite: torchbench + suite: timm_models dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training scenario: accuracy,performance - - name: Nightly Timm_models FP16 Training Test - if: ${{ contains(inputs.test_type, 'weekly') }} + - name: Weekly Torchbench Full Test + if: ${{ contains(inputs.test_type, 'weekly') && contains(inputs.suite, 'torchbench') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true - suite: timm_models + suite: torchbench dt: float32,bfloat16,float16,amp_bf16,amp_fp16 mode: inference,training scenario: accuracy,performance - - name: Nightly PT2E Full Test - if: ${{ contains(inputs.test_type, 'weekly') }} + - name: Weekly PT2E Full Test + if: ${{ contains(inputs.test_type, 'weekly') && contains(inputs.suite, 'pt2e') }} uses: ./.github/actions/pt2e with: env_prepare: true @@ -202,16 +202,34 @@ jobs: scenario: accuracy,performance # On-demand launch - - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: ${{ contains(inputs.test_type, 'ondemand') && inputs.suite != 'pt2e' }} + - name: OnDemand Test (huggingface) + if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'huggingface') }} + uses: ./.github/actions/linux-e2etest + with: + env_prepare: true + suite: huggingface + dt: ${{ inputs.dt }} + mode: ${{ inputs.mode }} + scenario: ${{ inputs.scenario }} + - name: OnDemand Test (timm_models) + if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'timm_models') }} uses: ./.github/actions/linux-e2etest with: env_prepare: true - suite: ${{ inputs.suite }} + suite: timm_models + dt: ${{ inputs.dt }} + mode: ${{ inputs.mode }} + scenario: ${{ inputs.scenario }} + - name: OnDemand Test (torchbench) + if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'torchbench') }} + uses: ./.github/actions/linux-e2etest + with: + env_prepare: true + suite: torchbench dt: ${{ inputs.dt }} mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} - - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + - name: OnDemand PT2E Test (pt2e) if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'pt2e') }} uses: ./.github/actions/pt2e with: diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index cbd9f65c45..3cc5514a5e 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -31,9 +31,8 @@ on: default: '' description: UT scope. 
`ut_regression,ut_transformers,ut_extended,ut_op,ut_profiling,ut_torch,xpu_dev1,xpu_distributed,microbench,windows`. Delimiter is comma suite: - type: string default: '' - description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma + description: Dynamo benchmarks test suite. `[huggingface,timm_models,torchbench,pt2e]`. Delimiter is comma dt: type: string default: '' @@ -139,13 +138,17 @@ jobs: name: linux-e2e needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_e2e.yml + strategy: + fail-fast: false + matrix: + suite: ${{ fromJSON(inputs.suite) }} with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} - suite: ${{ github.event_name == 'schedule' && 'huggingface' || inputs.suite }} + suite: ${{ matrix.suite }} dt: ${{ github.event_name == 'schedule' && 'float32' || inputs.dt }} mode: ${{ github.event_name == 'schedule' && 'inference' || inputs.mode }} scenario: ${{ github.event_name == 'schedule' && 'accuracy' || inputs.scenario }} From 530af25780f4172b2bca0c8b3f455f7648d69b96 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 16:07:59 +0800 Subject: [PATCH 133/160] update --- .github/workflows/_linux_e2e_summary.yml | 7 ++----- .github/workflows/nightly_ondemand.yml | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/_linux_e2e_summary.yml b/.github/workflows/_linux_e2e_summary.yml index 98438c9c78..d65e47a3ae 100644 --- a/.github/workflows/_linux_e2e_summary.yml +++ b/.github/workflows/_linux_e2e_summary.yml @@ -11,10 +11,6 @@ on: type: string default: '3.10' description: Python version - suite: - type: string - default: 'huggingface' - description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma permissions: read-all @@ -66,7 +62,8 @@ jobs: if: ${{ ! cancelled() }} run: | pip install pandas requests - if [ "${{ inputs.suite }}" != 'pt2e' ];then + e2e_summary_csv="$(find ./target/ -name "inductor_*.csv" |head -n 1)" + if [ -f "${e2e_summary_csv}" ];then bash ./.github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY} exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) if [ ${exit_label} -ne 0 ];then diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 3cc5514a5e..efc60f4600 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -31,7 +31,7 @@ on: default: '' description: UT scope. `ut_regression,ut_transformers,ut_extended,ut_op,ut_profiling,ut_torch,xpu_dev1,xpu_distributed,microbench,windows`. Delimiter is comma suite: - default: '' + default: '[]' description: Dynamo benchmarks test suite. `[huggingface,timm_models,torchbench,pt2e]`. 
Delimiter is comma dt: type: string @@ -134,7 +134,7 @@ jobs: ut: ${{ github.event_name == 'schedule' && 'ut_regression,xpu_dev1,ut_transformers,ut_extended,ut_op' || inputs.ut }} Linux-Nightly-Ondemand-E2E-Tests: - if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} + if: ${{ github.event_name == 'schedule' || inputs.suite != '[]' }} name: linux-e2e needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] uses: ./.github/workflows/_linux_e2e.yml @@ -154,15 +154,12 @@ jobs: scenario: ${{ github.event_name == 'schedule' && 'accuracy' || inputs.scenario }} model: ${{ github.event_name == 'schedule' && '' || inputs.model }} Linux-Nightly-Ondemand-E2E-Tests-Summary: - if: ${{ github.event_name == 'schedule' || contains(inputs.suite, 'e') }} name: linux-e2e permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-E2E-Tests] uses: ./.github/workflows/_linux_e2e_summary.yml with: test_type: ${{ needs.Conditions-Filter.outputs.test_type }} - python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} - suite: ${{ github.event_name == 'schedule' && 'huggingface' || inputs.suite }} Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'microbench') }} From 091678f8604ad4eb59eff54a2bc3d12c414c95f6 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 16:22:23 +0800 Subject: [PATCH 134/160] update --- .github/workflows/nightly_ondemand.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index efc60f4600..d5adf534a6 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -137,11 +137,11 @@ jobs: if: ${{ github.event_name == 'schedule' || inputs.suite != '[]' }} name: linux-e2e needs: [Conditions-Filter, Linux-Nightly-Ondemand-Build] - uses: ./.github/workflows/_linux_e2e.yml strategy: fail-fast: false matrix: suite: ${{ fromJSON(inputs.suite) }} + uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling test_type: ${{ needs.Conditions-Filter.outputs.test_type }} From eaa4bc4c5d1d27b020f33fcf17a8582246b865ce Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 16:56:39 +0800 Subject: [PATCH 135/160] update --- .github/workflows/nightly_ondemand.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index d5adf534a6..8fb9ffe14d 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -32,7 +32,7 @@ on: description: UT scope. `ut_regression,ut_transformers,ut_extended,ut_op,ut_profiling,ut_torch,xpu_dev1,xpu_distributed,microbench,windows`. Delimiter is comma suite: default: '[]' - description: Dynamo benchmarks test suite. `[huggingface,timm_models,torchbench,pt2e]`. Delimiter is comma + description: Dynamo benchmarks test suite. `["huggingface","timm_models","torchbench","pt2e"]`. 
Delimiter is comma dt: type: string default: '' From f70ef8abc90c509e1a9e6a6696802b4a8c0633cb Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 22:12:38 +0800 Subject: [PATCH 136/160] update deps --- .github/actions/linux-testenv/action.yml | 9 +++++++-- .github/workflows/nightly_ondemand.yml | 8 ++++---- .github/workflows/pull.yml | 6 +++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml index 2199e0aa64..85a38de9f1 100644 --- a/.github/actions/linux-testenv/action.yml +++ b/.github/actions/linux-testenv/action.yml @@ -45,7 +45,6 @@ runs: clinfo --list cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq -c rm -rf ~/.triton /tmp/*inductor* - pip install pandas psutil scipy requests pytest-timeout - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: @@ -88,8 +87,14 @@ runs: git clone ${PYTORCH_REPO} pytorch cd pytorch git checkout ${TORCH_COMMIT_ID} - pip install -r .ci/docker/requirements-ci.txt + if [[ "${{ inputs.test_type }}" == *"-e2e" ]];then + pip install pandas psutil scipy + else + pip install pytest-timeout + pip install -r .ci/docker/requirements-ci.txt + fi # apply extra PRs for stock pytorch + pip install requests if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940 else diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 8fb9ffe14d..a58710682a 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -126,7 +126,7 @@ jobs: uses: ./.github/workflows/_linux_ut.yml with: runner: linux.idc.xpu - test_type: ${{ needs.Conditions-Filter.outputs.test_type }} + test_type: ${{ needs.Conditions-Filter.outputs.test_type }}-ut pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} torch_xpu_ops: ${{ needs.Conditions-Filter.outputs.torch_xpu_ops }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} @@ -144,7 +144,7 @@ jobs: uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling - test_type: ${{ needs.Conditions-Filter.outputs.test_type }} + test_type: ${{ needs.Conditions-Filter.outputs.test_type }}-e2e pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} @@ -159,7 +159,7 @@ jobs: needs: [Conditions-Filter, Linux-Nightly-Ondemand-E2E-Tests] uses: ./.github/workflows/_linux_e2e_summary.yml with: - test_type: ${{ needs.Conditions-Filter.outputs.test_type }} + test_type: ${{ needs.Conditions-Filter.outputs.test_type }}-e2e Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'microbench') }} @@ -169,7 +169,7 @@ jobs: uses: ./.github/workflows/_linux_op_benchmark.yml with: runner: pvc_rolling - test_type: ${{ needs.Conditions-Filter.outputs.test_type }} + test_type: ${{ needs.Conditions-Filter.outputs.test_type }}-mb pytorch: ${{ needs.Conditions-Filter.outputs.pytorch }} oneapi: ${{ github.event_name == 'schedule' && 'installed' || inputs.oneapi }} python: ${{ github.event_name == 'schedule' && '3.10' || '3.10' }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index a142f5b800..6aece67fff 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -106,7 +106,7 @@ jobs: uses: 
./.github/workflows/_linux_ut.yml with: runner: linux.idc.xpu - test_type: build-cicd + test_type: build-cicd-ut pytorch: main torch_xpu_ops: cicd ut: ut_regression,ut_transformers,ut_extended,ut_op,xpu_dev1,xpu_distributed @@ -123,7 +123,7 @@ jobs: uses: ./.github/workflows/_linux_e2e.yml with: runner: pvc_rolling - test_type: build-cicd + test_type: build-cicd-e2e pytorch: main suite: ${{ matrix.suite }} linux-e2e-summary: @@ -132,7 +132,7 @@ jobs: needs: [linux-e2e] uses: ./.github/workflows/_linux_e2e_summary.yml with: - test_type: build-cicd + test_type: build-cicd-e2e windows: name: windows From a12045a5cd15a8e12febe0f367246b05cea747c3 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 22:13:40 +0800 Subject: [PATCH 137/160] update --- .github/workflows/_linux_e2e.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index d585b9f375..a5e4d87ee1 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -101,7 +101,7 @@ jobs: python: ${{ inputs.python }} # CICD launch - - name: Nightly Huggingface BF16 & FP16 Training Test + - name: CICD Huggingface BF16 & FP16 Training Test if: ${{ contains(inputs.test_type, 'cicd') && contains(inputs.suite, 'huggingface') }} uses: ./.github/actions/linux-e2etest with: @@ -110,7 +110,7 @@ jobs: dt: bfloat16,float16 mode: training scenario: accuracy,performance - - name: Nightly Timm_models BF16 Training Test + - name: CICD Timm_models BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') && contains(inputs.suite, 'timm_models') }} uses: ./.github/actions/linux-e2etest with: @@ -119,7 +119,7 @@ jobs: dt: bfloat16 mode: training scenario: accuracy,performance - - name: Nightly Torchbench BF16 Training Test + - name: CICD Torchbench BF16 Training Test if: ${{ contains(inputs.test_type, 'cicd') && contains(inputs.suite, 'torchbench') }} uses: ./.github/actions/linux-e2etest with: From 70415c292c96551669f5d0e920aafb8b7a7baba0 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Mon, 11 Aug 2025 22:32:47 +0800 Subject: [PATCH 138/160] modify cache dir --- .github/workflows/_linux_e2e.yml | 2 ++ .github/workflows/_linux_op_benchmark.yml | 2 ++ .github/workflows/_linux_ut.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index a5e4d87ee1..d784f2e49f 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -84,6 +84,8 @@ jobs: AGENT_TOOLSDIRECTORY: /tmp/xpu-tool GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + TORCH_HOME: /tmp/.cache/_torch + HF_HOME: /tmp/.cache/_huggingface MODEL_ONLY_NAME: ${{ inputs.model }} steps: - name: Cleanup workspace diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index 4034c5f385..a2e3757042 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -65,6 +65,8 @@ jobs: AGENT_TOOLSDIRECTORY: /opt/xpu-tool GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + TORCH_HOME: /tmp/.cache/_torch + HF_HOME: /tmp/.cache/_huggingface REFERENCE_ISSUE: 1689 steps: - name: Cleanup workspace diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 29cd114ef6..508d144c9c 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -76,6 +76,8 @@ jobs: AGENT_TOOLSDIRECTORY: 
/tmp/xpu-tool GH_TOKEN: ${{ github.token }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + TORCH_HOME: /tmp/.cache/_torch + HF_HOME: /tmp/.cache/_huggingface strategy: fail-fast: false matrix: From 5fcc6c696b2d86208f6a19c0c7a14453005a6f08 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 12 Aug 2025 11:01:26 +0800 Subject: [PATCH 139/160] update --- .github/actions/linux-e2etest/action.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml index 7fc921330e..01a7216bab 100644 --- a/.github/actions/linux-e2etest/action.yml +++ b/.github/actions/linux-e2etest/action.yml @@ -63,7 +63,7 @@ runs: fi pip list |grep -E 'intel|torch' - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - shell: bash -xe {0} + shell: bash -x {0} run: | cp ./.github/scripts/inductor_xpu_test.sh ./pytorch cd ./pytorch @@ -103,12 +103,12 @@ runs: for xpu_id in $(seq 0 $[ ${xpu_num} - 1 ]) do cpu_list="$(echo "${cores_per_instance} ${xpu_id}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')" - numactl --localalloc --physcpubind=${cpu_list} bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${xpu_id} & + numactl --localalloc --physcpubind=${cpu_list} bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${xpu_id} & done else for test_model in $(echo ${MODEL_ONLY_NAME} |sed 's/,/ /g') do - numactl --localalloc bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${test_model} + numactl --localalloc bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${test_model} done fi wait From 18f22e007f2fbf0cfef1cca84757b40f085f06cc Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 12 Aug 2025 22:40:13 +0800 Subject: [PATCH 140/160] update --- .github/actions/linux-e2etest/action.yml | 35 +------------------- .github/actions/linux-testenv/action.yml | 41 ++++++++++++++++++++---- .github/actions/linux-uttest/action.yml | 6 ++++ .github/actions/pt2e/action.yml | 19 ----------- 4 files changed, 41 insertions(+), 60 deletions(-) diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml index 01a7216bab..874b250638 100644 --- a/.github/actions/linux-e2etest/action.yml +++ b/.github/actions/linux-e2etest/action.yml @@ -28,43 +28,10 @@ inputs: runs: using: composite steps: - - name: Prepare ENV - if: ${{ inputs.env_prepare }} - shell: bash -xe {0} - run: | - if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then - python -c "import torch, torchvision, torchaudio" - cd ./pytorch - TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt 2> /dev/null || cat .ci/docker/ci_commit_pins/torchbench.txt) - git clone https://github.com/pytorch/benchmark.git xpu-benchmark - cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID - # remove deps which will reinstall torch - pip install --no-deps accelerate - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) - pip install -U transformers==4.44.2 - sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt - git status && git diff - pip install -r requirements.txt - python install.py --continue_on_fail - # deps for torchrec_dlrm - pip install 
pyre_extensions - pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu - pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec - fi - if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then - pip install -U transformers==4.44.2 - fi - if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then - # install timm without dependencies - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 - # install timm dependencies without torch and torchvision - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) - fi - pip list |grep -E 'intel|torch' - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) shell: bash -x {0} run: | + pip list |grep -E 'intel|torch' cp ./.github/scripts/inductor_xpu_test.sh ./pytorch cd ./pytorch # check param diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml index 85a38de9f1..12868db55f 100644 --- a/.github/actions/linux-testenv/action.yml +++ b/.github/actions/linux-testenv/action.yml @@ -21,6 +21,10 @@ inputs: type: string default: '3.10' description: Python version + suite: + type: string + default: 'huggingface' + description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma permissions: read-all @@ -64,6 +68,36 @@ runs: uses: actions/download-artifact@v4 with: pattern: Torch-XPU-Wheel-* + - name: Install E2E Requirements + if: ${{ ! contains(inputs.test_type, 'e2e') }} + shell: bash -xe {0} + run: | + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu + pip install pandas psutil scipy + if [[ "${{ inputs.suite }}" == *"huggingface"* ]];then + pip install transformers==4.44.2 + elif [[ "${{ inputs.suite }}" == *"timm_models"* ]];then + pip install timm==1.0.14 + elif [[ "${{ inputs.suite }}" == *"torchbench"* ]];then + rm -rf ./benchmark + git clone https://github.com/pytorch/benchmark + cd benchmark + git checkout e03a63be43e33596f7f0a43b0f530353785e4a59 + pip install -r requirements.txt + pip install -U transformers==4.44.2 timm==1.0.14 + curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install + python install.py --continue_on_fail + elif [[ "${{ inputs.suite }}" == *"pt2e"* ]];then + rm -rf ./benchmark + git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark + cd benchmark + pip install -r requirements.txt + pip install -U transformers==4.44.2 timm==1.0.14 + curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install + python install.py --continue_on_fail + fi + pip uninstall -y torch torchvision torchaudio pytorch-triton-xpu triton + pip uninstall -y torch torchvision torchaudio pytorch-triton-xpu triton - name: Prepare Stock Pytorch shell: bash -xe {0} run: | @@ -77,7 +111,6 @@ runs: else pip install --force-reinstall $(find ${{ github.workspace }}/ -name "*torch*.whl") fi - pip list |grep torch TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" @@ -87,12 +120,6 @@ runs: git clone ${PYTORCH_REPO} pytorch cd pytorch git checkout ${TORCH_COMMIT_ID} - if [[ "${{ inputs.test_type }}" == *"-e2e" ]];then - pip install pandas psutil scipy - else - pip 
install pytest-timeout - pip install -r .ci/docker/requirements-ci.txt - fi # apply extra PRs for stock pytorch pip install requests if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 17512a722a..ded9d5f737 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -11,10 +11,16 @@ permissions: read-all runs: using: composite steps: + - name: requirements + shell: bash -xe {0} + run: | + pip install -r pytorch/.ci/docker/requirements-ci.txt + pip install -U pytest-timeout - name: ut_regression shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_regression' }} run: | + pip install -r .ci/docker/requirements-ci.txt mkdir -p ut_log/ut_regression cd pytorch/third_party/torch-xpu-ops/test/regressions pytest --timeout 600 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index 6fdf926a2a..5fc3a9993c 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -37,25 +37,6 @@ runs: rm -rf pt2e-performance git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark pt2e-performance fi - # deps - if [[ ${{ inputs.scenario }} == *"performance"* ]]; then - # torchbench - python -c "import torch, torchvision, torchaudio" - cd pt2e-performance - # remove deps which will reinstall torch - pip install --no-deps accelerate - pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14 - pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) - pip install -U transformers==4.44.2 - sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt - git status && git diff - pip install -r requirements.txt - python install.py --continue_on_fail - # deps for torchrec_dlrm - pip install pyre_extensions - pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu - pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec - fi # dataset if [ ! 
-d ${HOME}/datasets/imagenet ];then rm -rf ${HOME}/datasets/imagenet From 0add64edd402d98885f4a573a609795677cabf4a Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Tue, 12 Aug 2025 22:44:37 +0800 Subject: [PATCH 141/160] update --- .github/actions/linux-testenv/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml index 12868db55f..7b7a91af06 100644 --- a/.github/actions/linux-testenv/action.yml +++ b/.github/actions/linux-testenv/action.yml @@ -84,7 +84,7 @@ runs: cd benchmark git checkout e03a63be43e33596f7f0a43b0f530353785e4a59 pip install -r requirements.txt - pip install -U transformers==4.44.2 timm==1.0.14 + pip install -U transformers==4.44.2 timm==1.0.14 pyre-extensions curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install python install.py --continue_on_fail elif [[ "${{ inputs.suite }}" == *"pt2e"* ]];then @@ -92,7 +92,7 @@ runs: git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark cd benchmark pip install -r requirements.txt - pip install -U transformers==4.44.2 timm==1.0.14 + pip install -U transformers==4.44.2 timm==1.0.14 pyre-extensions curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install python install.py --continue_on_fail fi From 8902540637e9a9580fac5c9dec46a0396273254e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 09:10:48 +0800 Subject: [PATCH 142/160] update --- .github/actions/linux-e2etest/action.yml | 19 ++++++++++--------- .github/actions/linux-testenv/action.yml | 2 +- .github/workflows/_linux_e2e.yml | 1 + .github/workflows/nightly_ondemand.yml | 1 + .github/workflows/pull.yml | 1 + 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml index 874b250638..52ef8a4cc9 100644 --- a/.github/actions/linux-e2etest/action.yml +++ b/.github/actions/linux-e2etest/action.yml @@ -99,12 +99,13 @@ runs: sed -i "s/$/,$(basename $var)/" $var cat $var >> inductor_log/summary_accuracy.csv done - cd ${{ github.workspace }} - cp ./.github/scripts/inductor_summary.py ./pytorch - cd ./pytorch - pip install styleFrame scipy pandas - dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g') - mode=$(echo ${{ inputs.mode }} |sed 's/,/ /g') - suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g') - scenario=$(echo ${{ inputs.scenario }} |sed 's/,/ /g') - python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario} + cp ${{ github.workspace }}/.github/scripts/inductor_summary.py ./ + csv_file="$(find inductor_log/ -name "inductor_*_xpu_*.csv" |tail -n 1)" + if [ -f "${csv_file}" ];then + pip install styleFrame scipy pandas + dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g') + mode=$(echo ${{ inputs.mode }} |sed 's/,/ /g') + suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g') + scenario=$(echo ${{ inputs.scenario }} |sed 's/,/ /g') + python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario} + fi diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml index 7b7a91af06..4250ceed9e 100644 --- a/.github/actions/linux-testenv/action.yml +++ b/.github/actions/linux-testenv/action.yml @@ -69,7 +69,7 @@ runs: with: pattern: Torch-XPU-Wheel-* - name: Install E2E Requirements - if: ${{ ! 
contains(inputs.test_type, 'e2e') }} + if: ${{ contains(inputs.test_type, 'e2e') }} shell: bash -xe {0} run: | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index d784f2e49f..61647e05e8 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -101,6 +101,7 @@ jobs: torch_xpu_ops: skipped oneapi: ${{ inputs.oneapi }} python: ${{ inputs.python }} + suite: ${{ inputs.suite }} # CICD launch - name: CICD Huggingface BF16 & FP16 Training Test diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index a58710682a..99731123e5 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -154,6 +154,7 @@ jobs: scenario: ${{ github.event_name == 'schedule' && 'accuracy' || inputs.scenario }} model: ${{ github.event_name == 'schedule' && '' || inputs.model }} Linux-Nightly-Ondemand-E2E-Tests-Summary: + if: ${{ ! cancelled() }} name: linux-e2e permissions: write-all needs: [Conditions-Filter, Linux-Nightly-Ondemand-E2E-Tests] diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 6aece67fff..2c6999a520 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -127,6 +127,7 @@ jobs: pytorch: main suite: ${{ matrix.suite }} linux-e2e-summary: + if: ${{ ! cancelled() }} name: linux-e2e permissions: write-all needs: [linux-e2e] From 0eda9f76b501f6972b26687c4ff648699bbdc0b9 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 09:11:54 +0800 Subject: [PATCH 143/160] update --- .github/actions/linux-uttest/action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index ded9d5f737..04b0fbcd86 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -20,7 +20,6 @@ runs: shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_regression' }} run: | - pip install -r .ci/docker/requirements-ci.txt mkdir -p ut_log/ut_regression cd pytorch/third_party/torch-xpu-ops/test/regressions pytest --timeout 600 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ From 363145454b3e7250317797fa1d8f79295827a88b Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 11:24:16 +0800 Subject: [PATCH 144/160] update --- .github/ci_expected_accuracy/check_expected.py | 2 +- .../rolling/inductor_timm_models_training.csv | 4 ++-- .../rolling/inductor_torchbench_inference.csv | 6 +++--- .github/scripts/e2e_summary.sh | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ci_expected_accuracy/check_expected.py b/.github/ci_expected_accuracy/check_expected.py index 48c09606de..3c82666af0 100644 --- a/.github/ci_expected_accuracy/check_expected.py +++ b/.github/ci_expected_accuracy/check_expected.py @@ -6,7 +6,7 @@ # Reference last updated is https://github.com/intel/torch-xpu-ops/pull/1223 parser = argparse.ArgumentParser(description="Accuracy Check", formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("--driver", type=str, default="lts", help="rolling or lts") +parser.add_argument("--driver", type=str, default="rolling", help="rolling or lts") parser.add_argument("--category", type=str, default="inductor", help="inductor") parser.add_argument("--suite", type=str, required=True, help="huggingface, timm_models or torchbench") 
parser.add_argument("--mode", type=str, required=True, help="inference or training") diff --git a/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv b/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv index 58dd7064d6..4a60aecac6 100644 --- a/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv +++ b/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv @@ -2,10 +2,10 @@ name,float32,bfloat16,float16,amp_bf16,amp_fp16 adv_inception_v3,pass,pass,pass,pass,pass beit_base_patch16_224,pass,pass,pass,pass,pass botnet26t_256,pass,pass,pass,pass,pass -cait_m36_384,pass,pass,pass,pass,pass +cait_m36_384,pass,pass,fail_accuracy,pass,pass coat_lite_mini,pass,pass,pass,pass,pass convit_base,pass,pass,pass,pass,pass -convmixer_768_32,pass,pass,pass,pass,pass +convmixer_768_32,pass,fail_accuracy,pass,fail_accuracy,pass # https://github.com/intel/torch-xpu-ops/issues/1274 convnext_base,pass,fail_accuracy,fail_accuracy,pass,pass crossvit_9_240,pass,pass,pass,pass,pass diff --git a/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv b/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv index 78a4677a90..29989ad6ff 100644 --- a/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv +++ b/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv @@ -19,11 +19,11 @@ densenet121,pass,pass,pass,pass,pass # https://github.com/intel/torch-xpu-ops/issues/1278 detectron2_fasterrcnn_r_101_c4,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_fasterrcnn_r_101_dc5,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy -detectron2_fasterrcnn_r_101_fpn,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy +detectron2_fasterrcnn_r_101_fpn,eager_1st_run_OOM,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_fasterrcnn_r_50_c4,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_fasterrcnn_r_50_dc5,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy -detectron2_fasterrcnn_r_50_fpn,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,pass -detectron2_fcos_r_50_fpn,pass,pass,pass,pass,pass +detectron2_fasterrcnn_r_50_fpn,eager_1st_run_OOM,eager_fail_to_run,eager_1st_run_OOM,fail_accuracy,pass +detectron2_fcos_r_50_fpn,pass,pass,pass,fail_accuracy,pass detectron2_maskrcnn,fail_to_run,eager_fail_to_run,fail_to_run,eager_fail_to_run,fail_to_run detectron2_maskrcnn_r_101_c4,fail_accuracy,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_maskrcnn_r_101_fpn,fail_accuracy,eager_fail_to_run,eager_1st_run_OOM,eager_1st_run_OOM,fail_accuracy diff --git a/.github/scripts/e2e_summary.sh b/.github/scripts/e2e_summary.sh index c858f6f3f5..d4ad299b59 100644 --- a/.github/scripts/e2e_summary.sh +++ b/.github/scripts/e2e_summary.sh @@ -98,7 +98,7 @@ Empty means the cases NOT run\n\n" suite="$(echo "${csv}" |sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')" mode="$(echo "${csv}" |sed 's/_xpu_accuracy.*//;s/.*_//')" dtype="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')" - python "${check_file}" --driver "${LTS_OR_ROLLING:-"lts"}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt" + python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt" test_result="$(sed 's/, /,/g' 
"/tmp/tmp-${suite}-${mode}-${dtype}.txt" |awk '{ if($0 ~/Total/){ total = $3; From 00a3720c3f52e8ec4784b640e43ee69d1313192f Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 11:26:30 +0800 Subject: [PATCH 145/160] Revert "update" This reverts commit 363145454b3e7250317797fa1d8f79295827a88b. --- .github/ci_expected_accuracy/check_expected.py | 2 +- .../rolling/inductor_timm_models_training.csv | 4 ++-- .../rolling/inductor_torchbench_inference.csv | 6 +++--- .github/scripts/e2e_summary.sh | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ci_expected_accuracy/check_expected.py b/.github/ci_expected_accuracy/check_expected.py index 3c82666af0..48c09606de 100644 --- a/.github/ci_expected_accuracy/check_expected.py +++ b/.github/ci_expected_accuracy/check_expected.py @@ -6,7 +6,7 @@ # Reference last updated is https://github.com/intel/torch-xpu-ops/pull/1223 parser = argparse.ArgumentParser(description="Accuracy Check", formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("--driver", type=str, default="rolling", help="rolling or lts") +parser.add_argument("--driver", type=str, default="lts", help="rolling or lts") parser.add_argument("--category", type=str, default="inductor", help="inductor") parser.add_argument("--suite", type=str, required=True, help="huggingface, timm_models or torchbench") parser.add_argument("--mode", type=str, required=True, help="inference or training") diff --git a/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv b/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv index 4a60aecac6..58dd7064d6 100644 --- a/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv +++ b/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv @@ -2,10 +2,10 @@ name,float32,bfloat16,float16,amp_bf16,amp_fp16 adv_inception_v3,pass,pass,pass,pass,pass beit_base_patch16_224,pass,pass,pass,pass,pass botnet26t_256,pass,pass,pass,pass,pass -cait_m36_384,pass,pass,fail_accuracy,pass,pass +cait_m36_384,pass,pass,pass,pass,pass coat_lite_mini,pass,pass,pass,pass,pass convit_base,pass,pass,pass,pass,pass -convmixer_768_32,pass,fail_accuracy,pass,fail_accuracy,pass +convmixer_768_32,pass,pass,pass,pass,pass # https://github.com/intel/torch-xpu-ops/issues/1274 convnext_base,pass,fail_accuracy,fail_accuracy,pass,pass crossvit_9_240,pass,pass,pass,pass,pass diff --git a/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv b/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv index 29989ad6ff..78a4677a90 100644 --- a/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv +++ b/.github/ci_expected_accuracy/rolling/inductor_torchbench_inference.csv @@ -19,11 +19,11 @@ densenet121,pass,pass,pass,pass,pass # https://github.com/intel/torch-xpu-ops/issues/1278 detectron2_fasterrcnn_r_101_c4,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_fasterrcnn_r_101_dc5,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy -detectron2_fasterrcnn_r_101_fpn,eager_1st_run_OOM,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy +detectron2_fasterrcnn_r_101_fpn,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_fasterrcnn_r_50_c4,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_fasterrcnn_r_50_dc5,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy 
-detectron2_fasterrcnn_r_50_fpn,eager_1st_run_OOM,eager_fail_to_run,eager_1st_run_OOM,fail_accuracy,pass -detectron2_fcos_r_50_fpn,pass,pass,pass,fail_accuracy,pass +detectron2_fasterrcnn_r_50_fpn,pass,eager_fail_to_run,fail_accuracy,fail_accuracy,pass +detectron2_fcos_r_50_fpn,pass,pass,pass,pass,pass detectron2_maskrcnn,fail_to_run,eager_fail_to_run,fail_to_run,eager_fail_to_run,fail_to_run detectron2_maskrcnn_r_101_c4,fail_accuracy,eager_fail_to_run,fail_accuracy,fail_accuracy,fail_accuracy detectron2_maskrcnn_r_101_fpn,fail_accuracy,eager_fail_to_run,eager_1st_run_OOM,eager_1st_run_OOM,fail_accuracy diff --git a/.github/scripts/e2e_summary.sh b/.github/scripts/e2e_summary.sh index d4ad299b59..c858f6f3f5 100644 --- a/.github/scripts/e2e_summary.sh +++ b/.github/scripts/e2e_summary.sh @@ -98,7 +98,7 @@ Empty means the cases NOT run\n\n" suite="$(echo "${csv}" |sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')" mode="$(echo "${csv}" |sed 's/_xpu_accuracy.*//;s/.*_//')" dtype="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')" - python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt" + python "${check_file}" --driver "${LTS_OR_ROLLING:-"lts"}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt" test_result="$(sed 's/, /,/g' "/tmp/tmp-${suite}-${mode}-${dtype}.txt" |awk '{ if($0 ~/Total/){ total = $3; From 0aab07a859ba5f607ad8e4ca079db158470949f7 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 15:04:19 +0800 Subject: [PATCH 146/160] update --- .github/actions/linux-e2etest/action.yml | 2 ++ .github/actions/linux-uttest/action.yml | 2 ++ .github/actions/pt2e/action.yml | 2 ++ .github/ci_expected_accuracy/check_expected.py | 2 +- .github/scripts/e2e_summary.sh | 2 +- .github/workflows/_linux_build.yml | 3 ++- .github/workflows/_linux_e2e.yml | 4 ++-- .github/workflows/_linux_op_benchmark.yml | 4 ++-- .github/workflows/_linux_ut.yml | 4 ++-- 9 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml index 52ef8a4cc9..a8c1865aac 100644 --- a/.github/actions/linux-e2etest/action.yml +++ b/.github/actions/linux-e2etest/action.yml @@ -27,6 +27,8 @@ inputs: runs: using: composite + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) shell: bash -x {0} diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 04b0fbcd86..e94525082e 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -10,6 +10,8 @@ permissions: read-all runs: using: composite + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: requirements shell: bash -xe {0} diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index 5fc3a9993c..10425ffa8c 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -22,6 +22,8 @@ inputs: runs: using: composite + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: Prepare ENV if: ${{ inputs.env_prepare }} diff --git a/.github/ci_expected_accuracy/check_expected.py b/.github/ci_expected_accuracy/check_expected.py index 48c09606de..3c82666af0 100644 --- 
a/.github/ci_expected_accuracy/check_expected.py +++ b/.github/ci_expected_accuracy/check_expected.py @@ -6,7 +6,7 @@ # Reference last updated is https://github.com/intel/torch-xpu-ops/pull/1223 parser = argparse.ArgumentParser(description="Accuracy Check", formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("--driver", type=str, default="lts", help="rolling or lts") +parser.add_argument("--driver", type=str, default="rolling", help="rolling or lts") parser.add_argument("--category", type=str, default="inductor", help="inductor") parser.add_argument("--suite", type=str, required=True, help="huggingface, timm_models or torchbench") parser.add_argument("--mode", type=str, required=True, help="inference or training") diff --git a/.github/scripts/e2e_summary.sh b/.github/scripts/e2e_summary.sh index c858f6f3f5..d4ad299b59 100644 --- a/.github/scripts/e2e_summary.sh +++ b/.github/scripts/e2e_summary.sh @@ -98,7 +98,7 @@ Empty means the cases NOT run\n\n" suite="$(echo "${csv}" |sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')" mode="$(echo "${csv}" |sed 's/_xpu_accuracy.*//;s/.*_//')" dtype="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')" - python "${check_file}" --driver "${LTS_OR_ROLLING:-"lts"}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt" + python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt" test_result="$(sed 's/, /,/g' "/tmp/tmp-${suite}-${mode}-${dtype}.txt" |awk '{ if($0 ~/Total/){ total = $3; diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index ebb6b6fb46..1b9b1be3aa 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -69,9 +69,10 @@ jobs: - ${{ github.workspace }}:${{ github.workspace }} env: PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - GH_TOKEN: ${{ github.token }} AGENT_TOOLSDIRECTORY: /tmp/xpu-tool PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache + env: + GH_TOKEN: ${{ github.token }} timeout-minutes: 300 steps: - name: Install gh-cli diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 61647e05e8..47b9014c7e 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -82,11 +82,11 @@ jobs: -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: AGENT_TOOLSDIRECTORY: /tmp/xpu-tool - GH_TOKEN: ${{ github.token }} - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} TORCH_HOME: /tmp/.cache/_torch HF_HOME: /tmp/.cache/_huggingface MODEL_ONLY_NAME: ${{ inputs.model }} + env: + GH_TOKEN: ${{ github.token }} steps: - name: Cleanup workspace run: | diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index a2e3757042..2ab84d571a 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -63,11 +63,11 @@ jobs: -u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }} env: AGENT_TOOLSDIRECTORY: /opt/xpu-tool - GH_TOKEN: ${{ github.token }} - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} TORCH_HOME: /tmp/.cache/_torch HF_HOME: /tmp/.cache/_huggingface REFERENCE_ISSUE: 1689 + env: + GH_TOKEN: ${{ github.token }} steps: - name: Cleanup workspace run: | diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml 
index 508d144c9c..8ad953e862 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -74,10 +74,10 @@ jobs: -e ZE_AFFINITY_MASK env: AGENT_TOOLSDIRECTORY: /tmp/xpu-tool - GH_TOKEN: ${{ github.token }} - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} TORCH_HOME: /tmp/.cache/_torch HF_HOME: /tmp/.cache/_huggingface + env: + GH_TOKEN: ${{ github.token }} strategy: fail-fast: false matrix: From 29a9fd8fb4301d8e0854967ee12f73fa082e6b09 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 15:06:58 +0800 Subject: [PATCH 147/160] update --- .github/workflows/pull.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 2c6999a520..740fb6bfee 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -113,9 +113,9 @@ jobs: disabled_tests: ${{ needs.conditions-filter.outputs.disabled_tests }} linux-e2e: + name: linux-e2e if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_e2e') }} needs: [conditions-filter, linux-build] - name: ${{ matrix.suite }} strategy: fail-fast: false matrix: From c69854ff2dac3478b57f4bc2fbf133ee0b232a87 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 16:50:28 +0800 Subject: [PATCH 148/160] update --- .github/actions/linux-e2etest/action.yml | 2 -- .github/actions/linux-uttest/action.yml | 2 -- .github/actions/pt2e/action.yml | 2 -- .github/workflows/_linux_e2e.yml | 1 + .github/workflows/_linux_ut.yml | 1 + 5 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml index a8c1865aac..52ef8a4cc9 100644 --- a/.github/actions/linux-e2etest/action.yml +++ b/.github/actions/linux-e2etest/action.yml @@ -27,8 +27,6 @@ inputs: runs: using: composite - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) shell: bash -x {0} diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index e94525082e..04b0fbcd86 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -10,8 +10,6 @@ permissions: read-all runs: using: composite - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: requirements shell: bash -xe {0} diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index 10425ffa8c..5fc3a9993c 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -22,8 +22,6 @@ inputs: runs: using: composite - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: Prepare ENV if: ${{ inputs.env_prepare }} diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml index 47b9014c7e..9abe81cacb 100644 --- a/.github/workflows/_linux_e2e.yml +++ b/.github/workflows/_linux_e2e.yml @@ -87,6 +87,7 @@ jobs: MODEL_ONLY_NAME: ${{ inputs.model }} env: GH_TOKEN: ${{ github.token }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: Cleanup workspace run: | diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 8ad953e862..33d0b54d8d 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -78,6 +78,7 @@ jobs: HF_HOME: /tmp/.cache/_huggingface env: GH_TOKEN: ${{ github.token }} + HUGGING_FACE_HUB_TOKEN: ${{ 
secrets.HUGGING_FACE_HUB_TOKEN }} strategy: fail-fast: false matrix: From a6b2302547ac1222878bc2ea3e968d507e3d6649 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 17:00:55 +0800 Subject: [PATCH 149/160] merge main --- .github/actions/linux-uttest/action.yml | 30 ++++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 04b0fbcd86..e189754464 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -102,32 +102,40 @@ runs: shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_profiling' }} run: | - mkdir -p ut_log/profile_test/issue_reproduce + mkdir -p ut_log/xpu_profiling/issue_reproduce cd pytorch/third_party/torch-xpu-ops # RN50 Test PROFILE=1 python -u test/profiling/rn50.py -a resnet50 --dummy ./ --num-iterations 20 --xpu 0 - cp profiling.fp32.train.pt ${{ github.workspace }}/ut_log/profile_test + cp profiling.fp32.train.pt ${{ github.workspace }}/ut_log/xpu_profiling # All Issue Reproduce UT python -u test/profiling/correlation_id_mixed.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/correlation_id_mixed.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/issue_reproduce/correlation_id_mixed.log python -u test/profiling/reproducer.missing.gpu.kernel.time.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/reproducer.missing.gpu.kernel.time.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/issue_reproduce/reproducer.missing.gpu.kernel.time.log python -u test/profiling/time_precision_in_profile.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/time_precision_in_profile.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/issue_reproduce/time_precision_in_profile.log python -u test/profiling/profile_partial_runtime_ops.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/profile_partial_runtime_ops.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/issue_reproduce/profile_partial_runtime_ops.log python -u test/profiling/triton_xpu_ops_time.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/issue_reproduce/triton_xpu_ops_time.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/issue_reproduce/triton_xpu_ops_time.log + + # llama case for calls number test + pip install transformers + python test/profiling/llama.py | \ + tee ${{ github.workspace }}/ut_log/xpu_profiling/llama.log + python .github/scripts/llama_summary.py -i ${{ github.workspace }}/ut_log/xpu_profiling/llama.log -o ${{ github.workspace }}/ut_log/xpu_profiling/llama_summary.csv + bash .github/scripts/check_baseline.sh .github/scripts/llama_baseline.csv ${{ github.workspace }}/ut_log/xpu_profiling/llama_summary.csv + # All xpu ut under test/profiler cd ../../test/profiler python -m pytest --timeout 600 -vs test_cpp_thread.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/test_cpp_thread.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/test_cpp_thread.log python -m pytest --timeout 600 -vs test_execution_trace.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/test_execution_trace.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/test_execution_trace.log python -m pytest --timeout 600 -vs test_memory_profiler.py | \ - tee ${{ github.workspace }}/ut_log/profile_test/test_memory_profiler.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/test_memory_profiler.log python -m pytest --timeout 600 -vs test_profiler_tree.py 
| \ - tee ${{ github.workspace }}/ut_log/profile_test/test_profiler_tree.log + tee ${{ github.workspace }}/ut_log/xpu_profiling/test_profiler_tree.log - name: xpu_dev1 shell: bash -xe {0} From 0a17050fc0928a6fa6d76aa1a79b84006fba75af Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Wed, 13 Aug 2025 17:15:31 +0800 Subject: [PATCH 150/160] update --- .github/workflows/nightly_ondemand.yml | 1 + .github/workflows/pull.yml | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 99731123e5..f3d967f1f5 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -178,6 +178,7 @@ jobs: Windows-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' || contains(inputs.ut, 'windows') }} name: windows + needs: [Conditions-Filter] uses: ./.github/workflows/_windows_ut.yml with: ut: ${{ github.event_name == 'schedule' && 'ut_extended,ut_torch' || inputs.ut }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 740fb6bfee..23683fa701 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -51,8 +51,7 @@ jobs: bash third_party/torch-xpu-ops/.github/scripts/lintrunner.sh conditions-filter: - if: ${{ github.event.pull_request.draft == false }} - needs: [preci-lint-check] + if: ${{ github.repository_owner == 'intel' && github.event.pull_request.draft == false }} runs-on: ubuntu-latest timeout-minutes: 10 env: @@ -92,7 +91,7 @@ jobs: linux-build: if: ${{ !contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all')}} - needs: [conditions-filter] + needs: [conditions-filter, preci-lint-check] secrets: inherit uses: ./.github/workflows/_linux_build.yml with: @@ -138,7 +137,7 @@ jobs: windows: name: windows if: ${{ !(contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.conditions-filter.outputs.disabled_tests, 'disable_win')) }} - needs: [conditions-filter] + needs: [conditions-filter, preci-lint-check] uses: ./.github/workflows/_windows_ut.yml with: ut: ut_extended,ut_torch From a7257b0f5a2b63973adda86d809745580e19b434 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 09:50:29 +0800 Subject: [PATCH 151/160] modify e2e summary --- .github/workflows/_linux_e2e_summary.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux_e2e_summary.yml b/.github/workflows/_linux_e2e_summary.yml index d65e47a3ae..746bc1d565 100644 --- a/.github/workflows/_linux_e2e_summary.yml +++ b/.github/workflows/_linux_e2e_summary.yml @@ -46,7 +46,8 @@ jobs: cd target/ target_dir="Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-*" gh --repo ${GITHUB_REPOSITORY} run download ${GITHUB_RUN_ID} -p "${target_dir}" - mv Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-*/* . + find Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-*/ -maxdepth 1 -mindepth 1 -type d |\ + while read line; do mv $line .; done - name: Download Baseline Artifact run: | mkdir baseline/ @@ -56,7 +57,7 @@ jobs: REFERENCE_RUN_ID="$(cat body.txt |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo '')" if [ "${REFERENCE_RUN_ID}" != "" ];then gh --repo intel/torch-xpu-ops run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*" - mv Inductor-*-XPU-E2E-*/* . + find Inductor-*-XPU-E2E-*/ -maxdepth 1 -mindepth 1 -type d |while read line; do mv $line .; done fi - name: Get summary if: ${{ ! 
cancelled() }} From 8a54cfaa4788147c20056353970f1729f3379dbc Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 10:47:08 +0800 Subject: [PATCH 152/160] modify on-demand test --- .github/workflows/nightly_ondemand.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index f3d967f1f5..cc7b1ac428 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -91,14 +91,12 @@ jobs: torch_xpu_ops="main" fi else + pytorch="${{ inputs.pytorch }}" + torch_xpu_ops="${{ inputs.torch_xpu_ops }}" if [[ "${{ inputs.pytorch }}" == *"_wheel" ]];then test_type="wheel-ondemand" - pytorch="${{ inputs.pytorch }}" - torch_xpu_ops="pinned" else test_type="build-ondemand" - pytorch="${{ inputs.pytorch }}" - torch_xpu_ops="${{ inputs.torch_xpu_ops }}" fi fi echo "test_type=${test_type}" >> ${GITHUB_OUTPUT} From 23f097f0dc60c1100791c9d576d6900c154e817e Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 10:54:30 +0800 Subject: [PATCH 153/160] modify on-demand test --- .github/actions/linux-testenv/action.yml | 2 +- .github/workflows/nightly_ondemand.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml index 4250ceed9e..188dacc29b 100644 --- a/.github/actions/linux-testenv/action.yml +++ b/.github/actions/linux-testenv/action.yml @@ -145,7 +145,7 @@ runs: TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi fi - if [ "${{ inputs.torch_xpu_ops }}" == "cicd" ];then + if [ "${{ inputs.torch_xpu_ops }}" == "cicd" ] || [ "${{ inputs.torch_xpu_ops }}" == "triggered" ];then cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops cd third_party/torch-xpu-ops else diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index cc7b1ac428..93826b34a8 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -16,8 +16,8 @@ on: description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' torch_xpu_ops: type: string - default: 'main' - description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + default: 'triggered' + description: Torch-xpu-ops workflow triggered branch by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin triton: type: string default: 'pinned' From a047accd26af551195937116ebe8296391cdea73 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 16:57:44 +0800 Subject: [PATCH 154/160] rebase --- .github/scripts/build.sh | 67 ++++---- .github/scripts/env.sh | 2 +- .github/workflows/_linux_build.yml | 251 ++++++++++++----------------- 3 files changed, 134 insertions(+), 186 deletions(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index 41c46c99b1..5bce6eacdf 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -1,18 +1,18 @@ #!/bin/bash # Usage: # ./build.sh --WORKSPACE= \ -# --PYTORCH_REPO= --PYTORCH_COMMIT= \ +# --PYTORCH_REPO= --PYTORCH_VERSION= \ # --TORCH_XPU_OPS_REPO= \ -# --TORCH_XPU_OPS_COMMIT= +# --TORCH_XPU_OPS_VERSION= set -xe export GIT_PAGER=cat # Init params WORKSPACE=$(realpath ${WORKSPACE:-"/tmp"}) PYTORCH_REPO=${PYTORCH_REPO:-"https://github.com/pytorch/pytorch.git"} -PYTORCH_COMMIT=${PYTORCH_COMMIT:-"main"} +PYTORCH_VERSION=${PYTORCH_VERSION:-"main"} 
TORCH_XPU_OPS_REPO=${TORCH_XPU_OPS_REPO:-"https://github.com/intel/torch-xpu-ops.git"} -TORCH_XPU_OPS_COMMIT=${TORCH_XPU_OPS_COMMIT:-"main"} +TORCH_XPU_OPS_VERSION=${TORCH_XPU_OPS_VERSION:-"main"} for var; do eval "export $(echo ${var@Q} |sed "s/^'-*//g;s/=/='/")" done @@ -21,20 +21,20 @@ done rm -rf ${WORKSPACE}/pytorch git clone ${PYTORCH_REPO} ${WORKSPACE}/pytorch cd ${WORKSPACE}/pytorch -git checkout ${PYTORCH_COMMIT} +git checkout ${PYTORCH_VERSION} git remote -v && git branch && git show -s git rev-parse HEAD > ${WORKSPACE}/pytorch.commit # Set torch-xpu-ops -if [ "${TORCH_XPU_OPS_COMMIT,,}" == "pinned" ];then +if [ "${TORCH_XPU_OPS_VERSION,,}" == "pinned" ];then TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - TORCH_XPU_OPS_COMMIT="$(cat ${WORKSPACE}/pytorch/third_party/xpu.txt)" + TORCH_XPU_OPS_VERSION="$(cat ${WORKSPACE}/pytorch/third_party/xpu.txt)" fi -if [ "${TORCH_XPU_OPS_COMMIT,,}" != "cicd" ];then +if [ "${TORCH_XPU_OPS_VERSION,,}" != "cicd" ];then rm -rf ${WORKSPACE}/torch-xpu-ops git clone ${TORCH_XPU_OPS_REPO} ${WORKSPACE}/torch-xpu-ops cd ${WORKSPACE}/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_COMMIT} + git checkout ${TORCH_XPU_OPS_VERSION} fi cd ${WORKSPACE}/torch-xpu-ops git remote -v && git branch && git show -s @@ -48,33 +48,30 @@ python -m pip install requests python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py git submodule sync && git submodule update --init --recursive python -m pip install -r requirements.txt -python -m pip install mkl-static==2025.1.0 mkl-include==2025.1.0 +python -m pip install mkl-static mkl-include export USE_STATIC_MKL=1 -export USE_XCCL=1 -if [ "${XPU_ONEAPI_PATH}" == "" ];then - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ - intel-cmplr-lib-rt==2025.1.1 | \ - intel-cmplr-lib-ur==2025.1.1 | \ - intel-cmplr-lic-rt==2025.1.1 | \ - intel-sycl-rt==2025.1.1 | \ - impi-rt==2021.15.0 | \ - dpcpp-cpp-rt==2025.1.1 | \ - oneccl-devel==2021.15.2 | \ - oneccl==2021.15.2 | \ - mkl==2025.1.0 | \ - onemkl-sycl-blas==2025.1.0 | \ - onemkl-sycl-dft==2025.1.0 | \ - onemkl-sycl-lapack==2025.1.0 | \ - onemkl-sycl-rng==2025.1.0 | \ - onemkl-sycl-sparse==2025.1.0 | \ - intel-opencl-rt==2025.1.1 | \ - intel-openmp==2025.1.1 | \ - tbb==2022.1.0 | \ - tcmlib==1.3.0 | \ - umf==0.10.0 | \ - intel-pti==0.12.3 - " -fi +export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ + intel-cmplr-lib-rt==2025.1.1 | \ + intel-cmplr-lib-ur==2025.1.1 | \ + intel-cmplr-lic-rt==2025.1.1 | \ + intel-sycl-rt==2025.1.1 | \ + oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + onemkl-sycl-blas==2025.1.0 | \ + onemkl-sycl-dft==2025.1.0 | \ + onemkl-sycl-lapack==2025.1.0 | \ + onemkl-sycl-rng==2025.1.0 | \ + onemkl-sycl-sparse==2025.1.0 | \ + dpcpp-cpp-rt==2025.1.1 | \ + intel-opencl-rt==2025.1.1 | \ + mkl==2025.1.0 | \ + intel-openmp==2025.1.1 | \ + tbb==2022.1.0 | \ + tcmlib==1.3.0 | \ + umf==0.10.0 | \ + intel-pti==0.12.3 +" # Build sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh index d0f7cfd338..3b17170385 100755 --- a/.github/scripts/env.sh +++ b/.github/scripts/env.sh @@ -1,6 +1,6 @@ #!/bin/bash -XPU_ONEAPI_PATH="${XPU_ONEAPI_PATH:-"/opt/intel/oneapi"}" +XPU_ONEAPI_PATH=${XPU_ONEAPI_PATH:-"/opt/intel/oneapi"} source 
${XPU_ONEAPI_PATH}/compiler/latest/env/vars.sh source ${XPU_ONEAPI_PATH}/pti/latest/env/vars.sh diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 1b9b1be3aa..8fbed99275 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -3,214 +3,165 @@ name: Linux PyTorch XPU Build on: workflow_call: inputs: - runner: - required: true - type: string - default: 'pvc_rolling' - description: Runner label - test_type: - type: string - default: 'build-from-source' - description: Build from source or install nightly wheel pytorch: + required: true type: string default: 'main' - description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' - torch_xpu_ops: - type: string - default: 'main' - description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin - triton: + description: Pytorch branch/commit + keep_torch_xpu_ops: required: false type: string - default: 'pinned' - description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' - oneapi: + default: 'false' + description: Keep torch-xpu-ops pin. `true` means use pined commit + driver: + required: false type: string - default: 'installed' - description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + default: 'lts' + description: Driver lts/rolling python: + required: false type: string default: '3.10' description: Python version + runner: + required: true + type: string + default: 'linux.idc.xpu' + description: Runner label + triton: + required: false + type: string + default: '' + description: Triton commit. Use pytorch pined commit by default + outputs: + torch_commit_id: + description: The commit id of the torch build + value: ${{ jobs.build.outputs.TORCH_COMMIT_ID }} permissions: read-all -defaults: - run: - shell: bash -xe {0} - jobs: - runner: - runs-on: ${{ inputs.runner }} - outputs: - runner_id: ${{ steps.runner-info.outputs.runner_id }} - user_id: ${{ steps.runner-info.outputs.user_id }} - render_id: ${{ steps.runner-info.outputs.render_id }} - hostname: ${{ steps.runner-info.outputs.hostname }} - steps: - - name: Cleanup workspace - run: | - sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf - - name: Checkout torch-xpu-ops - uses: actions/checkout@v4 - - name: Get runner - id: runner-info - uses: ./.github/actions/get-runner - build: - name: ${{ inputs.pytorch }} - needs: runner - if: ${{ ! 
contains(inputs.test_type, 'wheel') }} - runs-on: ${{ needs.runner.outputs.runner_id }} + runs-on: ${{ inputs.runner }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} env: - PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - AGENT_TOOLSDIRECTORY: /tmp/xpu-tool - PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache - env: - GH_TOKEN: ${{ github.token }} + PATH: /opt/xpu-build/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + commit_issue: 1280 + GH_TOKEN: ${{ github.token }} + NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} + DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} + outputs: + TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }} timeout-minutes: 300 steps: - - name: Install gh-cli + - name: Setup based env run: | - cat /etc/os-release - hostname && id # Cleanup workspace - find ./ |grep -v "^\./$" |xargs rm -rf - # install gh - dnf install -y 'dnf-command(config-manager)' + rm -rf ${{ github.workspace }}/* + # Install gh + dnf install 'dnf-command(config-manager)' dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo - dnf install -y gh --repo gh-cli - gh --version - - name: Setup python-${{ inputs.python }} - run: | - rm -rf /tmp/xpu-tool/myvenv + dnf autoremove -y git236* && dnf install -y git + dnf install gh --repo gh-cli -y + # Setup python local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv /tmp/xpu-tool/myvenv - which python && python -V - which pip && pip list + /opt/python/${local_python}/bin/python -m venv /opt/xpu-build + which python && python -V && pip list pip install -U pip wheel setuptools - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: path: torch-xpu-ops - - name: Build Pytorch on ${{ needs.runner.outputs.hostname }} + - name: Build Triton XPU run: | - # only build pvc for CI - if [ "${{ inputs.test_type }}" == "build-cicd" ];then - export TORCH_XPU_ARCH_LIST='pvc' + # gcc 13 + dnf install -y gcc-toolset-13-gcc-c++ + source /opt/rh/gcc-toolset-13/enable + dnf install -y zlib-devel + cd ../ && rm -rf pytorch + git clone https://github.com/pytorch/pytorch pytorch + cd pytorch + if [ -z ${{ inputs.triton }} ]; then + TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" + else + TRITON_COMMIT_ID="${{ inputs.triton }}" + fi + if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then + pip install cmake ninja pybind11 + rm -rf pytorch_triton_xpu-*.whl + TRITON_VERSION_NAME="$( + curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ + grep '__version__' |head -n 1 |awk -F "'" '{print $2}' + )" + python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} + cp pytorch_triton_xpu-*.whl ${{ github.workspace }} + fi + - name: Build Pytorch XPU + run: | + set -xe -o pipefail + if [ "${{ inputs.driver }}" == "lts" ]; then + export TORCH_XPU_ARCH_LIST='pvc' fi if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" + PYTORCH_VERSION="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" else PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - PYTORCH_COMMIT="${{ inputs.pytorch }}" + 
PYTORCH_VERSION="${{ inputs.pytorch }}" fi - if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then - TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" - TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" + if [[ "${{ inputs.keep_torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_VERSION="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/.*@//')" + elif [ "${{ inputs.keep_torch_xpu_ops }}" == "true" ];then + TORCH_XPU_OPS_VERSION="pinned" else - TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" + TORCH_XPU_OPS_VERSION="cicd" fi - # gcc 11 - source /opt/rh/gcc-toolset-11/enable # oneAPI DLE - if [ "${{ inputs.oneapi }}" != "installed" ];then - rm -rf ${HOME}/intel ${HOME}/.intel /opt/intel - wget -q -O oneapi.sh "${{ inputs.oneapi }}" - bash oneapi.sh -a -s --eula accept --action install --install-dir /opt/intel/oneapi - export XPU_ONEAPI_PATH="/opt/intel/oneapi" - fi source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh + # gcc 11 + source /opt/rh/gcc-toolset-11/enable + export USE_XCCL=1 ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ --WORKSPACE="${{ github.workspace }}" \ --PYTORCH_REPO="${PYTORCH_REPO}" \ - --PYTORCH_COMMIT="${PYTORCH_COMMIT}" \ + --PYTORCH_VERSION="${PYTORCH_VERSION}" \ --TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \ - --TORCH_XPU_OPS_COMMIT="${TORCH_XPU_OPS_COMMIT}" \ - 2>&1 |tee ${{ github.workspace }}/build_pytorch_${PYTORCH_COMMIT//\//-}.log - if [ $(ls ${{ github.workspace }} |grep -c "torch-.*.whl") -eq 0 ];then - echo "Build pytorch got failed" - exit 1 - fi - - name: Build Triton - run: | - # gcc 13 - dnf install -y gcc-toolset-13-gcc-c++ zlib-devel - source /opt/rh/gcc-toolset-13/enable - cd ./pytorch - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - if [ "${{ inputs.triton }}" != "pinned" ];then - TRITON_COMMIT_ID="${{ inputs.triton }}" - else - TRITON_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)" - fi - TRITON_VERSION_NAME="$( - curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} \ - 2>&1 |tee ${{ github.workspace }}/build_triton_${TRITON_COMMIT_ID}.log - if [ $(ls |grep -c "pytorch_triton_xpu-.*.whl") -eq 0 ];then - echo "Build triton got failed" - exit 1 - fi - pip install pytorch_triton_xpu-*.whl - cp pytorch_triton_xpu-*.whl ${{ github.workspace }} - - name: Build Torchvision and Torchaudio - run: | - # gcc 13 - dnf install -y gcc-toolset-13-gcc-c++ zlib-devel - source /opt/rh/gcc-toolset-13/enable - cd ./pytorch - TORCHVISION_COMMIT_ID="$(cat .github/ci_commit_pins/vision.txt)" - TORCHAUDIO_COMMIT_ID="$(cat .github/ci_commit_pins/audio.txt)" - git clone --single-branch -b main https://github.com/pytorch/vision.git xpu-vision - cd xpu-vision && git checkout ${TORCHVISION_COMMIT_ID} - python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_vision_${TRITON_COMMIT_ID}.log - if [ $(ls dist/ |grep -c "torchvision-.*.whl") -eq 0 ];then - echo "Build torchvision got failed" - exit 1 - fi - pip install dist/*.whl - cp dist/*.whl ${{ github.workspace }} - git clone --single-branch -b main 
https://github.com/pytorch/audio.git xpu-audio - cd xpu-audio && git checkout ${TORCHAUDIO_COMMIT_ID} - python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_audio_${TRITON_COMMIT_ID}.log - if [ $(ls dist/ |grep -c "torchaudio-.*.whl") -eq 0 ];then - echo "Build torchaudio got failed" - exit 1 - fi - pip install dist/*.whl - cp dist/*.whl ${{ github.workspace }} + --TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \ + 2>&1 |tee ${{ github.workspace }}/pytorch_build_${PYTORCH_VERSION//\//-}.log - name: Torch Config run: | - printenv python -c "import torch; print(torch.__config__.show())" python -c "import torch; print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" - python -c "import triton; print(triton.__version__)" - python -c "import torchvision; print(torchvision.__version__)" - python -c "import torchaudio; print(torchaudio.__version__)" python pytorch/torch/utils/collect_env.py - pip list |grep -E 'torch|intel' - chmod 777 /__w -R + - name: Identify Build version + id: build_version + run: | + echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" - name: Upload Torch XPU Wheel - if: ${{ success() }} + if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/*.whl + path: ${{ github.workspace }}/torch*.whl + - name: Upload Triton Wheel + if: ${{ ! cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: Triton-Wheel-${{ github.event.pull_request.number || github.sha }} + path: ${{ github.workspace }}/pytorch_triton_xpu-*.whl - name: Upload Build Log if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: name: Torch-XPU-Build-Log-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/build_*.log + path: ${{ github.workspace }}/pytorch_*.log + - name: Cleanup + if: always() + run: | + chmod 777 . 
-R + rm -rf pytorch torch-xpu-ops pytorch_*.log torch*.whl pytorch_triton_xpu-*.whl From 7df6ea3b32dd4a2feb51450c47fffa46b9cc2f7a Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 17:00:49 +0800 Subject: [PATCH 155/160] rebase --- .github/scripts/build.sh | 66 ++++---- .github/scripts/env.sh | 2 +- .github/workflows/_linux_build.yml | 252 +++++++++++++++++------------ 3 files changed, 186 insertions(+), 134 deletions(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index 5bce6eacdf..001e5c9b44 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -1,18 +1,18 @@ #!/bin/bash # Usage: # ./build.sh --WORKSPACE= \ -# --PYTORCH_REPO= --PYTORCH_VERSION= \ +# --PYTORCH_REPO= --PYTORCH_COMMIT= \ # --TORCH_XPU_OPS_REPO= \ -# --TORCH_XPU_OPS_VERSION= +# --TORCH_XPU_OPS_COMMIT= set -xe export GIT_PAGER=cat # Init params WORKSPACE=$(realpath ${WORKSPACE:-"/tmp"}) PYTORCH_REPO=${PYTORCH_REPO:-"https://github.com/pytorch/pytorch.git"} -PYTORCH_VERSION=${PYTORCH_VERSION:-"main"} +PYTORCH_COMMIT=${PYTORCH_COMMIT:-"main"} TORCH_XPU_OPS_REPO=${TORCH_XPU_OPS_REPO:-"https://github.com/intel/torch-xpu-ops.git"} -TORCH_XPU_OPS_VERSION=${TORCH_XPU_OPS_VERSION:-"main"} +TORCH_XPU_OPS_COMMIT=${TORCH_XPU_OPS_COMMIT:-"main"} for var; do eval "export $(echo ${var@Q} |sed "s/^'-*//g;s/=/='/")" done @@ -21,20 +21,20 @@ done rm -rf ${WORKSPACE}/pytorch git clone ${PYTORCH_REPO} ${WORKSPACE}/pytorch cd ${WORKSPACE}/pytorch -git checkout ${PYTORCH_VERSION} +git checkout ${PYTORCH_COMMIT} git remote -v && git branch && git show -s git rev-parse HEAD > ${WORKSPACE}/pytorch.commit # Set torch-xpu-ops -if [ "${TORCH_XPU_OPS_VERSION,,}" == "pinned" ];then +if [ "${TORCH_XPU_OPS_COMMIT,,}" == "pinned" ];then TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" - TORCH_XPU_OPS_VERSION="$(cat ${WORKSPACE}/pytorch/third_party/xpu.txt)" + TORCH_XPU_OPS_COMMIT="$(cat ${WORKSPACE}/pytorch/third_party/xpu.txt)" fi -if [ "${TORCH_XPU_OPS_VERSION,,}" != "cicd" ];then +if [ "${TORCH_XPU_OPS_COMMIT,,}" != "cicd" ];then rm -rf ${WORKSPACE}/torch-xpu-ops git clone ${TORCH_XPU_OPS_REPO} ${WORKSPACE}/torch-xpu-ops cd ${WORKSPACE}/torch-xpu-ops - git checkout ${TORCH_XPU_OPS_VERSION} + git checkout ${TORCH_XPU_OPS_COMMIT} fi cd ${WORKSPACE}/torch-xpu-ops git remote -v && git branch && git show -s @@ -48,30 +48,32 @@ python -m pip install requests python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py git submodule sync && git submodule update --init --recursive python -m pip install -r requirements.txt -python -m pip install mkl-static mkl-include +python -m pip install mkl-static==2025.1.0 mkl-include==2025.1.0 export USE_STATIC_MKL=1 -export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ - intel-cmplr-lib-rt==2025.1.1 | \ - intel-cmplr-lib-ur==2025.1.1 | \ - intel-cmplr-lic-rt==2025.1.1 | \ - intel-sycl-rt==2025.1.1 | \ - oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \ - onemkl-sycl-blas==2025.1.0 | \ - onemkl-sycl-dft==2025.1.0 | \ - onemkl-sycl-lapack==2025.1.0 | \ - onemkl-sycl-rng==2025.1.0 | \ - onemkl-sycl-sparse==2025.1.0 | \ - dpcpp-cpp-rt==2025.1.1 | \ - intel-opencl-rt==2025.1.1 | \ - mkl==2025.1.0 | \ - intel-openmp==2025.1.1 | \ - tbb==2022.1.0 | \ - tcmlib==1.3.0 | \ - umf==0.10.0 | \ - intel-pti==0.12.3 -" +if [ "${XPU_ONEAPI_PATH}" == "" ];then + export 
PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ + intel-cmplr-lib-rt==2025.1.1 | \ + intel-cmplr-lib-ur==2025.1.1 | \ + intel-cmplr-lic-rt==2025.1.1 | \ + intel-sycl-rt==2025.1.1 | \ + impi-rt==2021.15.0 | \ + dpcpp-cpp-rt==2025.1.1 | \ + oneccl-devel==2021.15.2 | \ + oneccl==2021.15.2 | \ + mkl==2025.1.0 | \ + onemkl-sycl-blas==2025.1.0 | \ + onemkl-sycl-dft==2025.1.0 | \ + onemkl-sycl-lapack==2025.1.0 | \ + onemkl-sycl-rng==2025.1.0 | \ + onemkl-sycl-sparse==2025.1.0 | \ + intel-opencl-rt==2025.1.1 | \ + intel-openmp==2025.1.1 | \ + tbb==2022.1.0 | \ + tcmlib==1.3.0 | \ + umf==0.10.0 | \ + intel-pti==0.12.3 + " +fi # Build sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh index 3b17170385..d0f7cfd338 100755 --- a/.github/scripts/env.sh +++ b/.github/scripts/env.sh @@ -1,6 +1,6 @@ #!/bin/bash -XPU_ONEAPI_PATH=${XPU_ONEAPI_PATH:-"/opt/intel/oneapi"} +XPU_ONEAPI_PATH="${XPU_ONEAPI_PATH:-"/opt/intel/oneapi"}" source ${XPU_ONEAPI_PATH}/compiler/latest/env/vars.sh source ${XPU_ONEAPI_PATH}/pti/latest/env/vars.sh diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 8fbed99275..ccbac87b3d 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -3,165 +3,215 @@ name: Linux PyTorch XPU Build on: workflow_call: inputs: - pytorch: + runner: required: true + type: string + default: 'pvc_rolling' + description: Runner label + test_type: + type: string + default: 'build-from-source' + description: Build from source or install nightly wheel + pytorch: type: string default: 'main' - description: Pytorch branch/commit - keep_torch_xpu_ops: - required: false + description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch' + torch_xpu_ops: type: string - default: 'false' - description: Keep torch-xpu-ops pin. `true` means use pined commit - driver: + default: 'main' + description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin + triton: required: false type: string - default: 'lts' - description: Driver lts/rolling + default: 'pinned' + description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed python: - required: false type: string default: '3.10' description: Python version - runner: - required: true - type: string - default: 'linux.idc.xpu' - description: Runner label - triton: - required: false - type: string - default: '' - description: Triton commit. Use pytorch pined commit by default - outputs: - torch_commit_id: - description: The commit id of the torch build - value: ${{ jobs.build.outputs.TORCH_COMMIT_ID }} permissions: read-all +defaults: + run: + shell: bash -xe {0} + jobs: - build: + runner: runs-on: ${{ inputs.runner }} + outputs: + runner_id: ${{ steps.runner-info.outputs.runner_id }} + user_id: ${{ steps.runner-info.outputs.user_id }} + render_id: ${{ steps.runner-info.outputs.render_id }} + hostname: ${{ steps.runner-info.outputs.hostname }} + steps: + - name: Cleanup workspace + run: | + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + - name: Get runner + id: runner-info + uses: ./.github/actions/get-runner + + build: + name: ${{ inputs.pytorch }} + needs: runner + if: ${{ ! 
contains(inputs.test_type, 'wheel') }} + runs-on: ${{ needs.runner.outputs.runner_id }} container: image: 'pytorch/manylinux2_28-builder:xpu-main' volumes: - ${{ github.workspace }}:${{ github.workspace }} env: - PATH: /opt/xpu-build/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - commit_issue: 1280 - GH_TOKEN: ${{ github.token }} - NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} - DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} - outputs: - TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }} + PATH: /tmp/xpu-tool/myvenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + AGENT_TOOLSDIRECTORY: /tmp/xpu-tool + PIP_CACHE_DIR: /tmp/xpu-tool/.pipcache + env: + GH_TOKEN: ${{ github.token }} timeout-minutes: 300 steps: - - name: Setup based env + - name: Install gh-cli run: | + cat /etc/os-release + hostname && id # Cleanup workspace - rm -rf ${{ github.workspace }}/* - # Install gh - dnf install 'dnf-command(config-manager)' + find ./ |grep -v "^\./$" |xargs rm -rf + # install gh + dnf install -y 'dnf-command(config-manager)' dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo - dnf autoremove -y git236* && dnf install -y git - dnf install gh --repo gh-cli -y - # Setup python + dnf install -y gh --repo gh-cli + gh --version + - name: Setup python-${{ inputs.python }} + run: | + rm -rf /tmp/xpu-tool/myvenv local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}') - /opt/python/${local_python}/bin/python -m venv /opt/xpu-build - which python && python -V && pip list + /opt/python/${local_python}/bin/python -m venv /tmp/xpu-tool/myvenv + which python && python -V + which pip && pip list pip install -U pip wheel setuptools - name: Checkout torch-xpu-ops uses: actions/checkout@v4 with: path: torch-xpu-ops - - name: Build Triton XPU + - name: Build Pytorch on ${{ needs.runner.outputs.hostname }} run: | - # gcc 13 - dnf install -y gcc-toolset-13-gcc-c++ - source /opt/rh/gcc-toolset-13/enable - dnf install -y zlib-devel - cd ../ && rm -rf pytorch - git clone https://github.com/pytorch/pytorch pytorch - cd pytorch - if [ -z ${{ inputs.triton }} ]; then - TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" - else - TRITON_COMMIT_ID="${{ inputs.triton }}" - fi - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - pip install cmake ninja pybind11 - rm -rf pytorch_triton_xpu-*.whl - TRITON_VERSION_NAME="$( - curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ - grep '__version__' |head -n 1 |awk -F "'" '{print $2}' - )" - python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} - cp pytorch_triton_xpu-*.whl ${{ github.workspace }} - fi - - name: Build Pytorch XPU - run: | - set -xe -o pipefail - if [ "${{ inputs.driver }}" == "lts" ]; then - export TORCH_XPU_ARCH_LIST='pvc' + export USE_XCCL=1 + # only build pvc for CI + if [ "${{ inputs.test_type }}" == "build-cicd" ];then + export TORCH_XPU_ARCH_LIST='pvc' fi if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')" - PYTORCH_VERSION="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" + PYTORCH_COMMIT="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')" else PYTORCH_REPO="https://github.com/pytorch/pytorch.git" - PYTORCH_VERSION="${{ inputs.pytorch }}" + 
PYTORCH_COMMIT="${{ inputs.pytorch }}" fi - if [[ "${{ inputs.keep_torch_xpu_ops }}" == *"https://"* ]];then - TORCH_XPU_OPS_REPO="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/@.*//')" - TORCH_XPU_OPS_VERSION="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/.*@//')" - elif [ "${{ inputs.keep_torch_xpu_ops }}" == "true" ];then - TORCH_XPU_OPS_VERSION="pinned" + if [[ "${{ inputs.torch_xpu_ops }}" == *"https://"* ]];then + TORCH_XPU_OPS_REPO="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/@.*//')" + TORCH_XPU_OPS_COMMIT="$(echo ${{ inputs.torch_xpu_ops }} |sed 's/.*@//')" else - TORCH_XPU_OPS_VERSION="cicd" + TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git" + TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}" fi - # oneAPI DLE - source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh # gcc 11 source /opt/rh/gcc-toolset-11/enable - export USE_XCCL=1 + # oneAPI DLE + if [ "${{ inputs.oneapi }}" != "installed" ];then + rm -rf ${HOME}/intel ${HOME}/.intel /opt/intel + wget -q -O oneapi.sh "${{ inputs.oneapi }}" + bash oneapi.sh -a -s --eula accept --action install --install-dir /opt/intel/oneapi + export XPU_ONEAPI_PATH="/opt/intel/oneapi" + fi + source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \ --WORKSPACE="${{ github.workspace }}" \ --PYTORCH_REPO="${PYTORCH_REPO}" \ - --PYTORCH_VERSION="${PYTORCH_VERSION}" \ + --PYTORCH_COMMIT="${PYTORCH_COMMIT}" \ --TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \ - --TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \ - 2>&1 |tee ${{ github.workspace }}/pytorch_build_${PYTORCH_VERSION//\//-}.log + --TORCH_XPU_OPS_COMMIT="${TORCH_XPU_OPS_COMMIT}" \ + 2>&1 |tee ${{ github.workspace }}/build_pytorch_${PYTORCH_COMMIT//\//-}.log + if [ $(ls ${{ github.workspace }} |grep -c "torch-.*.whl") -eq 0 ];then + echo "Build pytorch got failed" + exit 1 + fi + - name: Build Triton + run: | + # gcc 13 + dnf install -y gcc-toolset-13-gcc-c++ zlib-devel + source /opt/rh/gcc-toolset-13/enable + cd ./pytorch + pip install cmake ninja pybind11 + rm -rf pytorch_triton_xpu-*.whl + if [ "${{ inputs.triton }}" != "pinned" ];then + TRITON_COMMIT_ID="${{ inputs.triton }}" + else + TRITON_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)" + fi + TRITON_VERSION_NAME="$( + curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\ + grep '__version__' |head -n 1 |awk -F "'" '{print $2}' + )" + python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} \ + 2>&1 |tee ${{ github.workspace }}/build_triton_${TRITON_COMMIT_ID}.log + if [ $(ls |grep -c "pytorch_triton_xpu-.*.whl") -eq 0 ];then + echo "Build triton got failed" + exit 1 + fi + pip install pytorch_triton_xpu-*.whl + cp pytorch_triton_xpu-*.whl ${{ github.workspace }} + - name: Build Torchvision and Torchaudio + run: | + # gcc 13 + dnf install -y gcc-toolset-13-gcc-c++ zlib-devel + source /opt/rh/gcc-toolset-13/enable + cd ./pytorch + TORCHVISION_COMMIT_ID="$(cat .github/ci_commit_pins/vision.txt)" + TORCHAUDIO_COMMIT_ID="$(cat .github/ci_commit_pins/audio.txt)" + git clone --single-branch -b main https://github.com/pytorch/vision.git xpu-vision + cd xpu-vision && git checkout ${TORCHVISION_COMMIT_ID} + python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_vision_${TRITON_COMMIT_ID}.log + if [ $(ls dist/ |grep -c "torchvision-.*.whl") -eq 0 ];then + echo "Build 
torchvision got failed" + exit 1 + fi + pip install dist/*.whl + cp dist/*.whl ${{ github.workspace }} + git clone --single-branch -b main https://github.com/pytorch/audio.git xpu-audio + cd xpu-audio && git checkout ${TORCHAUDIO_COMMIT_ID} + python setup.py bdist_wheel 2>&1 |tee ${{ github.workspace }}/build_audio_${TRITON_COMMIT_ID}.log + if [ $(ls dist/ |grep -c "torchaudio-.*.whl") -eq 0 ];then + echo "Build torchaudio got failed" + exit 1 + fi + pip install dist/*.whl + cp dist/*.whl ${{ github.workspace }} - name: Torch Config run: | + printenv python -c "import torch; print(torch.__config__.show())" python -c "import torch; print(torch.__config__.parallel_info())" python -c "import torch; print(torch.__config__.torch.xpu.device_count())" + python -c "import triton; print(triton.__version__)" + python -c "import torchvision; print(torchvision.__version__)" + python -c "import torchaudio; print(torchaudio.__version__)" python pytorch/torch/utils/collect_env.py - - name: Identify Build version - id: build_version - run: | - echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" + pip list |grep -E 'torch|intel' + chmod 777 /__w -R - name: Upload Torch XPU Wheel - if: ${{ ! cancelled() }} + if: ${{ success() }} uses: actions/upload-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/torch*.whl - - name: Upload Triton Wheel - if: ${{ ! cancelled() }} - uses: actions/upload-artifact@v4 - with: - name: Triton-Wheel-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/pytorch_triton_xpu-*.whl + path: ${{ github.workspace }}/*.whl - name: Upload Build Log if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 with: name: Torch-XPU-Build-Log-${{ github.event.pull_request.number || github.sha }} - path: ${{ github.workspace }}/pytorch_*.log - - name: Cleanup - if: always() - run: | - chmod 777 . 
-R - rm -rf pytorch torch-xpu-ops pytorch_*.log torch*.whl pytorch_triton_xpu-*.whl + path: ${{ github.workspace }}/build_*.log From 72c4bb5101564f18b71f9ccaabb6aa4d3e91d99c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 17:08:02 +0800 Subject: [PATCH 156/160] parallel 1 to skip crash only --- .github/actions/linux-uttest/action.yml | 4 ++-- test/xpu/extended/run_test_with_skip.py | 2 +- test/xpu/run_test_with_only.py | 6 +++--- test/xpu/xpu_test_utils.py | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 6d6b8ef606..90b7ac8523 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -22,7 +22,7 @@ runs: run: | mkdir -p ut_log/ut_regression cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 600 -n 4 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ + pytest --timeout 600 -n 1 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ 2> ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test_error.log | \ tee ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test.log - name: ut_transformers @@ -32,7 +32,7 @@ runs: export PYTORCH_TEST_WITH_SLOW=1 mkdir -p ut_log/ut_transformers cd pytorch - pytest --timeout 600 -n 4 --timeout_method=thread -v test/test_transformers.py -k xpu \ + pytest --timeout 600 -n 1 --timeout_method=thread -v test/test_transformers.py -k xpu \ --junit-xml=${{ github.workspace }}/ut_log/ut_transformers.xml \ 2> ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test_error.log | \ tee ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test.log diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py index 17a8bbeb7a..e062885a6f 100644 --- a/test/xpu/extended/run_test_with_skip.py +++ b/test/xpu/extended/run_test_with_skip.py @@ -17,7 +17,7 @@ skip_options += '"' os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" -test_command = "pytest --timeout 600 -n 4 -v --timeout_method=thread --junit-xml=./ut_extended.xml test_ops_xpu.py" +test_command = "pytest --timeout 600 -n 1 -v --timeout_method=thread --junit-xml=./ut_extended.xml test_ops_xpu.py" test_command += skip_options res = os.system(test_command) sys.exit(res) diff --git a/test/xpu/run_test_with_only.py b/test/xpu/run_test_with_only.py index 52bbcc1ced..ca24d3925a 100644 --- a/test/xpu/run_test_with_only.py +++ b/test/xpu/run_test_with_only.py @@ -14,7 +14,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - "pytest --timeout 600 -n 4 -v " + "pytest --timeout 600 -n 1 -v " + "--junit-xml=./ut_op_with_only.xml " + test_case + skip_options @@ -27,7 +27,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - "pytest --timeout 600 -n 4 -v " + "pytest --timeout 600 -n 1 -v " + "--junit-xml=./ut_op_with_only.xml " + test_case + exe_options @@ -35,7 +35,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): return os.system(test_command) else: test_command = ( - "pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_only.xml " + "pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_only.xml " + test_case ) return os.system(test_command) diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index 967bc192a6..22b84a1683 
100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -1170,7 +1170,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - f"pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f"pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) test_command += skip_options @@ -1181,13 +1181,13 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - f"pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f"pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) test_command += exe_options else: test_command = ( - f"pytest --timeout 600 -n 4 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f"pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) return os.system(test_command) From d951abfa73b35c63df7af041de1de2587ef1e54a Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 17:09:02 +0800 Subject: [PATCH 157/160] install pytest-xdist --- .github/actions/linux-uttest/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 90b7ac8523..7d56412050 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -15,7 +15,7 @@ runs: shell: bash -xe {0} run: | pip install -r pytorch/.ci/docker/requirements-ci.txt - pip install -U pytest-timeout + pip install -U pytest-timeout pytest-xdist - name: ut_regression shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_regression' }} From d459b6e97297fca7cc26e0de872027b25954bc2c Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 19:27:11 +0800 Subject: [PATCH 158/160] modify --- .github/actions/linux-uttest/action.yml | 24 ++++++++++++++++++------ test/xpu/extended/run_test_with_skip.py | 10 +++++++++- test/xpu/run_test_with_only.py | 17 ++++++++++++----- test/xpu/xpu_test_utils.py | 14 +++++++++++--- 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 7d56412050..1f0c87f2a9 100644 --- a/.github/actions/linux-uttest/action.yml +++ b/.github/actions/linux-uttest/action.yml @@ -16,13 +16,25 @@ runs: run: | pip install -r pytorch/.ci/docker/requirements-ci.txt pip install -U pytest-timeout pytest-xdist + xpu_num=$(clinfo --list |awk 'BEGIN{gpu=0;}{if(gpu==1 && $0~/Platform/){gpu=0;}; if(gpu==1){print $0;}; if($0~/Platform.*Graphics/){gpu=1;}}' |wc -l) + parallel_options="" + if [ ${xpu_num} -gt 1 ];then + parallel_options+=" --dist worksteal " + for x in $(seq 0 $[ ${xpu_num} - 1 ]) + do + parallel_options+=" --tx popen//env:ZE_AFFINITY_MASK=${x} " + done + else + parallel_options+=" -n 1 " + fi + echo " --timeout 600 --timeout_method=thread ${parallel_options} " > ${{ github.workspace }}/test-options.txt - name: ut_regression shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_regression' }} run: | mkdir -p ut_log/ut_regression cd pytorch/third_party/torch-xpu-ops/test/regressions - pytest --timeout 600 -n 1 --timeout_method=thread -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ + pytest $(cat ${{ github.workspace }}/test-options.txt) -v --junit-xml=${{ github.workspace }}/ut_log/ut_regression.xml \ 2> ${{ github.workspace 
}}/ut_log/ut_regression/ut_regression_test_error.log | \ tee ${{ github.workspace }}/ut_log/ut_regression/ut_regression_test.log - name: ut_transformers @@ -32,7 +44,7 @@ runs: export PYTORCH_TEST_WITH_SLOW=1 mkdir -p ut_log/ut_transformers cd pytorch - pytest --timeout 600 -n 1 --timeout_method=thread -v test/test_transformers.py -k xpu \ + pytest $(cat ${{ github.workspace }}/test-options.txt) -v test/test_transformers.py -k xpu \ --junit-xml=${{ github.workspace }}/ut_log/ut_transformers.xml \ 2> ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test_error.log | \ tee ${{ github.workspace }}/ut_log/ut_transformers/ut_transformers_test.log @@ -128,13 +140,13 @@ runs: # All xpu ut under test/profiler cd ../../test/profiler - python -m pytest --timeout 600 -n 1 -vs test_cpp_thread.py | \ + python -m pytest $(cat ${{ github.workspace }}/test-options.txt) -vs test_cpp_thread.py | \ tee ${{ github.workspace }}/ut_log/xpu_profiling/test_cpp_thread.log - python -m pytest --timeout 600 -n 1 -vs test_execution_trace.py | \ + python -m pytest $(cat ${{ github.workspace }}/test-options.txt) -vs test_execution_trace.py | \ tee ${{ github.workspace }}/ut_log/xpu_profiling/test_execution_trace.log - python -m pytest --timeout 600 -n 1 -vs test_memory_profiler.py | \ + python -m pytest $(cat ${{ github.workspace }}/test-options.txt) -vs test_memory_profiler.py | \ tee ${{ github.workspace }}/ut_log/xpu_profiling/test_memory_profiler.log - python -m pytest --timeout 600 -n 1 -vs test_profiler_tree.py | \ + python -m pytest $(cat ${{ github.workspace }}/test-options.txt) -vs test_profiler_tree.py | \ tee ${{ github.workspace }}/ut_log/xpu_profiling/test_profiler_tree.log - name: xpu_dev1 diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py index e062885a6f..eac4046155 100644 --- a/test/xpu/extended/run_test_with_skip.py +++ b/test/xpu/extended/run_test_with_skip.py @@ -1,5 +1,6 @@ import os import sys +import torch from skip_list_common import skip_dict from skip_list_win import skip_dict as skip_dict_win @@ -16,8 +17,15 @@ skip_options += skip_option skip_options += '"' +# pytest options +xpu_num = torch.xpu.device_count() +parallel_options = ' --dist worksteal ' + \ + ' '.join([f'--tx popen//env:ZE_AFFINITY_MASK={x}' for x in range(xpu_num)]) \ + if xpu_num > 1 else ' -n 1 ' +test_options = f' --timeout 600 --timeout_method=thread {parallel_options} ' + os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" -test_command = "pytest --timeout 600 -n 1 -v --timeout_method=thread --junit-xml=./ut_extended.xml test_ops_xpu.py" +test_command = f" pytest {test_options} -v --junit-xml=./ut_extended.xml test_ops_xpu.py " test_command += skip_options res = os.system(test_command) sys.exit(res) diff --git a/test/xpu/run_test_with_only.py b/test/xpu/run_test_with_only.py index ca24d3925a..e854b12447 100644 --- a/test/xpu/run_test_with_only.py +++ b/test/xpu/run_test_with_only.py @@ -1,5 +1,6 @@ import os import sys +import torch # Cases in the file is too slow to run all suites on CPU. So add white list. 
@@ -7,6 +8,14 @@ def launch_test(test_case, skip_list=None, exe_list=None): os.environ["PYTORCH_ENABLE_XPU_FALLBACK"] = "1" os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" + + # pytest options + xpu_num = torch.xpu.device_count() + parallel_options = ' --dist worksteal ' + \ + ' '.join([f'--tx popen//env:ZE_AFFINITY_MASK={x}' for x in range(xpu_num)]) \ + if xpu_num > 1 else ' -n 1 ' + test_options = f' --timeout 600 --timeout_method=thread {parallel_options} ' + if skip_list is not None: skip_options = ' -k "not ' + skip_list[0] for skip_case in skip_list[1:]: @@ -14,8 +23,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - "pytest --timeout 600 -n 1 -v " - + "--junit-xml=./ut_op_with_only.xml " + f" pytest {test_options} -v --junit-xml=./ut_op_with_only.xml " + test_case + skip_options ) @@ -27,15 +35,14 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - "pytest --timeout 600 -n 1 -v " - + "--junit-xml=./ut_op_with_only.xml " + f" pytest {test_options} -v --junit-xml=./ut_op_with_only.xml " + test_case + exe_options ) return os.system(test_command) else: test_command = ( - "pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_only.xml " + f" pytest {test_options} -v --junit-xml=./ut_op_with_only.xml " + test_case ) return os.system(test_command) diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index 22b84a1683..54a94a3fb4 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -1163,6 +1163,14 @@ def copy_tests( def launch_test(test_case, skip_list=None, exe_list=None): os.environ["PYTORCH_ENABLE_XPU_FALLBACK"] = "1" os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" + + # pytest options + xpu_num = torch.xpu.device_count() + parallel_options = ' --dist worksteal ' + \ + ' '.join([f'--tx popen//env:ZE_AFFINITY_MASK={x}' for x in range(xpu_num)]) \ + if xpu_num > 1 else ' -n 1 ' + test_options = f' --timeout 600 --timeout_method=thread {parallel_options} ' + if skip_list is not None: skip_options = ' -k "not ' + skip_list[0] for skip_case in skip_list[1:]: @@ -1170,7 +1178,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - f"pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f" pytest {test_options} -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) test_command += skip_options @@ -1181,13 +1189,13 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - f"pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f" pytest {test_options} -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) test_command += exe_options else: test_command = ( - f"pytest --timeout 600 -n 1 -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + f" pytest {test_options} -v --junit-xml=./ut_op_with_skip_{test_case}.xml " + test_case ) return os.system(test_command) From 171cc610ba86130b1f459e62ebb6d0bf4333ec1f Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 19:28:06 +0800 Subject: [PATCH 159/160] modify --- .github/actions/linux-uttest/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml index 1f0c87f2a9..d522dc4691 100644 --- a/.github/actions/linux-uttest/action.yml +++ 
b/.github/actions/linux-uttest/action.yml @@ -27,7 +27,7 @@ runs: else parallel_options+=" -n 1 " fi - echo " --timeout 600 --timeout_method=thread ${parallel_options} " > ${{ github.workspace }}/test-options.txt + printf " --timeout 600 --timeout_method=thread ${parallel_options} " > ${{ github.workspace }}/test-options.txt - name: ut_regression shell: bash -xe {0} if: ${{ inputs.test_type == 'ut_regression' }} From d50cf682c29e6b4357e0d34f49b807e574feebb6 Mon Sep 17 00:00:00 2001 From: mengfei25 Date: Thu, 14 Aug 2025 21:17:20 +0800 Subject: [PATCH 160/160] lint python --- test/xpu/extended/run_test_with_skip.py | 17 +++++++++++------ test/xpu/run_test_with_only.py | 15 +++++++++------ test/xpu/xpu_test_utils.py | 11 +++++++---- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py index eac4046155..49f3be5876 100644 --- a/test/xpu/extended/run_test_with_skip.py +++ b/test/xpu/extended/run_test_with_skip.py @@ -1,7 +1,7 @@ import os import sys -import torch +import torch from skip_list_common import skip_dict from skip_list_win import skip_dict as skip_dict_win @@ -19,13 +19,18 @@ # pytest options xpu_num = torch.xpu.device_count() -parallel_options = ' --dist worksteal ' + \ - ' '.join([f'--tx popen//env:ZE_AFFINITY_MASK={x}' for x in range(xpu_num)]) \ - if xpu_num > 1 else ' -n 1 ' -test_options = f' --timeout 600 --timeout_method=thread {parallel_options} ' +parallel_options = ( + " --dist worksteal " + + " ".join([f"--tx popen//env:ZE_AFFINITY_MASK={x}" for x in range(xpu_num)]) + if xpu_num > 1 + else " -n 1 " +) +test_options = f" --timeout 600 --timeout_method=thread {parallel_options} " os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" -test_command = f" pytest {test_options} -v --junit-xml=./ut_extended.xml test_ops_xpu.py " +test_command = ( + f" pytest {test_options} -v --junit-xml=./ut_extended.xml test_ops_xpu.py " +) test_command += skip_options res = os.system(test_command) sys.exit(res) diff --git a/test/xpu/run_test_with_only.py b/test/xpu/run_test_with_only.py index e854b12447..06ebc87e8d 100644 --- a/test/xpu/run_test_with_only.py +++ b/test/xpu/run_test_with_only.py @@ -1,5 +1,6 @@ import os import sys + import torch # Cases in the file is too slow to run all suites on CPU. So add white list. 
@@ -11,10 +12,13 @@ def launch_test(test_case, skip_list=None, exe_list=None): # pytest options xpu_num = torch.xpu.device_count() - parallel_options = ' --dist worksteal ' + \ - ' '.join([f'--tx popen//env:ZE_AFFINITY_MASK={x}' for x in range(xpu_num)]) \ - if xpu_num > 1 else ' -n 1 ' - test_options = f' --timeout 600 --timeout_method=thread {parallel_options} ' + parallel_options = ( + " --dist worksteal " + + " ".join([f"--tx popen//env:ZE_AFFINITY_MASK={x}" for x in range(xpu_num)]) + if xpu_num > 1 + else " -n 1 " + ) + test_options = f" --timeout 600 --timeout_method=thread {parallel_options} " if skip_list is not None: skip_options = ' -k "not ' + skip_list[0] @@ -42,8 +46,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): return os.system(test_command) else: test_command = ( - f" pytest {test_options} -v --junit-xml=./ut_op_with_only.xml " - + test_case + f" pytest {test_options} -v --junit-xml=./ut_op_with_only.xml " + test_case ) return os.system(test_command) diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index 54a94a3fb4..26c0152f71 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -1166,10 +1166,13 @@ def launch_test(test_case, skip_list=None, exe_list=None): # pytest options xpu_num = torch.xpu.device_count() - parallel_options = ' --dist worksteal ' + \ - ' '.join([f'--tx popen//env:ZE_AFFINITY_MASK={x}' for x in range(xpu_num)]) \ - if xpu_num > 1 else ' -n 1 ' - test_options = f' --timeout 600 --timeout_method=thread {parallel_options} ' + parallel_options = ( + " --dist worksteal " + + " ".join([f"--tx popen//env:ZE_AFFINITY_MASK={x}" for x in range(xpu_num)]) + if xpu_num > 1 + else " -n 1 " + ) + test_options = f" --timeout 600 --timeout_method=thread {parallel_options} " if skip_list is not None: skip_options = ' -k "not ' + skip_list[0]
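
The test-scheduling rework in patches 156 through 160 first drops the fixed "pytest -n 4" worker pool to a single serial worker, then (from patch 158 onward) spawns one pytest-xdist worker per visible XPU, each pinned to its own device through ZE_AFFINITY_MASK. The snippet below is a minimal standalone sketch of that option-building logic, not code taken from the series: it assumes a PyTorch build with XPU support plus pytest-xdist and pytest-timeout installed, and the helper name build_pytest_options is introduced here purely for illustration.

# Sketch of the per-XPU pytest scheduling wired into the launchers by patches 158-160.
import torch  # assumes a PyTorch build with XPU support


def build_pytest_options(timeout: int = 600) -> str:
    """One pytest-xdist worker per visible XPU, or '-n 1' when at most one device is visible."""
    xpu_num = torch.xpu.device_count()
    if xpu_num > 1:
        # 'worksteal' lets idle workers pull remaining tests from busy ones;
        # each popen worker sees exactly one device via ZE_AFFINITY_MASK.
        parallel = " --dist worksteal " + " ".join(
            f"--tx popen//env:ZE_AFFINITY_MASK={x}" for x in range(xpu_num)
        )
    else:
        parallel = " -n 1 "
    return f" --timeout {timeout} --timeout_method=thread {parallel} "


if __name__ == "__main__":
    # On a 2-XPU host this prints roughly:
    #   pytest  --timeout 600 --timeout_method=thread  --dist worksteal
    #     --tx popen//env:ZE_AFFINITY_MASK=0 --tx popen//env:ZE_AFFINITY_MASK=1  -v test_ops_xpu.py
    print(f"pytest {build_pytest_options()} -v test_ops_xpu.py")  # test file name is illustrative

The same options string is what the reworked launchers write to test-options.txt (action.yml) or interpolate into os.system() calls (the test/xpu launch scripts), so a single code path covers both single-device and multi-device runners.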