Commit ea48cc4

[Nightly] Modify nightly test workflows (#1752)
1. Fix Windows UT issue.
2. Modify the TorchBench installation to reduce reinstalling torch.
3. Remove the merged PR from apply_torch_pr.py.
4. Remove the DLE source to use PyPI packages.

_linux_op_benchmark.yml disable_ut disable_distributed disable_windows

Co-authored-by: mengfeil <test>
1 parent aa6954b commit ea48cc4

File tree

5 files changed: +14, -13 lines

.github/actions/inductor-xpu-e2e-test/action.yml

Lines changed: 10 additions & 4 deletions
@@ -62,13 +62,19 @@ runs:
 fi
 cd ../ && python -c "import torch, torchvision, torchaudio"
 rm -rf benchmark && git clone https://github.com/pytorch/benchmark.git
-cd benchmark && git checkout $TORCHBENCH_COMMIT_ID && pip install --no-deps -r requirements.txt
+cd benchmark && git checkout $TORCHBENCH_COMMIT_ID
+# remove deps which will reinstall torch
+pip install --no-deps accelerate
+pip install --no-deps $(cat requirements.txt |grep 'pytorch-image-models')
+timm_commit="$(grep 'pytorch-image-models' requirements.txt |awk -F '@' '{print $2}')"
+pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/${timm_commit:-"main"}/requirements.txt | grep -vE torch)
+sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g' requirements.txt
+pip install -r requirements.txt
 python install.py --continue_on_fail
 # deps for torchrec_dlrm
 pip install pyre_extensions
 pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
-pip install torchmetrics==1.0.3
-pip install torchrec --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu
+pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec
 fi
 if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then
 pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION}
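
The added steps pin timm (pytorch-image-models) to the commit recorded in benchmark's requirements.txt and install its dependencies with any torch lines filtered out, so the preinstalled nightly torch build is not replaced. A minimal sketch of the commit-extraction step, using a made-up requirements line of the shape the grep/awk pipeline expects (the real pin in pytorch/benchmark may be formatted differently):

# Hypothetical pinned entry; the commit hash is read from the second '@'-separated field.
echo 'git+https://github.com/huggingface/pytorch-image-models@0123abc' > req.txt
timm_commit="$(grep 'pytorch-image-models' req.txt | awk -F '@' '{print $2}')"
echo "${timm_commit:-main}"   # prints 0123abc; falls back to "main" when no pin is found

The later sed then blanks the pytorch-image-models and accelerate entries, so the plain "pip install -r requirements.txt" that follows cannot pull torch back in through their dependencies.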
@@ -127,7 +133,7 @@ runs:
 contains "accuracy,performance" $scenario
 $contains_status
 if [ "${MODEL_ONLY_NAME}" == "" ];then
-xpu_list=($(xpu-smi discovery |grep 'DRM Device: /dev/' |sed 's/.*card//;s/[^0-9].*//' |awk '{print $1 - 1":"NR - 1}'))
+xpu_list=($(xpu-smi discovery |grep 'DRM Device: /dev/' |sed 's/.*card//;s/[^0-9].*//' |awk '{if($1==0){print $1":"NR - 1}else{print $1 - 1":"NR - 1}}'))
 for xpu_id in ${xpu_list[*]}
 do
 bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id/:*} static ${#xpu_list[*]} ${xpu_id/*:} &
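
The reworked awk expression only special-cases a parsed card number of 0, which the old "$1 - 1" mapping would have turned into a negative device index. A quick check of the arithmetic in isolation, with hand-typed card numbers standing in for real xpu-smi discovery output (assumed layouts, not captured output):

# Cards numbered from 1 (assumed layout): output is unchanged.
printf '1\n2\n' | awk '{if($1==0){print $1":"NR - 1}else{print $1 - 1":"NR - 1}}'   # 0:0 and 1:1
# A first card numbered 0 (assumed layout): the old expression printed -1:0, the new one prints 0:0.
printf '0\n' | awk '{if($1==0){print $1":"NR - 1}else{print $1 - 1":"NR - 1}}'      # 0:0

Each resulting entry is then split by the loop into a device id (${xpu_id/:*}) and a per-card index (${xpu_id/*:}), as the surrounding context lines show.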

.github/scripts/apply_torch_pr.py

Lines changed: 1 addition & 3 deletions
@@ -13,9 +13,7 @@
 "https://github.com/pytorch/pytorch/pull/126516",
 # Modify the tolerance level in TIMM benchmark
 "https://github.com/pytorch/pytorch/pull/143739",
-# Allow XPU device for validating the arguments to sparse compressed tensor factory functions
-"https://github.com/pytorch/pytorch/pull/147306",
-"Enhance testing infrastructure to add half-precision support for histc on XPU"
+# "Enhance testing infrastructure to add half-precision support for histc on XPU"
 "https://github.com/pytorch/pytorch/pull/154339",
 ]
 )
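
For context, the URLs in this list appear to be PyTorch pull requests that apply_torch_pr.py patches into the local pytorch checkout; once a PR such as #147306 is merged upstream, its entry can be dropped, which is what this hunk does. A hedged sketch, not necessarily the script's actual logic, of how a listed PR can be applied from its URL (GitHub serves a unified diff at "<pr-url>.diff"):

# Example with one of the remaining pinned PRs; paths and flags are illustrative.
pr_url="https://github.com/pytorch/pytorch/pull/154339"
curl -sSL "${pr_url}.diff" -o pr.diff
git apply --check pr.diff && git apply pr.diff   # dry-run first, then apply inside the pytorch checkout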

.github/workflows/_linux_op_benchmark.yml

Lines changed: 0 additions & 3 deletions
@@ -79,7 +79,6 @@ jobs:
 - name: Install Pytorch XPU
 run: |
 source activate xpu_op_${ZE_AFFINITY_MASK}
-source .github/scripts/env.sh ${{ inputs.pytorch }}
 if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
 cd ../pytorch
 export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
@@ -94,7 +93,6 @@ jobs:
 - name: Torch Config
 run: |
 source activate xpu_op_${ZE_AFFINITY_MASK}
-source .github/scripts/env.sh ${{ inputs.pytorch }}
 python -c "import torch; print(torch.__config__.show())"
 python -c "import torch; print(torch.__config__.parallel_info())"
 python -c "import torch; print(torch.__config__.torch.xpu.device_count())"
@@ -106,7 +104,6 @@ jobs:
 - name: Run Torch XPU Op Benchmark
 if: ${{ inputs.driver == 'rolling' }}
 run: |
-source .github/scripts/env.sh ${{ inputs.pytorch }}
 source activate xpu_op_${ZE_AFFINITY_MASK}
 mkdir -p ${{ github.workspace }}/op_benchmark
 cd test/microbench

.github/workflows/_linux_transformers.yml

Lines changed: 1 addition & 1 deletion
@@ -313,7 +313,7 @@ jobs:
 
 report:
 needs: tests
-if: "always()"
+if: ${{ always() }}
 runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
 steps:
 - name: Download reports

.github/workflows/nightly_ondemand.yml

Lines changed: 2 additions & 2 deletions
@@ -361,8 +361,8 @@ jobs:
 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
 ut: ${{ github.event_name == 'schedule' && 'op_extended,torch_xpu' || inputs.ut }}
 python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
-files-changed: false
-has-label: true
+src_changed: false
+has_label: true
 runner: Windows_CI
 
 Tests-Failure-And-Report:
