# Manually-dispatched workflow that narrows a PyTorch / torch-xpu-ops
# regression down to the offending commit via git bisect.
name: Bisect Search

on:
  workflow_dispatch:
    inputs:
      # Label of the self-hosted runner that hosts the test GPU.
      runner:
        required: true
        type: string
        default: 'pvc_rolling'
        description: Test node
      # old/new commit pairs per repo; at least one of 'pytorch=' / 'xpu-ops='.
      search_commits:
        required: true
        type: string
        default: ''
        description: Target commits, such as 'pytorch=old/new,xpu-ops=old/new'
      search_check:
        type: string
        default: ''
        description: Test case type, 'performance, accuracy, <ut_regressions/ut_extended/ut_xpu> or others'
      # Concrete command executed by bisect_search.sh for each probed commit.
      search_case:
        required: true
        type: string
        default: ''
        description: Test case, such as 'python xxx.py or pytest -k xxx'
      # Relative performance delta treated as a regression (0.1 == 10%).
      search_criteria:
        type: string
        default: '0.1'
        description: Criteria for performance check, default is 10%
      oneapi:
        type: string
        default: '2025.1.3'
        description: Installed oneAPI DLE on host by default, fill offline.sh url if needed
      python:
        type: string
        default: '3.10'
        description: Python version

# Read-only GITHUB_TOKEN for all jobs.
permissions: read-all

jobs:
  # Probe the dispatch-selected runner: export its hostname plus the uid/gid
  # the containerized search job needs, then scrub the workspace.
  get_runner:
    runs-on: ${{ inputs.runner }}
    outputs:
      test_host: ${{ steps.runner-info.outputs.test_host }}
      test_user: ${{ steps.runner-info.outputs.test_user }}
      test_group: ${{ steps.runner-info.outputs.test_group }}
    steps:
      - name: Get runner info
        id: runner-info
        run: |
          # get test runner
          echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT}
          echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT}
          # gid of the 'render' group — required for GPU device access in docker
          echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT}
          # show host info
          cat /etc/os-release
          uname -a
          source /opt/intel/oneapi/setvars.sh
          sycl-ls
          dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev'
      - name: Cleanup workspace
        if: ${{ always() }}
        run: |
          # clean docker cache
          docker stop $(docker ps -aq) || true
          docker system prune -af || true
          # clean files
          ls -al
          sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf

| 71 | + biisect-search: |
| 72 | + needs: get_runner |
| 73 | + runs-on: ${{ needs.get_runner.outputs.test_host }} |
| 74 | + container: |
| 75 | + image: mengfeili/intel-pvc-driver:1146-1136 |
| 76 | + volumes: |
| 77 | + - ${{ github.workspace }}:${{ github.workspace }} |
| 78 | + options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g |
| 79 | + -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} |
| 80 | + env: |
| 81 | + AGENT_TOOLSDIRECTORY: /tmp/_tools |
| 82 | + HF_HOME: /tmp/.cache/huggingface |
| 83 | + TORCH_HOME: /tmp/.cache/torch |
| 84 | + GH_TOKEN: ${{ github.token }} |
| 85 | + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} |
| 86 | + SEARCH_COMMITS: ${{ inputs.search_commits }} |
| 87 | + SEARCH_CHECK: ${{ inputs.search_check }} |
| 88 | + SEARCH_CASE: ${{ inputs.search_case }} |
| 89 | + SEARCH_CRITERIA: ${{ inputs.search_criteria }} |
| 90 | + TORCH_XPU_ARCH_LIST: pvc |
| 91 | + USE_XCCL: 0 |
| 92 | + USE_KINETO: 0 |
| 93 | + defaults: |
| 94 | + run: |
| 95 | + shell: bash -xe {0} |
| 96 | + steps: |
| 97 | + - name: Check runner |
| 98 | + run: | |
| 99 | + ls -al |
| 100 | + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf |
| 101 | + sudo rm -rf /tmp/_tools |
| 102 | + - name: Setup python-${{ inputs.python }} |
| 103 | + uses: actions/setup-python@v5 |
| 104 | + with: |
| 105 | + python-version: ${{ inputs.python }} |
| 106 | + - name: Check runner |
| 107 | + run: | |
| 108 | + hostname && whoami && id |
| 109 | + clinfo --list |
| 110 | + gcc -v && g++ -v |
| 111 | + which python && which pip |
| 112 | + python -V |
| 113 | + pip install -U pip wheel setuptools |
| 114 | + pip list |
| 115 | + uname -a |
| 116 | + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' |
| 117 | + pip install cmake ninja pandas psutil scipy requests pybind11 |
| 118 | + mkdir gs-logs gs-search |
| 119 | + echo "Status,Acc,Perf,PyTorch,Torch-xpu-ops" > gs-logs/summary.csv |
| 120 | + - name: Install oneAPI DLE |
| 121 | + if: ${{ inputs.oneapi != 'installed' }} |
| 122 | + run: | |
| 123 | + rm -rf ~/intel ~/.intel /tmp/intel |
| 124 | + if [ "${{ inputs.oneapi }}" == "2025.1.3" ];then |
| 125 | + ONEAPI_URL="https://registrationcenter-download.intel.com/akdlm/IRC_NAS/3435dc45-055e-4f7a-86b1-779931772404/intel-deep-learning-essentials-2025.1.3.7_offline.sh" |
| 126 | + elif [ "${{ inputs.oneapi }}" == "2025.2.0" ];then |
| 127 | + ONEAPI_URL="https://registrationcenter-download.intel.com/akdlm/IRC_NAS/49d38360-b403-4b06-9104-86fa8d886e6d/intel-deep-learning-essentials-2025.2.0.558_offline.sh" |
| 128 | + else |
| 129 | + ONEAPI_URL="${{ inputs.oneapi }}" |
| 130 | + fi |
| 131 | + wget -q -O oneapi.sh "${ONEAPI_URL}" |
| 132 | + bash oneapi.sh -a -s --eula accept --action install --install-dir /tmp/intel/oneapi |
| 133 | + echo "XPU_ONEAPI_PATH=/tmp/intel/oneapi" >> ${GITHUB_ENV} |
| 134 | + - name: Checkout torch-xpu-ops |
| 135 | + uses: actions/checkout@v4 |
| 136 | + with: |
| 137 | + path: gs-scripts |
| 138 | + - name: Prepare source code |
| 139 | + run: | |
| 140 | + git clone https://github.com/pytorch/pytorch gs-pytorch |
| 141 | + cd gs-pytorch |
| 142 | + LATEST_PT_COMMIT="$(git rev-parse HEAD)" |
| 143 | + cd .. |
| 144 | + git clone https://github.com/intel/torch-xpu-ops gs-torch-xpu-ops |
| 145 | + cd gs-torch-xpu-ops |
| 146 | + LATEST_XPU_COMMIT="$(git rev-parse HEAD)" |
| 147 | + cd .. |
| 148 | + echo "LATEST_PT_COMMIT=${LATEST_PT_COMMIT}" >> ${GITHUB_ENV} |
| 149 | + echo "LATEST_XPU_COMMIT=${LATEST_XPU_COMMIT}" >> ${GITHUB_ENV} |
| 150 | + - name: Prepare test env |
| 151 | + run: | |
| 152 | + pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu |
| 153 | + if [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/huggingface.py"* ]];then |
| 154 | + pip install transformers==4.44.2 |
| 155 | + elif [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/timm_models.py"* ]];then |
| 156 | + pip install --no-deps git+https://github.com/huggingface/[email protected] |
| 157 | + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch) |
| 158 | + elif [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/torchbench.py"* ]];then |
| 159 | + model_name="$(echo ${{ inputs.search_case }} |sed 's+.*\--only *++;s/ .*//')" |
| 160 | + git clone https://github.com/pytorch/benchmark gs-benchmark |
| 161 | + cd gs-benchmark |
| 162 | + echo "PYTHONPATH=${PWD}:${PYTHONPATH}" >> ${GITHUB_ENV} |
| 163 | + python install.py ${model_name} |
| 164 | + else |
| 165 | + pip install -r gs-pytorch/.ci/docker/requirements-ci.txt |
| 166 | + fi |
| 167 | + pip uninstall -y torch && pip uninstall -y torch |
| 168 | + - name: Bisect search pytorch |
| 169 | + if: ${{ contains(inputs.search_commits, 'pytorch') }} |
| 170 | + run: | |
| 171 | + pytorch_commits="$(echo ${{ inputs.search_commits }} |sed 's+.*pytorch=++;s+,.*++')" |
| 172 | + old_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $1}')" |
| 173 | + new_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $2}')" |
| 174 | + old_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ |
| 175 | + --WORKSPACE="${{ github.workspace }}/gs-search" \ |
| 176 | + --PYTORCH_VERSION="${old_commit}" \ |
| 177 | + --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \ |
| 178 | + > ${{ github.workspace }}/gs-logs/search-${old_commit}-${LATEST_XPU_COMMIT}.log 2>&1 && echo $? || echo $?)" |
| 179 | + old_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" |
| 180 | + export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')" |
| 181 | + new_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ |
| 182 | + --WORKSPACE="${{ github.workspace }}/gs-search" \ |
| 183 | + --PYTORCH_VERSION="${new_commit}" \ |
| 184 | + --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \ |
| 185 | + > ${{ github.workspace }}/gs-logs/search-${new_commit}-${LATEST_XPU_COMMIT}.log 2>&1 && echo $? || echo $?)" |
| 186 | + new_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" |
| 187 | + if [ "${old_status}" != "${new_status}" ];then |
| 188 | + cd gs-pytorch |
| 189 | + git reset --hard |
| 190 | + rsync -avz --delete ${{ github.workspace }}/gs-scripts/ gs-scripts/ |
| 191 | + git bisect start ${new_commit} ${old_commit} |
| 192 | + git bisect run ./gs-scripts/.github/scripts/bisect_search.sh \ |
| 193 | + --WORKSPACE="${{ github.workspace }}/gs-search" \ |
| 194 | + --PYTORCH_VERSION="search" \ |
| 195 | + --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \ |
| 196 | + 2>&1 |tee ${{ github.workspace }}/gs-logs/bisect-pytorch.log |
| 197 | + git bisect log |tee ${{ github.workspace }}/gs-logs/result-pytorch.log |
| 198 | + else |
| 199 | + echo "Checked and no regression !" |
| 200 | + fi |
| 201 | + - name: Bisect search torch-xpu-ops |
| 202 | + if: ${{ contains(inputs.search_commits, 'xpu-ops') }} |
| 203 | + run: | |
| 204 | + xpu_ops_commits="$(echo ${{ inputs.search_commits }} |sed 's+.*xpu-ops=++;s+,.*++')" |
| 205 | + old_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $1}')" |
| 206 | + new_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $2}')" |
| 207 | + old_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ |
| 208 | + --WORKSPACE="${{ github.workspace }}/gs-search" \ |
| 209 | + --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \ |
| 210 | + --TORCH_XPU_OPS_VERSION="${old_commit}" \ |
| 211 | + > ${{ github.workspace }}/gs-logs/search-${LATEST_PT_COMMIT}-${old_commit}.log && echo $? || echo $?)" |
| 212 | + old_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" |
| 213 | + export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')" |
| 214 | + new_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ |
| 215 | + --WORKSPACE="${{ github.workspace }}/gs-search" \ |
| 216 | + --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \ |
| 217 | + --TORCH_XPU_OPS_VERSION="${new_commit}" \ |
| 218 | + > ${{ github.workspace }}/gs-logs/search-${LATEST_PT_COMMIT}-${new_commit}.log && echo $? || echo $?)" |
| 219 | + new_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" |
| 220 | + if [ "${old_status}" != "${new_status}" ];then |
| 221 | + cd gs-pytorch |
| 222 | + git reset --hard |
| 223 | + rsync -avz --delete ${{ github.workspace }}/gs-scripts/ gs-scripts/ |
| 224 | + git bisect start ${new_commit} ${old_commit} |
| 225 | + git bisect run ./gs-scripts/.github/scripts/bisect_search.sh \ |
| 226 | + --WORKSPACE="${{ github.workspace }}/gs-search" \ |
| 227 | + --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \ |
| 228 | + --TORCH_XPU_OPS_VERSION="search" \ |
| 229 | + 2>&1 |tee ${{ github.workspace }}/gs-logs/bisect-torch-xpu-ops.log |
| 230 | + git bisect log |tee ${{ github.workspace }}/gs-logs/result-torch-xpu-ops.log |
| 231 | + else |
| 232 | + echo "Checked and no regression !" |
| 233 | + fi |
| 234 | + - name: Summary |
| 235 | + run: | |
| 236 | + cat gs-logs/summary.csv |tee -a ${GITHUB_STEP_SUMMARY} |
| 237 | + for reulst_log in $(find gs-logs -name "result-*.log") |
| 238 | + do |
| 239 | + echo -e "\n\n\n${reulst_log}" |tee -a ${GITHUB_STEP_SUMMARY} |
| 240 | + cat ${reulst_log} |tee -a ${GITHUB_STEP_SUMMARY} |
| 241 | + done |
| 242 | + - name: Upload Logs |
| 243 | + if: ${{ ! cancelled() }} |
| 244 | + uses: actions/upload-artifact@v4 |
| 245 | + with: |
| 246 | + name: bisect-search |
| 247 | + path: ${{ github.workspace }}/gs-logs |
0 commit comments