diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py index c9afb73eb..facd6bbcf 100644 --- a/.github/scripts/check-ut.py +++ b/.github/scripts/check-ut.py @@ -192,8 +192,12 @@ def determine_category(ut): return 'op_regression_dev1' elif ut == 'op_extended': return 'op_extended' + elif ut == 'op_transformers': + return 'op_transformers' elif 'op_ut' in ut: return 'op_ut' + elif 'inductor_' in ut: + return 'inductor' else: return 'unknown' @@ -211,9 +215,12 @@ def process_xml_file(xml_file): category = determine_category(ut) for suite in xml: + ut_name = f"{ut.split('-')[0]}_rerun" if suite.tests == 1 else ut.split('-')[0] + if "rerun" in ut_name: + continue suite_summary = { 'Category': category, - 'UT': ut, + 'UT': ut_name, 'Test cases': suite.tests, 'Passed': suite.tests - suite.skipped - suite.failures - suite.errors, 'Skipped': suite.skipped, @@ -245,6 +252,8 @@ def print_summary(): } for summary in summaries: + if summary['Test cases'] == 0: + continue print_md_row({ 'Category': summary['Category'], 'UT': summary['UT'], diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh index 52baa15dd..3cd2d7416 100644 --- a/.github/scripts/ut_result_check.sh +++ b/.github/scripts/ut_result_check.sh @@ -190,29 +190,25 @@ if [[ "${ut_suite}" == 'op_ut' ]]; then echo -e "[PASS] UT ${ut_suite} test Pass" fi fi -if [[ "${ut_suite}" == 'torch_xpu' ]]; then - echo "Pytorch XPU binary UT checking" - cd ../../pytorch || exit - for xpu_case in build/bin/*{xpu,sycl}*; do - if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then - case_name=$(basename "$xpu_case") - cd ../ut_log/torch_xpu || exit - grep -E "FAILED|have failures" binary_ut_"${ut_suite}"_"${case_name}"_test.log | awk '{print $2}' > ./binary_ut_"${ut_suite}"_"${case_name}"_failed.log - wc -l < "./binary_ut_${ut_suite}_${case_name}_failed.log" | tee -a ./binary_ut_"${ut_suite}"_failed_summary.log - grep -E "PASSED|Pass" binary_ut_"${ut_suite}"_"${case_name}"_test.log | awk '{print $2}' > ./binary_ut_"${ut_suite}"_"${case_name}"_passed.log - wc -l < "./binary_ut_${ut_suite}_${case_name}_passed.log" | tee -a ./binary_ut_"${ut_suite}"_passed_summary.log - cd - || exit - fi - done +if [[ "${ut_suite}" =~ ^torch_xpu_[123]$ ]]; then + grep -E "FAILED" inductor_test*.log | awk '{print $3}' | grep -v "/inductor" | awk '!seen[$0]++' | grep '^[a-zA-Z]' > ./"${ut_suite}"_failed.log + grep -E "have failures" inductor_test*.log | awk '{print $1}' >> ./"${ut_suite}"_failed.log + grep "PASSED" inductor_test*.log | awk '{print $1}' > ./"${ut_suite}"_passed.log echo -e "=========================================================================" echo -e "Show Failed cases in ${ut_suite}" echo -e "=========================================================================" - cd ../ut_log/torch_xpu || exit - cat "./binary_ut_${ut_suite}_${case_name}_failed.log" - num_failed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_"${ut_suite}"_failed_summary.log) - num_passed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_"${ut_suite}"_passed_summary.log) - ((num_failed=num_failed_binary_ut)) - if [[ $num_failed -gt 0 ]] || [[ $num_passed_binary_ut -le 0 ]]; then + cat "./${ut_suite}_failed.log" + echo -e "=========================================================================" + echo -e "Checking Failed cases in ${ut_suite}" + echo -e "=========================================================================" + compare_and_filter_logs "${ut_suite}"_failed.log Known_issue.log + if [[ -f "${ut_suite}_failed_filtered.log" ]]; then + num_failed=$(wc -l < "./${ut_suite}_failed_filtered.log") + else + num_failed=$(wc -l < "./${ut_suite}_failed.log") + fi + num_passed=$(wc -l < "./${ut_suite}_passed.log") + if [[ $num_failed -gt 0 ]] || [[ $num_passed -le 0 ]]; then echo -e "[ERROR] UT ${ut_suite} test Fail" exit 1 else diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 07c83ea14..78b1fca86 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -43,6 +43,11 @@ on: type: string default: 'lts' description: Driver lts/rolling + keep_going: + required: false + type: string + default: 'false' + description: Define the Inductor UT test mechanism permissions: read-all @@ -50,7 +55,7 @@ jobs: ut_test: runs-on: ${{ matrix.test.runner || inputs.runner }} if: ${{ inputs.ut != 'xpu_distributed' && !contains(inputs.disabled_tests, 'disable_ut') }} - timeout-minutes: 300 + timeout-minutes: 450 env: GH_TOKEN: ${{ github.token }} NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} @@ -130,19 +135,31 @@ jobs: cp op_ut_with_only.xml $GITHUB_WORKSPACE/ut_log additional_steps: | pip install pytest pytest-timeout - - name: 'torch_xpu' + - name: 'torch_xpu_1' condition: ${{ contains(inputs.ut, 'torch_xpu') }} directory: '../pytorch' - command_script: | - export PYTORCH_TEST_WITH_SLOW=1 - export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" - test_cmd="python test/run_test.py --include " - for test in $(ls test/inductor | grep test); do test_cmd="${test_cmd} inductor/$test"; done - for test in $(ls test/xpu | grep test); do test_cmd="${test_cmd} xpu/$test"; done - if [ -f "test/test_xpu.py" ]; then test_cmd="${test_cmd} test_xpu.py"; fi - eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \ - tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log - log_prefix: 'torch_xpu' + test_files: + - 'test_codecache.py' + - 'test_kernel_benchmark.py' + - 'test_max_autotune.py' + - 'test_mkldnn_pattern_matcher.py' + - 'test_triton_kernels.py' + - 'test_torchinductor.py' + - 'test_compiled_optimizers.py' + - 'test_compiled_autograd.py' + - 'test_compile_subprocess.py' + additional_steps: | + pip install pytest pytest-timeout + - name: 'torch_xpu_2' + condition: ${{ contains(inputs.ut, 'torch_xpu') }} + directory: '../pytorch' + test_files: 'test_torchinductor_opinfo.py' + additional_steps: | + pip install pytest pytest-timeout + - name: 'torch_xpu_3' + condition: ${{ contains(inputs.ut, 'torch_xpu') }} + directory: '../pytorch' + test_files: 'test_aot_inductor.py' additional_steps: | pip install pytest pytest-timeout - name: 'xpu_profiling' @@ -285,11 +302,59 @@ jobs: cd ${{ matrix.test.directory }} - if [[ "${{ matrix.test.name }}" == "op_ut" ]] || [[ "${{ matrix.test.name }}" == "xpu_profiling" ]] || [[ "${{ matrix.test.name }}" == "torch_xpu" ]]; then + if [[ "${{ matrix.test.name }}" =~ ^(op_ut|xpu_profiling)$ ]]; then bash << "SCRIPT" set -e ${{ matrix.test.command_script }} SCRIPT + elif [[ "${{ matrix.test.name }}" =~ ^(torch_xpu_[123])$ ]]; then + keep_going_flag="" + [[ "${{ inputs.keep_going }}" == "true" ]] && keep_going_flag="--keep-going" + export PYTORCH_TEST_WITH_SLOW=1 + export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" + printf "%s\n" ${{ join(matrix.test.test_files, ' ') }} > test_files.txt + cat test_files.txt + for line in $(cat test_files.txt) + do + echo "=== Starting test: ${line} ===" + start=$(date +%s) + python test/run_test.py $keep_going_flag --xpu --include inductor/${line} 2>${{ github.workspace }}/ut_log/${{ matrix.test.name }}/torch_xpu_${line}_test_error.log | \ + tee ${{ github.workspace }}/ut_log/${{ matrix.test.name }}/torch_xpu_${line}_test.log + end=$(date +%s) + echo -e "${line} duration: $((end - start))s" + echo "=== Finished test: ${line} ===" + done + cp -r test/test-reports/*.log "${{ github.workspace }}/ut_log/${{ matrix.test.name }}/" + function read_dir(){ + for file in `ls $1` + do + if [ -d $1"/"$file ] + then + cp $1"/"$file"/"*.xml ${{ github.workspace }}/ut_log/ + else + echo "[Warning] $file has no xml" + fi + done + } + read_dir "test/test-reports/python-pytest" + function rename_inductor_files() { + local target_dir="${1:-.}" + if [ ! -d "$target_dir" ]; then + echo "Error: Directory '$target_dir' does not exist" >&2 + return 1 + fi + find "$target_dir" -name 'inductor.*' -exec bash -c ' + for file; do + newfile=$(echo "$file" | sed "s/inductor\./inductor_/") + if [ "$file" != "$newfile" ]; then + echo "Renaming: $file -> $newfile" + mv -v "$file" "$newfile" + fi + done + ' bash {} + + } + rename_inductor_files "${{ github.workspace }}/ut_log/${{ matrix.test.name }}" + rename_inductor_files "${{ github.workspace }}/ut_log/" else ${{ matrix.test.command }} \ 2>${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test_error.log | \ @@ -351,7 +416,11 @@ jobs: condition: ${{ contains(inputs.ut, 'op_extended') }} - name: 'op_ut' condition: ${{ contains(inputs.ut, 'op_ut') }} - - name: 'torch_xpu' + - name: 'torch_xpu_1' + condition: ${{ contains(inputs.ut, 'torch_xpu') }} + - name: 'torch_xpu_2' + condition: ${{ contains(inputs.ut, 'torch_xpu') }} + - name: 'torch_xpu_3' condition: ${{ contains(inputs.ut, 'torch_xpu') }} - name: 'xpu_profiling' condition: ${{ inputs.driver == 'rolling' && contains(inputs.ut, 'xpu_profiling') }} diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 25c3af024..f727f2a5a 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -84,11 +84,12 @@ jobs: uses: ./.github/workflows/_linux_ut.yml with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut' || inputs.ut }} + ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,torch_xpu' || inputs.ut }} pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu + keep_going: true Linux-Nightly-Ondemand-E2E-Tests: runs-on: pvc_e2e