Skip to content

Commit 1f7a57f

Browse files
[CI] Enhance the UT results check (#1876)
- Print the new failed cases after filtering by the known-issue list. - Enhance the timeout check mechanism, which helps capture cases that hang. - Add a UT total-count summary to the action summary. --------- Co-authored-by: libohao1201 <[email protected]>
1 parent 0bc482b commit 1f7a57f

File tree

4 files changed

+105
-30
lines changed

4 files changed

+105
-30
lines changed

.github/scripts/check-ut.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,17 @@ def print_summary():
233233
print("### Results Summary")
234234
print_header = True
235235

236+
totals = {
237+
'Category': '**Total**',
238+
'UT': '',
239+
'Test cases': 0,
240+
'Passed': 0,
241+
'Skipped': 0,
242+
'Failures': 0,
243+
'Errors': 0,
244+
'Source': ''
245+
}
246+
236247
for summary in summaries:
237248
print_md_row({
238249
'Category': summary['Category'],
@@ -246,6 +257,14 @@ def print_summary():
246257
}, print_header)
247258
print_header = False
248259

260+
totals['Test cases'] += summary['Test cases']
261+
totals['Passed'] += summary['Passed']
262+
totals['Skipped'] += summary['Skipped']
263+
totals['Failures'] += summary['Failures']
264+
totals['Errors'] += summary['Errors']
265+
266+
print_md_row(totals)
267+
249268
def main():
250269
for input_file in args.input_files:
251270
if input_file.endswith('.log'):

.github/scripts/ut_result_check.sh

Lines changed: 72 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ compare_and_filter_logs() {
99
local file_known_issue="$2"
1010
local output_file="${3:-${file_UT%.*}_filtered.log}"
1111
local filtered_content="${file_UT%.*}_removed.log"
12+
local temp_file="temp_parts.log"
13+
local temp_file_labeled="temp_parts_labeled.log"
14+
local temp_output="${3:-${file_UT%.*}_filtered_temp.log}"
15+
local temp_final="${file_UT%.*}_final_temp.log"
1216

1317
if [[ $# -lt 2 ]]; then
1418
echo "[ERROR] Need 2 files to compare"
@@ -21,7 +25,6 @@ compare_and_filter_logs() {
2125
echo "$file_UT contains $file_known_issue"
2226
else
2327
echo "$file_UT does not contain $file_known_issue"
24-
return 1
2528
fi
2629

2730
# Filter the same content from file_UT as file_known_issue
@@ -31,34 +34,73 @@ compare_and_filter_logs() {
3134
# Keep the filtered UT cases
3235
grep -noFf "$file_known_issue" "$file_UT" > "$filtered_content"
3336
echo "Filtered cases file: $filtered_content"
37+
true > "$temp_file"
38+
true > "$temp_file_labeled"
39+
true > "$temp_output"
40+
true > "$temp_final"
41+
grep -E '\.py$|,' "$output_file" > "$temp_output"
42+
while IFS= read -r line; do
43+
IFS=',' read -ra parts <<< "$line"
44+
for part in "${parts[@]}"; do
45+
part_trimmed=$(echo "$part" | xargs)
46+
if [[ -n "$part_trimmed" ]] && ! grep -qF "$part_trimmed" "$file_known_issue"; then
47+
echo "$part_trimmed" >> "$temp_file"
48+
echo -e "\n\033[1;33m[Check the failed cases in summary line]\033[0m"
49+
echo -e "\033[1;33mCase not found in ${file_known_issue}: '${part_trimmed}' (from line: '${line}')\033[0m"
50+
else
51+
echo -e "\n\033[1;33m[Check the failed cases in summary line]\033[0m"
52+
echo -e "\n\033[1;32m${part_trimmed} found in ${file_known_issue} (from line: '${line}')\033[0m"
53+
fi
54+
done
55+
done < "$temp_output"
56+
57+
awk '{print $0 " [in summary line]"}' "$temp_file" > "$temp_file_labeled"
58+
grep -vE '\.py$|,' "$output_file" > "$temp_final"
59+
cat "$temp_file_labeled" >> "$temp_final"
60+
mv "$temp_final" "$output_file"
61+
62+
echo -e "\n\033[1;31m[New failed cases Summary]\033[0m"
63+
if [[ -z "$(tr -d ' \t\n\r\f' < "$output_file" 2>/dev/null)" ]]; then
64+
echo -e "\033[1;32mNo new failed cases found\033[0m"
65+
else
66+
echo -e "\n\033[1;31mNew failed cases, not in known issues\033[0m"
67+
cat "$output_file"
68+
fi
69+
3470
if [[ -s "$filtered_content" ]]; then
35-
echo -e "\n\033[1;31m[Filtered Cases]\033[0m"
71+
echo -e "\n\033[1;31m[These failed cases are in skip list, will filter]\033[0m"
3672
awk -F':' '{
3773
line_number = $1
3874
$1 = ""
3975
gsub(/^ /, "", $0)
4076
printf "\033[33m%3d\033[0m: %s\n", line_number, $0
4177
}' "$filtered_content"
4278
else
43-
echo -e "\n\033[1;32mNo Filtered Cases\033[0m"
79+
echo -e "\n\033[1;32mNo Skipped Cases\033[0m"
4480
fi
81+
82+
rm -f ${temp_output} ${temp_file} ${temp_final}
4583
}
4684

4785
if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_regression_dev1' || "${ut_suite}" == 'op_extended' || "${ut_suite}" == 'op_transformers' ]]; then
4886
grep -E "FAILED" "${ut_suite}"_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_failed.log
4987
grep -E "have failures" "${ut_suite}"_test.log | awk '{print $1}' >> ./"${ut_suite}"_failed.log
88+
grep -E "Timeout" "${ut_suite}"_test.log | grep "test" >> ./"${ut_suite}"_failed.log
5089
grep "PASSED" "${ut_suite}"_test.log | awk '{print $1}' > ./"${ut_suite}"_passed.log
90+
echo -e "========================================================================="
91+
echo -e "Show Failed cases in ${ut_suite}"
92+
echo -e "========================================================================="
93+
cat "./${ut_suite}_failed.log"
94+
echo -e "========================================================================="
95+
echo -e "Checking Failed cases in ${ut_suite}"
96+
echo -e "========================================================================="
5197
compare_and_filter_logs "${ut_suite}"_failed.log Known_issue.log
5298
if [[ -f "${ut_suite}_failed_filtered.log" ]]; then
5399
num_failed=$(wc -l < "./${ut_suite}_failed_filtered.log")
54100
else
55101
num_failed=$(wc -l < "./${ut_suite}_failed.log")
56102
fi
57103
num_passed=$(wc -l < "./${ut_suite}_passed.log")
58-
echo -e "========================================================================="
59-
echo -e "Show Failed cases in ${ut_suite}"
60-
echo -e "========================================================================="
61-
cat "./${ut_suite}_failed.log"
62104
if [[ $num_failed -gt 0 ]] || [[ $num_passed -le 0 ]]; then
63105
echo -e "[ERROR] UT ${ut_suite} test Fail"
64106
exit 1
@@ -69,28 +111,36 @@ fi
69111
if [[ "${ut_suite}" == 'op_ut' ]]; then
70112
grep -E "FAILED" op_ut_with_skip_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_with_skip_test_failed.log
71113
grep -E "have failures" op_ut_with_skip_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_skip_test_failed.log
114+
grep -E "Timeout" op_ut_with_skip_test.log | grep "test" >> ./"${ut_suite}"_with_skip_test_failed.log
72115
grep -E "FAILED" op_ut_with_only_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_with_only_test_failed.log
73116
grep -E "have failures" op_ut_with_only_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_only_test_failed.log
117+
grep -E "Timeout" op_ut_with_only_test.log | grep "test" >> ./"${ut_suite}"_with_only_test_failed.log
118+
echo -e "========================================================================="
119+
echo -e "Show Failed cases in ${ut_suite} with skip"
120+
echo -e "========================================================================="
121+
cat "./${ut_suite}_with_skip_test_failed.log"
122+
echo -e "========================================================================="
123+
echo -e "Checking Failed cases in ${ut_suite} with skip"
124+
echo -e "========================================================================="
74125
compare_and_filter_logs "${ut_suite}"_with_skip_test_failed.log Known_issue.log
75126
if [[ -f "${ut_suite}_with_skip_test_failed_filtered.log" ]]; then
76127
num_failed_with_skip=$(wc -l < "./${ut_suite}_with_skip_test_failed_filtered.log")
77128
else
78129
num_failed_with_skip=$(wc -l < "./${ut_suite}_with_skip_test_failed.log")
79130
fi
131+
echo -e "========================================================================="
132+
echo -e "Show Failed cases in ${ut_suite} with only"
133+
echo -e "========================================================================="
134+
cat "./${ut_suite}_with_only_test_failed.log"
135+
echo -e "========================================================================="
136+
echo -e "Checking Failed cases in ${ut_suite} with only"
137+
echo -e "========================================================================="
80138
compare_and_filter_logs "${ut_suite}"_with_only_test_failed.log Known_issue.log
81139
if [[ -f "${ut_suite}_with_only_test_failed_filtered.log" ]]; then
82140
num_failed_with_only=$(wc -l < "./${ut_suite}_with_only_test_failed_filtered.log")
83141
else
84142
num_failed_with_only=$(wc -l < "./${ut_suite}_with_only_test_failed.log")
85143
fi
86-
echo -e "========================================================================="
87-
echo -e "Show Failed cases in ${ut_suite} with skip"
88-
echo -e "========================================================================="
89-
cat "./${ut_suite}_with_skip_test_failed.log"
90-
echo -e "========================================================================="
91-
echo -e "Show Failed cases in ${ut_suite} with only"
92-
echo -e "========================================================================="
93-
cat "./${ut_suite}_with_only_test_failed.log"
94144
((num_failed=num_failed_with_skip+num_failed_with_only))
95145
grep "PASSED" op_ut_with_skip_test.log | awk '{print $1}' > ./"${ut_suite}"_with_skip_test_passed.log
96146
grep "PASSED" op_ut_with_only_test.log | awk '{print $1}' > ./"${ut_suite}"_with_only_test_passed.log
@@ -136,16 +186,19 @@ fi
136186
if [[ "${ut_suite}" == 'xpu_distributed' ]]; then
137187
grep -E "^FAILED" xpu_distributed_test.log | awk '{print $2}' > ./"${ut_suite}"_xpu_distributed_test_failed.log
138188
grep -E "have failures" xpu_distributed_test.log | awk '{print $1}' >> ./"${ut_suite}"_xpu_distributed_test_failed.log
189+
echo -e "========================================================================="
190+
echo -e "Show Failed cases in ${ut_suite} xpu distributed"
191+
echo -e "========================================================================="
192+
cat "./${ut_suite}_xpu_distributed_test_failed.log"
193+
echo -e "========================================================================="
194+
echo -e "Checking Failed cases in ${ut_suite} xpu distributed"
195+
echo -e "========================================================================="
139196
compare_and_filter_logs "${ut_suite}"_xpu_distributed_test_failed.log Known_issue.log
140197
if [[ -f "${ut_suite}_xpu_distributed_test_failed_filtered.log" ]]; then
141198
num_failed_xpu_distributed=$(wc -l < "./${ut_suite}_xpu_distributed_test_failed_filtered.log")
142199
else
143200
num_failed_xpu_distributed=$(wc -l < "./${ut_suite}_xpu_distributed_test_failed.log")
144201
fi
145-
echo -e "========================================================================="
146-
echo -e "Show Failed cases in ${ut_suite} xpu distributed"
147-
echo -e "========================================================================="
148-
cat "./${ut_suite}_xpu_distributed_test_failed.log"
149202
((num_failed=num_failed_xpu_distributed))
150203
if [[ $num_failed -gt 0 ]]; then
151204
echo -e "[ERROR] UT ${ut_suite} test Fail"

.github/workflows/_linux_ut.yml

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,18 +63,16 @@ jobs:
6363
- name: 'op_regression'
6464
condition: ${{ contains(inputs.ut, 'op_regression') }}
6565
directory: 'test/regressions'
66-
command: 'pytest --timeout 600 -v --junit-xml=../../ut_log/op_regression.xml'
66+
command: 'pytest --timeout 600 --timeout_method=thread -v --junit-xml=../../ut_log/op_regression.xml'
6767
log_prefix: 'op_regression'
68-
timeout: 8000
6968
additional_steps: |
7069
clinfo --list
7170
pip install pytest pytest-timeout
7271
- name: 'op_regression_dev1'
7372
condition: ${{ contains(inputs.ut, 'op_regression_dev1') }}
7473
directory: 'test/regressions'
75-
command: 'pytest --timeout 600 -v test_operation_on_device_1.py --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml'
74+
command: 'pytest --timeout 600 --timeout_method=thread -v test_operation_on_device_1.py --junit-xml=$GITHUB_WORKSPACE/ut_log/op_regression_dev1.xml'
7675
log_prefix: 'op_regression_dev1'
77-
timeout: 8000
7876
additional_steps: |
7977
clinfo --list
8078
unset ZE_AFFINITY_MASK
@@ -83,9 +81,8 @@ jobs:
8381
- name: 'op_transformers'
8482
condition: ${{ contains(inputs.ut, 'op_transformers') }}
8583
directory: '../pytorch'
86-
command: 'pytest --timeout 600 -v test/test_transformers.py -k xpu --junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml'
84+
command: 'pytest --timeout 600 --timeout_method=thread -v test/test_transformers.py -k xpu --junit-xml=$GITHUB_WORKSPACE/ut_log/op_transformers.xml'
8785
log_prefix: 'op_transformers'
88-
timeout: 3600
8986
additional_steps: |
9087
pip install pytest pytest-timeout
9188
export PYTORCH_TEST_WITH_SLOW=1
@@ -94,7 +91,6 @@ jobs:
9491
directory: '../pytorch/third_party/torch-xpu-ops/test/xpu/extended/'
9592
command: 'python run_test_with_skip.py'
9693
log_prefix: 'op_extended'
97-
timeout: 10000
9894
additional_steps: |
9995
pip install pytest pytest-timeout
10096
export PYTORCH_TEST_WITH_SLOW=1
@@ -107,7 +103,7 @@ jobs:
107103
command_script: |
108104
export PYTORCH_ENABLE_XPU_FALLBACK=1
109105
export PYTORCH_TEST_WITH_SLOW=1
110-
timeout 10000 python run_test_with_skip.py \
106+
python run_test_with_skip.py \
111107
2>$GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test_error.log | \
112108
tee $GITHUB_WORKSPACE/ut_log/op_ut/op_ut_with_skip_test.log
113109
cp *.xml $GITHUB_WORKSPACE/ut_log
@@ -147,7 +143,6 @@ jobs:
147143
eval $test_cmd 2>$GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test_error.log | \
148144
tee $GITHUB_WORKSPACE/ut_log/torch_xpu/torch_xpu_test.log
149145
log_prefix: 'torch_xpu'
150-
timeout: 10000
151146
additional_steps: |
152147
pip install pytest pytest-timeout
153148
- name: 'xpu_profiling'
@@ -188,11 +183,13 @@ jobs:
188183
- name: Checkout torch-xpu-ops
189184
uses: actions/checkout@v4
190185
- name: Create unique workspace
186+
shell: bash -xe {0}
191187
run: |
192188
# Create unique conda env for each UT test
193189
random=$(head /dev/urandom | tr -dc A-Za-z0-9_ | head -c ${1:-5} | xargs)
194190
echo "CONDA_ENV_NAME=xpu_op_${ZE_AFFINITY_MASK}_${{ matrix.test.name }}_${random}" >> $GITHUB_ENV
195191
- name: Create Conda Env
192+
shell: bash -xe {0}
196193
run: |
197194
pwd
198195
which conda
@@ -206,6 +203,7 @@ jobs:
206203
with:
207204
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}
208205
- name: Prepare Stock Pytorch
206+
shell: bash -xe {0}
209207
run: |
210208
cd ../
211209
rm -rf ./pytorch || sudo rm -rf ./pytorch
@@ -229,6 +227,7 @@ jobs:
229227
git show -s && git status && git diff
230228
pip install -r .ci/docker/requirements-ci.txt
231229
- name: Prepare Torch-xpu-ops
230+
shell: bash -xe {0}
232231
run: |
233232
cd ../pytorch
234233
rm -rf third_party/torch-xpu-ops
@@ -247,10 +246,12 @@ jobs:
247246
name: Triton-Wheel-${{ github.event.pull_request.number || github.sha }}
248247
path: ${{ github.workspace }}
249248
- name: Install Triton
249+
shell: bash -xe {0}
250250
run: |
251251
source activate $CONDA_ENV_NAME
252252
pip install --force-reinstall ${{ github.workspace }}/pytorch_triton_xpu-*.whl
253253
- name: Torch Config
254+
shell: bash -xe {0}
254255
run: |
255256
source activate $CONDA_ENV_NAME
256257
python -c "import torch; print(torch.__config__.show())"
@@ -264,6 +265,7 @@ jobs:
264265
rm -rf ~/.triton/cache || sudo rm -rf ~/.triton/cache
265266
echo "UT_NAME=${{ matrix.test.name }}" >> "${GITHUB_ENV}"
266267
- name: Run XPU UT Test
268+
shell: bash -xe {0}
267269
if: ${{ matrix.test.condition }}
268270
run: |
269271
set -e
@@ -282,12 +284,13 @@ jobs:
282284
${{ matrix.test.command_script }}
283285
SCRIPT
284286
else
285-
timeout ${{ matrix.test.timeout }} ${{ matrix.test.command }} \
287+
${{ matrix.test.command }} \
286288
2>${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test_error.log | \
287289
tee ${{ github.workspace }}/ut_log/${{ matrix.test.name }}/${{ matrix.test.log_prefix }}_test.log
288290
${{ matrix.test.xml_post_processing || '' }}
289291
fi
290292
- name: UT Test Results Summary
293+
shell: bash -xe {0}
291294
if: ${{ matrix.test.condition }}
292295
run: |
293296
source activate $CONDA_ENV_NAME

test/xpu/extended/run_test_with_skip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
skip_options += '"'
1818

1919
os.environ["PYTORCH_TEST_WITH_SLOW"] = "1"
20-
test_command = "pytest --timeout 600 -v --junit-xml=./op_extended.xml test_ops_xpu.py"
20+
test_command = "pytest --timeout 600 -v --timeout_method=thread --junit-xml=./op_extended.xml test_ops_xpu.py"
2121
test_command += skip_options
2222
res = os.system(test_command)
2323
sys.exit(res)

0 commit comments

Comments
 (0)