Skip to content

Commit a8aa3d0

Browse files
mengfei25rootzxd1997066
authored
[CI] Refactor CICD test workflows (#1862)
1. test in container 2. use local python instead of conda 3. split common steps disable_windows --------- Co-authored-by: root <[email protected]> Co-authored-by: Zeng, Xiangdong <[email protected]>
1 parent 3da5886 commit a8aa3d0

27 files changed

+1415
-2417
lines changed

.github/actions/get-runner/action.yml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Composite action: publishes identifiers of the current runner as outputs,
# prints host hardware/OS information, and cleans docker caches and the
# workspace so the following steps start from a clean host.
name: Get Runner Infos
description: Report runner, user, render-group and host identifiers, then clean up the host

outputs:
  runner_id:
    description: RUNNER_NAME with a trailing "-<single digit>" suffix removed
    value: ${{ steps.runner.outputs.runner_id }}
  user_id:
    description: Numeric id of the user running the step (id -u)
    value: ${{ steps.runner.outputs.user_id }}
  render_id:
    description: Numeric id of the "render" group on the host (empty if the group does not exist)
    value: ${{ steps.runner.outputs.render_id }}
  hostname:
    description: Host name reported by the hostname command
    value: ${{ steps.runner.outputs.hostname }}

# NOTE(review): `permissions` is documented as a workflow/job-level key;
# confirm it has any effect inside action metadata.
permissions: read-all

runs:
  using: composite
  steps:
    - name: Get runner
      shell: bash -xe {0}
      id: runner
      run: |
        # get test runner
        # Each line is written both to the log (tee) and to GITHUB_OUTPUT.
        echo "runner_id=$(echo "${RUNNER_NAME}" |sed 's/\-[0-9]$//')" |tee -a "${GITHUB_OUTPUT}"
        echo "user_id=$(id -u)" |tee -a "${GITHUB_OUTPUT}"
        echo "render_id=$(getent group render |cut -d: -f3)" |tee -a "${GITHUB_OUTPUT}"
        echo "hostname=$(hostname)" |tee -a "${GITHUB_OUTPUT}"
        # show host info
        lscpu
        lshw -C display
        free -h
        df -h
        cat /etc/os-release
        uname -a
    - name: Cleanup host
      shell: bash -xe {0}
      run: |
        # clean docker cache (best effort: docker may be unavailable)
        docker system prune -af || true
        # clean workspace
        ls -al
        # Remove every entry of the current directory, hidden ones included,
        # but keep the directory itself. Using -exec instead of piping names
        # through xargs keeps file names containing whitespace intact.
        sudo find . -mindepth 1 -maxdepth 1 -exec rm -rf {} +
        cd "${RUNNER_WORKSPACE}/.."
        # Safety net: never touch anything if we ended up in the filesystem root.
        if [ "${PWD}" != "/" ];then
          ls -al
          sudo chmod 777 -R torch-xpu-ops _temp _actions _tool || true
          # mount HOME dir to use caches to save time
          rm -rf _temp && mkdir _temp
          ln -sf "${HOME}" _temp/_github_home
        fi

.github/actions/inductor-xpu-e2e-test/action.yml

Lines changed: 0 additions & 185 deletions
This file was deleted.
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Composite action: runs the Dynamo benchmark E2E tests through
# inductor_xpu_test.sh for every requested suite/dtype/mode/scenario
# combination, then summarizes the produced logs.
# The optional environment variable MODEL_ONLY_NAME (comma-separated model
# names, provided by the caller) switches from "whole suite sharded over all
# XPUs" to "listed models only, one after another on XPU 0".
name: Linux E2E Test

inputs:
  suite:
    required: true
    type: string
    default: 'huggingface'
    description: 'Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma'
  dt:
    required: true
    type: string
    default: 'float32'
    description: 'Data precision of the test. float32,bfloat16,float16,amp_bf16,amp_fp16. Delimiter is comma'
  mode:
    required: true
    type: string
    default: 'inference'
    description: 'inference,training. Delimiter is comma'
  scenario:
    required: true
    type: string
    default: 'accuracy'
    description: 'accuracy,performance. Delimiter is comma'

runs:
  using: composite
  steps:
    - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
      shell: bash -x {0}
      # Inputs are handed to the script through environment variables rather
      # than expanded into the script text, so their content can never be
      # interpreted as shell code.
      env:
        E2E_SUITE: ${{ inputs.suite }}
        E2E_DT: ${{ inputs.dt }}
        E2E_MODE: ${{ inputs.mode }}
        E2E_SCENARIO: ${{ inputs.scenario }}
      run: |
        pip list |grep -E 'intel|torch'
        cp ./.github/scripts/inductor_xpu_test.sh ./pytorch
        cd ./pytorch
        # check param
        # contains <comma separated allow-list> <value>
        # Returns 0 and announces the value when it is in the list; otherwise
        # prints a warning and returns 1 so the caller can skip it.
        function contains() {
          # "$2" is quoted so it is matched literally, not as a regex.
          if [[ "$1" =~ (^|,)"$2"($|,) ]];then
            echo "Start $2 ..."
          else
            echo "[Warning] $2 is not supported type! Skipped!"
            return 1
          fi
        }
        # Number of XPU devices: device lines listed by clinfo under the
        # platform(s) whose name contains "Graphics".
        xpu_num=$(clinfo --list |awk 'BEGIN{gpu=0;}{if(gpu==1 && $0~/Platform/){gpu=0;}; if(gpu==1){print $0;}; if($0~/Platform.*Graphics/){gpu=1;}}' |wc -l)
        if [ "${xpu_num}" -lt 1 ];then
          echo "[Warning] No XPU device detected by clinfo!"
        fi
        # Physical cores (cores per socket * sockets) shared evenly between
        # XPUs. Truncated to an integer because OMP_NUM_THREADS must be one;
        # falls back to a single instance when no XPU was detected to avoid a
        # division by zero.
        cores_per_instance="$(lscpu |grep -E 'Core\(s\) per socket:|Socket\(s\):' |awk -v i="${xpu_num}" 'BEGIN{sum=1}{sum*=$NF}END{if(i<1){i=1}; print int(sum/i)}')"
        export OMP_NUM_THREADS=${cores_per_instance}
        # The comma separated inputs are turned into word lists; the
        # expansions are intentionally unquoted to get word splitting.
        for suite in ${E2E_SUITE//,/ }
        do
          # pt2e is not handled by this action
          if [ "${suite}" == "pt2e" ];then
            continue
          fi
          contains "huggingface,timm_models,torchbench" "${suite}" || continue
          for dt in ${E2E_DT//,/ }
          do
            contains "float32,bfloat16,float16,amp_bf16,amp_fp16" "${dt}" || continue
            for mode in ${E2E_MODE//,/ }
            do
              contains "inference,training" "${mode}" || continue
              for scenario in ${E2E_SCENARIO//,/ }
              do
                contains "accuracy,performance" "${scenario}" || continue
                if [ "${MODEL_ONLY_NAME}" == "" ];then
                  # One background worker per XPU, each pinned to its own
                  # contiguous range of CPU cores.
                  for xpu_id in $(seq 0 $(( xpu_num - 1 )))
                  do
                    cpu_list="$(echo "${cores_per_instance} ${xpu_id}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')"
                    numactl --localalloc --physcpubind="${cpu_list}" bash -x inductor_xpu_test.sh "${suite}" "${dt}" "${mode}" "${scenario}" xpu "${xpu_id}" static "${xpu_num}" "${xpu_id}" &
                  done
                else
                  # Only the listed models, sequentially on XPU 0.
                  for test_model in ${MODEL_ONLY_NAME//,/ }
                  do
                    numactl --localalloc bash -x inductor_xpu_test.sh "${suite}" "${dt}" "${mode}" "${scenario}" xpu 0 static 1 0 "${test_model}"
                  done
                fi
                # Wait for all background workers of this combination.
                wait
                # summarize pass rate
                # Concatenate the per-card logs into a single "_all" log.
                LOG_DIR="inductor_log/${suite}/${dt}"
                LOG_NAME="inductor_${suite}_${dt}_${mode}_xpu_${scenario}_all.log"
                rm -f "${LOG_DIR}/${LOG_NAME}"
                find "${LOG_DIR}/" -name "inductor_${suite}_${dt}_${mode}_xpu_${scenario}_card*.log" |xargs cat >> "${LOG_DIR}/${LOG_NAME}" 2>&1
              done
            done
          done
        done

    - name: Summary E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
      shell: bash -xe {0}
      # Same as above: inputs reach the script as data, not as script text.
      env:
        E2E_SUITE: ${{ inputs.suite }}
        E2E_DT: ${{ inputs.dt }}
        E2E_MODE: ${{ inputs.mode }}
        E2E_SCENARIO: ${{ inputs.scenario }}
      run: |
        cd ./pytorch
        # Rebuild the merged accuracy CSV: every row of every accuracy CSV gets
        # the name of its source file appended as an extra column.
        rm -f inductor_log/summary_accuracy.csv
        for var in $(find inductor_log/ -name "inductor_*_xpu_accuracy.csv")
        do
          sed -i "s/$/,$(basename "${var}")/" "${var}"
          cat "${var}" >> inductor_log/summary_accuracy.csv
        done
        cp "${{ github.workspace }}/.github/scripts/inductor_summary.py" ./
        # Only build the summary when at least one result CSV was produced.
        csv_file="$(find inductor_log/ -name "inductor_*_xpu_*.csv" |tail -n 1)"
        if [ -f "${csv_file}" ];then
          pip install styleFrame scipy pandas
          # Comma separated inputs become separate arguments; the expansions
          # are intentionally unquoted to get word splitting.
          python inductor_summary.py -p ${E2E_DT//,/ } -s ${E2E_SUITE//,/ } -m ${E2E_MODE//,/ } -sc ${E2E_SCENARIO//,/ }
        fi

0 commit comments

Comments
 (0)