# Distribute CI workflow.
# Runs for pull requests targeting develop, on a daily schedule, and when
# invoked from another workflow through workflow_call.
name: Distribute CI (V100)

on:
  pull_request:
    types: [opened, synchronize, reopened]
    branches: [develop]
  schedule:
    # Minute 2 of hour 0, every day (GitHub evaluates schedules in UTC).
    - cron: "2 0 * * *"
  workflow_call:
    inputs:
      # Exposed to the steps as RUN_DOWNSTREAM; the literal string "false"
      # makes the container/test/upload steps skip their work.
      run_downstream:
        required: true
        type: string
      # Docker image used for the CI container; when empty, the job falls
      # back to its built-in default image.
      image_name:
        required: true
        type: string

# Keep a single active run per workflow and pull request; a newer run cancels
# the one still in progress. Runs that have no pull request (for example the
# schedule trigger) fall back to the git ref so the group key is never just
# "<workflow>-".
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# Workflow-wide environment. The pull_request-derived values (PR_ID,
# COMMIT_ID, BRANCH, AGILE_COMPILE_BRANCH) are empty when the run was not
# triggered by a pull request; the steps test PR_ID for emptiness.
env:
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  # Used as the prefix of the docker container name.
  TASK: paddlenlp-CI-${{ github.event.pull_request.number }}-Distribut-V100
  ci_scripts: /workspace/PaddleNLP/scripts/distribute
  BRANCH: ${{ github.event.pull_request.base.ref }}
  AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
  CI_name: distribute-ci
  # No leading whitespace: the value is forwarded verbatim into the container.
  no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
  GITHUB_EVENT_NAME: ${{ github.event_name }}
  # "false" makes the container/test/upload steps skip; empty when the
  # workflow is not started through workflow_call.
  RUN_DOWNSTREAM: ${{ inputs.run_downstream }}

# Every `run` step in this workflow is executed with bash (the scripts use
# bash-only constructs such as [[ ... ]]).
defaults:
  run:
    shell: bash

jobs:
  # Single job: start a GPU container, fetch the code, run the distribute CI
  # script inside the container, upload the logs, then remove the container.
  distribute-v100-ci:
    name: distribute-v100-ci
    runs-on:
      group: Auto-Parallel
    steps:
      # Export IMAGE_NAME for the following steps: the caller-supplied image
      # when one was passed, otherwise the default Paddle dev image.
      - name: Determine Image Name
        env:
          IMAGE_NAME: ${{ inputs.image_name }}
        run: |
          if [[ -n "${IMAGE_NAME}" ]]; then
            echo "IMAGE_NAME=${IMAGE_NAME}" >> "$GITHUB_ENV"
          else
            echo "IMAGE_NAME=registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82" >> "$GITHUB_ENV"
          fi

      # Start a detached container and record its name in GITHUB_ENV so the
      # later steps can `docker exec` into it and finally remove it.
      - name: Run Container
        env:
          work_dir: ${{ github.workspace }}
          # NOTE(review): CACHE_DIR is defined here but is not forwarded to the
          # container with -e below; confirm whether that is intentional.
          CACHE_DIR: /home/data/cfs/.cache
          # Quoted without surrounding whitespace: both values are forwarded
          # verbatim into the container, and python_version is concatenated
          # into "python${python_version}" by the Test step.
          FLAGS_dynamic_static_unified_comm: "True"
          python_version: "3.10"
          paddle_whl: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
        run: |
          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> "$GITHUB_ENV"
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping..."
          else
            # The directory three levels above the workspace is mounted at the
            # same path inside the container; later steps read the `proxy`
            # file and the `bos/` client from it. IMAGE_NAME comes from
            # GITHUB_ENV (set by the previous step) and is used as a shell
            # variable rather than being spliced into the script text.
            nvidia-docker run -d -t --name "${container_name}" --net=host -v /dev/shm:/dev/shm --shm-size=32G \
              -v "$work_dir/../../..:$work_dir/../../.." \
              -v "$work_dir:/workspace" \
              -v /home/.cache/pip:/home/.cache/pip \
              -v /home/FleetX_CI:/fleetx_data \
              -v /home/Llm_gpt_CI:/llm_gpt_data \
              -v /home/Llama_CI:/llama_data \
              -e BRANCH \
              -e AGILE_COMPILE_BRANCH \
              -e PR_ID \
              -e COMMIT_ID \
              -e work_dir \
              -e ci_scripts \
              -e no_proxy \
              -e CI_name \
              -e paddle_whl \
              -e FLAGS_dynamic_static_unified_comm \
              -e python_version \
              -w /workspace --runtime=nvidia "${IMAGE_NAME}"
          fi

      # Wipe /workspace inside the container, unpack the PaddleNLP tarball,
      # update it, and for pull requests check out the PR head and merge the
      # base branch into it.
      - name: Download Code
        run: |
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping.."
          else
            # NOTE(review): the `upstream` remote added below points at
            # PaddleFormers while everything else in this workflow refers to
            # PaddleNLP, and the merge uses the local ${BRANCH} rather than
            # upstream/${BRANCH} - confirm the intended repository.
            docker exec -t "$container_name" /bin/bash -c '
              rm -rf * .[^.]*
              echo "Downloading PaddleNLP.tar.gz"
              wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleNLP.tar --no-check-certificate
              echo "Extracting PaddleNLP.tar.gz"
              tar xf PaddleNLP.tar && rm -rf PaddleNLP.tar
              source $work_dir/../../../proxy
              cd PaddleNLP
              git config --global user.name "PaddleCI"
              git config --global user.email "paddle_ci@example.com"
              git pull
              git submodule update --init --recursive --force
              if [ -n "${PR_ID}" ]; then
                git fetch origin pull/${PR_ID}/head
                git checkout -b PR_${PR_ID} FETCH_HEAD
                git remote add upstream https://github.com/PaddlePaddle/PaddleFormers.git
                git fetch upstream ${BRANCH}
                git merge ${BRANCH} --no-edit
                git diff --numstat ${BRANCH} -- | awk "{print \$NF}"
              else
                echo "Not in a pull_request event. Skipping PR-specific operations."
              fi
              git log --pretty=oneline -10
            '
          fi

      # Run the distribute CI script inside the container, bounded to 80
      # minutes by `timeout`.
      - name: Test
        run: |
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping..."
          else
            docker exec -t "$container_name" /bin/bash -c '
              ldconfig
              ln -sf $(which python${python_version}) /usr/bin/python
              pip config set global.cache-dir "/home/.cache/pip"
              source $work_dir/../../../proxy
              set -e
              cd /workspace/PaddleNLP && git config --global --add safe.directory $PWD
              timeout 80m bash scripts/distribute/run_ci.sh ${paddle_whl}
            '
          fi

      # Upload every file in /workspace/case_logs, plus a tarball of the whole
      # directory, to BOS. Runs even when an earlier step failed.
      - name: Upload Logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/../../..
          bos_file: ${{ github.workspace }}/../../../bos/BosClient.py
        run: |
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping..."
          else
            # home_path / bos_file are not forwarded into the container, so
            # they are expanded by the workflow expression engine before the
            # script text reaches the container shell.
            docker exec -t "$container_name" /bin/bash -c '
              if [ ! -f "${{ env.bos_file }}" ]; then
                wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
                # -p: do not fail when the directory is left over from an
                # earlier, incomplete download.
                mkdir -p ${{ env.home_path }}/bos
                tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
              fi

              if [[ "${{ env.RUN_DOWNSTREAM }}" == "" && -n "${PR_ID}" ]]; then
                bos_prefix="${PR_ID}/${COMMIT_ID}"
              elif [[ "${{ env.RUN_DOWNSTREAM }}" == "true" && -n "${PR_ID}" ]]; then
                bos_prefix="${PR_ID}/${COMMIT_ID}/test_build"
              else
                bos_prefix="schedule/$(date +%Y%m%d)"
              fi

              # Without this guard a missing log directory would leave the
              # shell in /workspace and the tar below would archive and upload
              # the entire workspace.
              if ! cd /workspace/case_logs; then
                echo "No /workspace/case_logs directory; nothing to upload."
                exit 0
              fi
              for FILE in /workspace/case_logs/*; do
                # Skip the literal pattern when the directory is empty.
                [ -e "$FILE" ] || continue
                file=$(basename "$FILE")
                python ${{ env.bos_file }} "$file" paddle-github-action/PR/PaddleNLP/distribute/${bos_prefix}/logs
                echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/distribute/${bos_prefix}/logs/$file"
              done
              tar -czf products.tar.gz ./
              python ${{ env.bos_file }} products.tar.gz paddle-github-action/PR/PaddleNLP/distribute/${bos_prefix}/logs
              echo "products: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/distribute/${bos_prefix}/logs/products.tar.gz"
            '
          fi

      # Best-effort cleanup: never fails the job, including when the
      # container was never created.
      - name: Terminate And Delete the Container
        if: always()
        run: |
          docker rm -f "$container_name" 2>/dev/null || true