Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6e9a3ec
Fix {} to {{}}
huanghengheng Sep 25, 2025
3c1d86e
Move get_modified_files.py to codestyle folder
huanghengheng Sep 25, 2025
cf23ca0
Create the model-unittest-gpu.yml
huanghengheng Sep 25, 2025
f7cab20
Create the ci_model_unit.sh
huanghengheng Sep 25, 2025
4a99803
Fix model unittest ci
huanghengheng Sep 25, 2025
f4cf1c8
Fix model unittest CI
huanghengheng Sep 25, 2025
2e17e6f
Fix model unittest CI
huanghengheng Sep 25, 2025
9619e2a
Fix model unittest CI
huanghengheng Sep 25, 2025
0b1c663
Fix model unittest CI
huanghengheng Sep 25, 2025
05ea002
Fix model unittest CI
huanghengheng Sep 26, 2025
da34e8c
Fix model unittest CI
huanghengheng Sep 26, 2025
ec6ccbc
Fix model unittest CI
huanghengheng Sep 26, 2025
d4c7bce
Fix model unittest CI
huanghengheng Sep 26, 2025
950ac99
Fix model unittest CI
huanghengheng Sep 26, 2025
6c200e9
Fix model unittest CI
huanghengheng Sep 26, 2025
2c335c0
Fix model unittest CI
huanghengheng Sep 26, 2025
64bb588
Fix model unittest CI
huanghengheng Sep 26, 2025
5fea663
Fix model unittest CI
huanghengheng Sep 26, 2025
6d8ab06
Fix model unittest CI
huanghengheng Sep 26, 2025
60370ed
Fix model unittest CI
huanghengheng Sep 26, 2025
d34c5f7
Fix model unittest CI
huanghengheng Sep 26, 2025
588f480
Fix model unittest CI
huanghengheng Sep 26, 2025
b647577
Fix model unittest CI
huanghengheng Sep 28, 2025
1392227
Block-hf-download-in-ci
huanghengheng Sep 28, 2025
88e657c
Remove block-hf-download-in-ci
huanghengheng Sep 28, 2025
67978c7
Merge upstream/develop and resolve conflicts
huanghengheng Sep 28, 2025
272c48a
debug model ci
huanghengheng Sep 28, 2025
7c522c8
debug model ci and use env.variable
huanghengheng Sep 28, 2025
f93c473
Fix shell variable format
huanghengheng Sep 29, 2025
70eab87
Change EXCEPTED_LOSS value
huanghengheng Sep 29, 2025
845cc1a
Change EXCEPTED_LOSS value
huanghengheng Sep 29, 2025
58e8adf
Debug the model unittest
huanghengheng Sep 30, 2025
3b6e73c
Debug the model unittest
huanghengheng Sep 30, 2025
64b58f1
Remove the allure
huanghengheng Sep 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ce-build-ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
with:
flag_build: test
runner: ernie-8gpu-2
image_base: ${image_base}
image_base: ${{env.image_base}}

test-ci-images:
name: test-ci-images
Expand Down Expand Up @@ -49,7 +49,7 @@ jobs:
with:
flag_build: update
runner: ernie-8gpu-1
image_base: ${image_base}
image_base: ${{env.image_base}}

update-ci-images-2:
name: update-ci-images-2
Expand All @@ -58,4 +58,4 @@ jobs:
with:
flag_build: update
runner: ernie-8gpu-2
image_base: ${image_base}
image_base: ${{env.image_base}}
2 changes: 1 addition & 1 deletion .github/workflows/ce-unittest-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
pip config set global.cache-dir "/home/.cache/pip"
set -e
cd /workspace/PaddleFormers && git config --global --add safe.directory $PWD
timeout 40m bash scripts/unit_test/ci_unit.sh ${paddle_whl} true
timeout 40m bash scripts/unit_test/ci_unittest.sh ${paddle_whl} true
'

- name: Upload Allure-reports & Logs
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/debug-unittest-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,6 @@ jobs:
echo "work_path: $work_dir/PaddleFormers"
echo "work_path in docker: /workspace/PaddleFormers"
echo "cmd: "
echo "bash scripts/unit_test/ci_unit.sh ${paddle_whl}"
echo "bash scripts/unit_test/ci_unittest.sh ${paddle_whl}"
echo "or python -m pytest fail_case_name"
echo "docker rm -f $container_name"
153 changes: 153 additions & 0 deletions .github/workflows/model-unittest-gpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
name: Model Unittest GPU CI

# Runs on every pull request, on a daily schedule, and on demand from other
# workflows (which may override the runner label and the container image).
on:
  pull_request:
  schedule:
    - cron: "0 18 * * *"
  workflow_call:
    inputs:
      runner:
        required: false
        type: string
      image_name:
        required: false
        type: string

# One run per pull request (or per run id when there is no pull request);
# a newer run cancels the one still in progress.
concurrency:
  group: model-unittest-${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

env:
  # Empty outside pull_request events; the scripts use that to detect scheduled runs.
  PR_ID: ${{ github.event.pull_request.number || '' }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha || github.sha }}
  TASK: PaddleFormers-CI-${{ github.event.pull_request.number }}-model-unittest-gpu
  # Must match the location of the script invoked by the Test step.
  CI_SCRIPTS_PATH: /workspace/PaddleFormers/scripts/regression/ci_model_unittest.sh
  BRANCH: ${{ github.event.pull_request.base.ref || github.ref_name }}
  # Empty outside pull_request events; ci_model_unittest.sh then runs unconditionally.
  AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
  CI_JOB_NAME: model-unittest-gpu-ci
  NO_PROXY: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"

defaults:
  run:
    shell: bash

# Single job: starts a GPU container, fetches the code into it, runs the model
# unit tests, uploads the logs and finally removes the container.
jobs:
model-unittest-gpu-ci:
name: model-unittest-gpu-ci
# Uses the runner label passed via workflow_call when present, otherwise 'ernie-8gpu'.
runs-on: ${{ inputs.runner || 'ernie-8gpu' }}
steps:
- name: Determine Image Name
  env:
    IMAGE_NAME: ${{ inputs.image_name }}
  run: |
    # Publish IMAGE_NAME for the following steps: the caller-supplied image when
    # one was given, otherwise the default Paddle CUDA 12.6 dev image.
    DEFAULT_IMAGE="ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev-latest"
    echo "IMAGE_NAME=${IMAGE_NAME:-${DEFAULT_IMAGE}}" >> "$GITHUB_ENV"

# Starts a detached GPU container that the later steps drive through
# `docker exec`. The generated container name is written to GITHUB_ENV so the
# following steps can reference it as $CONTAINER_NAME.
# Mounts: the directory three levels above the workspace (same path inside the
# container), the workspace itself at /workspace, and the host pip cache.
# Only the variables listed with -e are visible inside the container.
- name: Run Container
env:
WORK_DIR: ${{ github.workspace }}
FLAGS_DYNAMIC_STATIC_UNIFIED_COMM: "True"
PYTHON_VERSION: "3.10"
PADDLE_WHL: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuAll-LinuxCentos-Gcc11-Cuda126-Cudnn95-Trt105-Py310-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
run: |
CONTAINER_NAME=${TASK}-$(date +%Y%m%d-%H%M%S)
echo "CONTAINER_NAME=${CONTAINER_NAME}" >> "$GITHUB_ENV"
docker run -d -t --gpus all --name ${CONTAINER_NAME} --net=host -v /dev/shm:/dev/shm --shm-size=32G \
-v ${WORK_DIR}/../../..:${WORK_DIR}/../../.. \
-v ${WORK_DIR}:/workspace \
-v /home/.cache/pip:/home/.cache/pip \
-e "BRANCH=$BRANCH" \
-e "AGILE_COMPILE_BRANCH=$AGILE_COMPILE_BRANCH" \
-e "PR_ID=$PR_ID" \
-e "COMMIT_ID=$COMMIT_ID" \
-e "WORK_DIR=$WORK_DIR" \
-e "CI_SCRIPTS_PATH=$CI_SCRIPTS_PATH" \
-e "NO_PROXY=$NO_PROXY" \
-e "CI_JOB_NAME=$CI_JOB_NAME" \
-e "PADDLE_WHL=$PADDLE_WHL" \
-e "FLAGS_DYNAMIC_STATIC_UNIFIED_COMM=$FLAGS_DYNAMIC_STATIC_UNIFIED_COMM" \
-e "PYTHON_VERSION=$PYTHON_VERSION" \
-e HF_PROXY_PATH=${WORK_DIR}/../../../proxy_huggingface \
-e AISTUDIO_PROXY_PATH=${WORK_DIR}/../../../proxy_aistudio \
-w /workspace --privileged ${IMAGE_NAME}

# Populates /workspace inside the container: wipes its current contents,
# downloads and unpacks the PaddleFormers.tar snapshot, sources the proxy
# settings from the host mount, then updates the checkout with git.
# When PR_ID is non-empty the pull request head is fetched and checked out as
# PR_<id>, the base branch (BRANCH) is fetched from upstream and merged into
# it, and the list of files differing from the base branch is printed.
# The inner script runs without `set -e`, so a failing command does not stop it.
- name: Download Code
run: |
docker exec -t $CONTAINER_NAME /bin/bash -c '
rm -rf * .[^.]*
echo "Downloading PaddleFormers.tar"
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleFormers.tar --no-check-certificate
echo "Extracting PaddleFormers.tar"
tar xf PaddleFormers.tar && rm -rf PaddleFormers.tar
echo "WORK_DIR = ${WORK_DIR}"
source ${WORK_DIR}/../../../proxy
cat ${WORK_DIR}/../../../proxy
cd PaddleFormers
git config --global user.name "PaddleCI"
git config --global user.email "[email protected]"
git pull
git submodule update --init --recursive --force
if [ -n "${PR_ID}" ]; then
git fetch origin pull/${PR_ID}/head
git checkout -b PR_${PR_ID} FETCH_HEAD
git remote add upstream https://github.com/PaddlePaddle/PaddleFormers.git
echo "Checking out ${BRANCH}..."
git fetch upstream ${BRANCH}:${BRANCH}
git merge ${BRANCH} --no-edit
git diff --numstat ${BRANCH} -- | awk "{print \$NF}"
else
echo "Not in a pull_request event. Skipping PR-specific operations."
fi
git log --pretty=oneline -10
'

# Runs the model unit tests inside the container.
# `set -e` is enabled only after ldconfig and the pip cache configuration, so
# those two commands are allowed to fail. The models directory is copied from
# the host mount into the checkout, then ci_model_unittest.sh is run with the
# Paddle wheel URL as its only argument, under a 30 minute timeout.
- name: Test
run: |
docker exec -t $CONTAINER_NAME /bin/bash -c '
ldconfig
pip config set global.cache-dir "/home/.cache/pip"
set -e
rm -rf /root/.cache/aistudio/
cd /workspace/PaddleFormers && git config --global --add safe.directory $PWD
echo "WORK_DIR = ${WORK_DIR}"
cp -r ${WORK_DIR}/../../../models ./models
echo "Check models:"
ls -l ./models
echo "Test Start"
hostname
timeout 30m bash scripts/regression/ci_model_unittest.sh ${PADDLE_WHL}
'

# Uploads every file in the model unittest log directory to BOS and prints its
# public URL. Runs even when earlier steps failed so the logs stay available.
- name: Upload Products
  if: always()
  env:
    HOME_PATH: ${{ github.workspace }}/../../..
    BOS_UPLOAD_SCRIPT: ${{ github.workspace }}/../../../bos/BosClient.py
  run: |
    # HOME_PATH and BOS_UPLOAD_SCRIPT are defined only for this step on the
    # runner. The script below is single-quoted and expanded inside the
    # container, so both values must be forwarded explicitly with -e;
    # otherwise they expand to empty strings there.
    docker exec -t \
      -e "HOME_PATH=$HOME_PATH" \
      -e "BOS_UPLOAD_SCRIPT=$BOS_UPLOAD_SCRIPT" \
      $CONTAINER_NAME /bin/bash -c '
      # Fetch the BOS client once; it is cached on the host mount afterwards.
      if [ ! -f "${BOS_UPLOAD_SCRIPT}" ]; then
        wget -q --no-proxy -O ${HOME_PATH}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
        mkdir -p ${HOME_PATH}/bos
        tar xf ${HOME_PATH}/bos_new.tar.gz -C ${HOME_PATH}/bos
      fi
      # Pull requests upload under <PR>/<commit>; other runs under schedule/<date>.
      if [ -n "${PR_ID}" ]; then
        bos_prefix="${PR_ID}/${COMMIT_ID}"
      else
        bos_prefix="schedule/$(date +%Y%m%d)"
      fi
      # logs
      log_dir=/workspace/PaddleFormers/model_unittest_logs
      # The directory is missing when an earlier step failed before the tests ran.
      if [ ! -d "${log_dir}" ]; then
        echo "No log directory at ${log_dir}, nothing to upload."
        exit 0
      fi
      cd "${log_dir}"
      for FILE in "${log_dir}"/*; do
        # An unmatched glob stays literal; skip it instead of uploading "*".
        [ -e "$FILE" ] || continue
        file=$(basename "$FILE")
        python ${BOS_UPLOAD_SCRIPT} $file paddle-github-action/PR/PaddleFormers/model-unittest-gpu/${bos_prefix}/logs
        echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleFormers/model-unittest-gpu/${bos_prefix}/logs/$file"
      done
    '

- name: Terminate And Delete the Container
  if: always()
  run: |
    # Best-effort cleanup: errors (for example when the container was never
    # created) are silenced and never fail the job.
    docker rm -f $CONTAINER_NAME 2>/dev/null || :
6 changes: 4 additions & 2 deletions .github/workflows/unittest-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ name: Unittest GPU CI

on:
pull_request:
push:
schedule:
- cron: "0 18 * * *"
workflow_call:
Expand Down Expand Up @@ -56,6 +55,7 @@ jobs:
run: |
container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
echo "container_name=${container_name}" >> "$GITHUB_ENV"
echo "Workspace path: ${{ github.workspace }}"
docker run -d -t --name ${container_name} --net=host -v /dev/shm:/dev/shm --shm-size=32G \
-v $work_dir/../../..:$work_dir/../../.. \
-v $work_dir:/workspace \
Expand All @@ -73,6 +73,8 @@ jobs:
-e python_version \
-e HF_PROXY_PATH=$work_dir/../../../proxy_huggingface \
-e AISTUDIO_PROXY_PATH=$work_dir/../../../proxy_aistudio \
-e "HF_DATASETS_CACHE=$work_dir/../../../paddlenlp/huggingface/datasets" \
-e "TRANSFORMERS_CACHE=$work_dir/../../../paddlenlp/huggingface" \
-w /workspace --runtime=nvidia --privileged $IMAGE_NAME

- name: Download Code
Expand Down Expand Up @@ -112,7 +114,7 @@ jobs:
cd /workspace/PaddleFormers && git config --global --add safe.directory $PWD
source $work_dir/../../../proxy
source $work_dir/../../../AISTUDIO_ACCESS_TOKEN
timeout 30m bash scripts/unit_test/ci_unit.sh ${paddle_whl}
timeout 30m bash scripts/unit_test/ci_unittest.sh ${paddle_whl}
'

- name: Upload Products
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ format:

.PHONY: lint
lint:
$(eval modified_py_files := $(shell python scripts/get_modified_files.py $(check_dirs)))
$(eval modified_py_files := $(shell python scripts/codestyle/get_modified_files.py $(check_dirs)))
@if test -n "$(modified_py_files)"; then \
echo ${modified_py_files}; \
pre-commit run --files ${modified_py_files}; \
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ skip = ['paddleformers/transformers/__init__.py']
[tool.black]
line-length = 119
target_version = ['py35', 'py36', 'py37', 'py38', 'py39', 'py310']
exclude = ['.flake8']

[tool.pytest.ini_options]
minversion = "6.0"
Expand Down
File renamed without changes.
135 changes: 135 additions & 0 deletions scripts/regression/ci_model_unittest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#!/usr/bin/env bash

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command until the test run itself starts.
set -e

# $1: Paddle wheel to install (passed straight to `pip install`).
# $2: optional, "true" enables the CE settings; defaults to "false".
paddle=$1
FLAGS_enable_CE=${2-false}

# Fixed locations inside the CI container.
nlp_dir=/workspace/PaddleFormers
log_path=/workspace/PaddleFormers/model_unittest_logs
model_unittest_path=/workspace/PaddleFormers/scripts/regression
export paddle FLAGS_enable_CE nlp_dir log_path model_unittest_path

cd "$nlp_dir"
mkdir -p "$log_path"

# Install the Python dependencies, the requested Paddle wheel and a freshly
# built PaddleFormers wheel, appending version details to commit_info.txt
# in the log directory.
install_requirements() {
    local commit_info=${log_path}/commit_info.txt
    local req

    # Use the Tsinghua PyPI mirror for every following pip call.
    python -m pip config --user set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
    python -m pip config --user set global.trusted-host pypi.tuna.tsinghua.edu.cn

    for req in requirements.txt requirements-dev.txt tests/requirements.txt; do
        python -m pip install -r ${req}
    done

    # Replace whatever Paddle build is present with the wheel passed as $1.
    python -m pip uninstall paddlepaddle paddlepaddle_gpu -y
    python -m pip install --no-cache-dir ${paddle} --no-dependencies --progress-bar off --force-reinstall
    python -c "import paddle;print('paddle');print(paddle.__version__);print(paddle.version.show())" >> ${commit_info}

    # Build PaddleFormers from the checkout and install the resulting wheel.
    python setup.py bdist_wheel > /dev/null
    python -m pip install dist/p****.whl

    {
        python -c "from paddleformers import __version__; print('paddleformers version:', __version__)"
        python -c "import paddleformers; print('paddleformers commit:',paddleformers.version.commit)"
        python -m pip list
    } >> ${commit_info}
}

# Export the environment used by the test run; the CE-only variables are added
# when FLAGS_enable_CE is "true".
set_env() {
    export NVIDIA_TF32_OVERRIDE=0 FLAGS_cudnn_deterministic=1
    export HF_ENDPOINT=https://hf-mirror.com
    export FLAGS_use_cuda_managed_memory=true

    # for CE
    [[ ${FLAGS_enable_CE} == "true" ]] || return 0
    export CE_TEST_ENV=1 RUN_SLOW_TEST=1
    export PYTHONPATH=${nlp_dir}:${nlp_dir}/llm:${PYTHONPATH}
}

# Summarise the test run.
#   $1: exit status of the pytest run (a missing value is treated as 0).
# On failure, writes a filtered copy of model_unittest.log to
# model_unittest_FAIL.log, prints it, and optionally uploads it through the
# legacy upload.py under PPNLP_HOME. On success, prints the last log line.
print_info() {
    local status=${1:-0}
    local full_log=${log_path}/model_unittest.log
    local fail_log=${log_path}/model_unittest_FAIL.log

    if [ "${status}" -ne 0 ]; then
        # Drop known noise so the failure log stays readable.
        grep -v "Fail to fscanf: Success" "${full_log}" \
            | grep -v "SKIPPED" | grep -v "warning" > "${fail_log}"
        tail -n 1 "${full_log}" >> "${fail_log}"
        echo -e "\033[31m ${log_path}/model_unittest_FAIL \033[0m"
        cat "${fail_log}"
        # The upload needs PPNLP_HOME: without it `cd` would land in $HOME and
        # the cleanup would remove an unrelated upload/ directory.
        if [ -n "${AGILE_JOB_BUILD_ID}" ] && [ -n "${PPNLP_HOME}" ]; then
            cp "${fail_log}" ${PPNLP_HOME}/upload/model_unittest_FAIL.log.${AGILE_PIPELINE_BUILD_ID}.${AGILE_JOB_BUILD_ID}
            cd ${PPNLP_HOME} && python upload.py ${PPNLP_HOME}/upload 'paddlenlp/PaddleNLP_CI/PaddleNLP-CI-Model-Unittest-GPU'
            rm -rf upload/* && cd -
        fi
        if [ "${status}" -eq 124 ]; then
            # running_time is not set by this script; only mention the limit
            # when the caller provided it.
            local limit_desc="the time limit"
            if [ -n "${running_time}" ]; then
                limit_desc="the ${running_time} min limit"
            fi
            echo -e "\033[31m [failed-timeout] Test case execution was terminated after exceeding ${limit_desc}. \033[0m"
        fi
    else
        tail -n 1 "${full_log}"
        echo -e "\033[32m ${log_path}/model_unittest_SUCCESS \033[0m"
    fi
}

# Decide whether the model unit tests need to run and export the answer as
# FLAGS_enable_CI ("true"/"false").
# Without AGILE_COMPILE_BRANCH (scheduled run) the tests always run. Otherwise
# they run when at least one file that differs from that branch still exists
# and is neither documentation (.md, .rst) nor located under docs/.
get_diff_TO_case(){
    export FLAGS_enable_CI=false
    if [ -z "${AGILE_COMPILE_BRANCH}" ]; then
        # Scheduled Regression Test
        FLAGS_enable_CI=true
        return 0
    fi

    local file_name ext
    # --name-only prints one plain path per line, so renames and paths with
    # spaces are handled; read line by line to avoid word splitting.
    while IFS= read -r file_name; do
        ext="${file_name##*.}"
        echo "file_name: ${file_name}, ext: ${ext}"

        if [ ! -f "${file_name}" ]; then # Delete Files for a Pull Request
            continue
        elif [[ "$ext" == "md" || "$ext" == "rst" || "$file_name" == docs/* ]]; then
            continue
        else
            FLAGS_enable_CI=true
        fi
    done < <(git diff --name-only "${AGILE_COMPILE_BRANCH}" --)
}

# Entry point: decide whether to run, prepare the environment, run pytest over
# the regression directory and exit with the pytest status.
get_diff_TO_case
set_env

# Nothing relevant changed and CE mode is off: report and leave successfully.
if [[ ${FLAGS_enable_CI} != "true" ]] && [[ ${FLAGS_enable_CE} != "true" ]]; then
    echo -e "\033[32m Changed Not CI case, Skips \033[0m"
    exit 0
fi

install_requirements
cd ${nlp_dir}
echo ' Testing all model unittest cases '
unset http_proxy https_proxy

# From here on failures are reported through the captured exit status rather
# than aborting the script.
set +e
echo "Check paddle Cuda Version"
python -c "import paddle; print(paddle.version.cuda()); print(paddle.version.cudnn()); print(paddle.is_compiled_with_cuda())"
echo "Check docker Cuda Version"
nvcc -V
cat /usr/local/cuda/version.txt
echo "Check nvidia-smi"
nvidia-smi
python -c "import paddle; print(paddle.device.device_count())"

export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
PYTHONPATH=$(pwd) COVERAGE_SOURCE=paddleformers \
    python -m pytest -s -v ${model_unittest_path} > ${log_path}/model_unittest.log 2>&1
rc=$?
print_info $rc model_unittest

if [ -z "${AGILE_JOB_BUILD_ID}" ]; then
    echo "AGILE_JOB_BUILD_ID is empty, skip generate allure report"
else
    cd ${nlp_dir}
    echo -e "\033[35m ---- Generate Allure Report \033[0m"
    unset http_proxy https_proxy
    cp ${nlp_dir}/scripts/unit_test/gen_allure_report.py ./
    python gen_allure_report.py > /dev/null
    echo -e "\033[35m ---- Report: https://xly.bce.baidu.com/ipipe/ipipe-report/report/${AGILE_JOB_BUILD_ID}/report/ \033[0m"
fi
exit $rc
Loading
Loading