diff --git a/.github/workflows/distribute-a100.yml b/.github/workflows/distribute-a100.yml new file mode 100644 index 000000000000..cf8343cc0246 --- /dev/null +++ b/.github/workflows/distribute-a100.yml @@ -0,0 +1,180 @@ +name: Distribute CI (A100) + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: [develop] + schedule: + - cron: "1 0 * * *" + workflow_call: + inputs: + run_downstream: + required: true + type: string + image_name: + required: true + type: string + + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +env: + PR_ID: ${{ github.event.pull_request.number }} + COMMIT_ID: ${{ github.event.pull_request.head.sha }} + TASK: paddlenlp-CI-${{ github.event.pull_request.number }}-Distribut-A100 + ci_scripts: /workspace/PaddleNLP/scripts/distribute + BRANCH: ${{ github.event.pull_request.base.ref }} + AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }} + CI_name: distribute-ci + no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn" + GITHUB_EVENT_NAME: ${{ github.event_name }} + RUN_DOWNSTREAM: ${{ inputs.run_downstream }} + +defaults: + run: + shell: bash + +jobs: + distribute-a100-ci: + name: distribute-a100-ci + runs-on: + group: Distribute + steps: + - name: Determine Image Name + env: + IMAGE_NAME: ${{ inputs.image_name }} + run: | + if [[ -n "${IMAGE_NAME}" ]]; then + echo "IMAGE_NAME=${IMAGE_NAME}" >> "$GITHUB_ENV" + else + echo "IMAGE_NAME=registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82" >> "$GITHUB_ENV" + fi + + - name: Run Container + env: + work_dir: ${{ github.workspace }} + CACHE_DIR: /home/data/cfs/.cache + FLAGS_dynamic_static_unified_comm: "True" + FLAGS_dataloader_use_file_descriptor: "False" + python_version: "3.10" + paddle_whl: 
https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + run: | + container_name=${TASK}-$(date +%Y%m%d-%H%M%S) + echo "container_name=${container_name}" >> "$GITHUB_ENV" + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + export CUDA_SO="$(\ls -d /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls -d /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + export DEVICES="$(\ls -d /dev/nvidia* | xargs -I{} echo "-v {}:{}") $(\ls /dev/nvidia-caps/* | xargs -I{} echo "-v {}:{}")" + export SMI="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi" + docker run -d -t --name ${container_name} ${CUDA_SO} ${DEVICES} ${SMI} --runtime=nvidia --shm-size=32G \ + --network host -v /dev/shm:/dev/shm \ + -v $work_dir/../../..:$work_dir/../../.. \ + -v $work_dir:/workspace \ + -v /home/.cache/pip:/home/.cache/pip \ + -v /home/FleetX_CI:/fleetx_data \ + -v /home/Llm_gpt_CI:/llm_gpt_data \ + -v /home/Llama_CI:/llama_data \ + -e BRANCH \ + -e AGILE_COMPILE_BRANCH \ + -e PR_ID \ + -e COMMIT_ID \ + -e work_dir \ + -e ci_scripts \ + -e no_proxy \ + -e CI_name \ + -e paddle_whl \ + -e FLAGS_dynamic_static_unified_comm \ + -e FLAGS_dataloader_use_file_descriptor \ + -e python_version \ + -w /workspace $IMAGE_NAME + fi + + - name: Download Code + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping.." 
+ else + docker exec -t $container_name /bin/bash -c ' + rm -rf * .[^.]* + echo "Downloading PaddleNLP.tar.gz" + wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleNLP.tar --no-check-certificate + echo "Extracting PaddleNLP.tar.gz" + tar xf PaddleNLP.tar && rm -rf PaddleNLP.tar + source $work_dir/../../../proxy + cd PaddleNLP + git config --global user.name "PaddleCI" + git config --global user.email "paddle_ci@example.com" + git pull + git submodule update --init --recursive --force + if [ -n "${PR_ID}" ]; then + git fetch origin pull/${PR_ID}/head + git checkout -b PR_${PR_ID} FETCH_HEAD + git remote add upstream https://github.com/PaddlePaddle/PaddleNLP.git + git fetch upstream ${BRANCH} + git merge ${BRANCH} --no-edit + git diff --numstat ${BRANCH} -- | awk "{print \$NF}" + else + echo "Not in a pull_request event. Skipping PR-specific operations." + fi + git log --pretty=oneline -10 + ' + fi + + - name: Test + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + docker exec -t $container_name /bin/bash -c ' + ldconfig + ln -sf $(which python${python_version}) /usr/bin/python + pip config set global.cache-dir "/home/.cache/pip" + source $work_dir/../../../proxy + set -e + cd /workspace/PaddleNLP && git config --global --add safe.directory $PWD + timeout 80m bash scripts/distribute/run_ci.sh ${paddle_whl} + ' + fi + + - name: Upload Logs + if: always() + env: + home_path: ${{ github.workspace }}/.. + bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + docker exec -t $container_name /bin/bash -c ' + unset http_proxy && unset https_proxy + if [ ! 
-f "${{ env.bos_file }}" ]; then + wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate + mkdir ${{ env.home_path }}/bos_retry + tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry + fi + + if [[ "${{ env.RUN_DOWNSTREAM }}" == "" && -n "${PR_ID}" ]]; then + bos_prefix="${PR_ID}/${COMMIT_ID}" + elif [[ "${{ env.RUN_DOWNSTREAM }}" == "true" && -n "${PR_ID}" ]]; then + bos_prefix="${PR_ID}/${COMMIT_ID}/test_build" + else + bos_prefix="schedule/$(date +%Y%m%d)" + fi + + cd /workspace/case_logs + for FILE in /workspace/case_logs/*; do + file=$(basename "$FILE") + python ${{ env.bos_file }} $file paddle-github-action/PR/PaddleNLP/distribute-a100/${bos_prefix}/logs + echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/distribute-a100/${bos_prefix}/logs/$file" + done + ' + fi + + - name: Terminate And Delete the Container + if: always() + run: | + docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*' + docker rm -f $container_name 2>/dev/null || true \ No newline at end of file diff --git a/.github/workflows/llm.yml b/.github/workflows/llm.yml new file mode 100644 index 000000000000..87d19f88f2a6 --- /dev/null +++ b/.github/workflows/llm.yml @@ -0,0 +1,206 @@ +name: LLM CI + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: [develop] + schedule: + - cron: "2 0 * * *" + workflow_call: + inputs: + run_downstream: + required: true + type: string + image_name: + required: true + type: string + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +env: + PR_ID: ${{ github.event.pull_request.number }} + COMMIT_ID: ${{ github.event.pull_request.head.sha }} + TASK: paddlenlp-CI-${{ github.event.pull_request.number }}-llm + ci_scripts: /workspace/PaddleNLP/scripts/regression + BRANCH: ${{ github.event.pull_request.base.ref }} + AGILE_COMPILE_BRANCH: ${{ 
github.event.pull_request.base.ref }} + CI_name: llm-ci + no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn" + HF_ENDPOINT: https://hf-mirror.com + STUDIO_GIT_HOST: http://git.prod.idc-to-cloud.aistudio.baidu-int.com + PPNLP_HOME: /ssd1/paddlenlp + HF_DATASETS_CACHE: /ssd1/paddlenlp/huggingface/datasets + TRANSFORMERS_CACHE: /ssd1/paddlenlp/huggingface + CCACHE_DIR: /home/data/gzcfs/.ccache/gpubox + RUN_DOWNSTREAM: ${{ inputs.run_downstream }} + +defaults: + run: + shell: bash + +jobs: + llm-ci: + name: llm-ci + runs-on: [self-hosted, ernie-8gpu] + steps: + - name: Determine Image Name + env: + IMAGE_NAME: ${{ inputs.image_name }} + run: | + if [[ -n "${IMAGE_NAME}" ]]; then + echo "IMAGE_NAME=${IMAGE_NAME}" >> "$GITHUB_ENV" + else + echo "IMAGE_NAME=iregistry.baidu-int.com/paddlecloud/base-images:paddlecloud-ubuntu18.04-gcc8.2-cuda11.8-cudnn8.6-nccl2.15.5-paddlenlp-latest" >> "$GITHUB_ENV" + fi + + - name: Run Container + env: + work_dir: ${{ github.workspace }} + CACHE_DIR: /home/data/cfs/.cache + FLAGS_dynamic_static_unified_comm: "True" + python_version: "3.10" + paddle_whl: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + run: | + container_name=${TASK}-$(date +%Y%m%d-%H%M%S) + echo "container_name=${container_name}" >> "$GITHUB_ENV" + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + docker run -d -t --name ${container_name} --net=host -v /dev/shm:/dev/shm --shm-size=32G \ + -v $work_dir/../../..:$work_dir/../../.. 
\ + -v $work_dir:/workspace \ + -v /home/.cache/pip:/home/.cache/pip \ + -v /ssd1/paddlenlp:/ssd1/paddlenlp \ + -v /home/data/gzcfs/.ccache/gpubox:/home/data/gzcfs/.ccache/gpubox \ + -e BRANCH \ + -e AGILE_COMPILE_BRANCH \ + -e PR_ID \ + -e COMMIT_ID \ + -e work_dir \ + -e ci_scripts \ + -e no_proxy \ + -e CI_name \ + -e paddle_whl \ + -e HF_ENDPOINT \ + -e STUDIO_GIT_HOST \ + -e PPNLP_HOME \ + -e HF_DATASETS_CACHE \ + -e TRANSFORMERS_CACHE \ + -e CACHE_DIR \ + -e FLAGS_dynamic_static_unified_comm \ + -e python_version \ + -w /workspace --runtime=nvidia $IMAGE_NAME + fi + + - name: Download Code + env: + work_dir: ${{ github.workspace }} + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping.." + else + docker exec -t $container_name /bin/bash -c ' + rm -rf * .[^.]* + echo "Downloading PaddleNLP.tar.gz" + wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleNLP.tar --no-check-certificate + echo "Extracting PaddleNLP.tar.gz" + tar xf PaddleNLP.tar && rm -rf PaddleNLP.tar + source $work_dir/../../../proxy + cd PaddleNLP + git config --global user.name "PaddleCI" + git config --global user.email "paddle_ci@example.com" + git pull + git submodule update --init --recursive --force + if [ -n "${PR_ID}" ]; then + git fetch origin pull/${PR_ID}/head + git checkout -b PR_${PR_ID} FETCH_HEAD + git remote add upstream https://github.com/PaddlePaddle/PaddleNLP.git + git fetch upstream ${BRANCH} + git merge ${BRANCH} --no-edit + git diff --numstat ${BRANCH} -- | awk "{print \$NF}" + else + echo "Not in a pull_request event. Skipping PR-specific operations." + fi + git log --pretty=oneline -10 + ' + fi + + - name: Skip For Bug + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." 
+ else + docker exec -t $container_name /bin/bash -c ' + cd /workspace/PaddleNLP + git revert f2477c07272d04244cd3287d1f21c70482a4a85f --no-edit # 套件PR#10413引入bug-待修复 + git revert 3e9d3518cbecd8357cec14f059776272713d5c62 --no-edit # 套件PR#10912引入bug-待修复 + # rm -rf tests/llm/test_grpo.py tests/llm/test_reinforce_plus_plus.py + ' + fi + + - name: Test + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + docker exec -t $container_name /bin/bash -c ' + ldconfig + unlink /usr/bin/python3 + ln -sf $(which python${python_version}) /usr/bin/python3 + pip config set global.cache-dir "/home/.cache/pip" + set -e + source $work_dir/../../../proxy + cd /workspace/PaddleNLP && git config --global --add safe.directory $PWD + export paddle_whl=https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/8ae7423e99b2ea96e410968a0ebb3f1795e37205/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl # 需要套件侧适配Paddle#73283 + timeout 2h bash scripts/regression/run_ci.sh python${python_version} ${paddle_whl} + ' + fi + + - name: Upload Allure-reports & Logs + if: always() + env: + home_path: ${{ github.workspace }}/../../.. + bos_file: ${{ github.workspace }}/../../../bos/BosClient.py + allure_file: ${{ github.workspace }}/../../../allure-2.19.0/bin/allure + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + docker exec -t $container_name /bin/bash -c ' + unset http_proxy && unset https_proxy + if [ ! -f "${{ env.bos_file }}" ]; then + wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate + mkdir ${{ env.home_path }}/bos + tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos + fi + if [ ! 
-f "${{ env.allure_file }}" ]; then + wget -q --no-proxy -O ${{ env.home_path }}/allure-2.19.0.zip https://xly-devops.bj.bcebos.com/tools/allure-2.19.0.zip --no-check-certificate + unzip -q ${{ env.home_path }}/allure-2.19.0.zip -d ${{ env.home_path }}/ + fi + if [[ "${{ env.RUN_DOWNSTREAM }}" == "" && -n "${PR_ID}" ]]; then + bos_prefix="${PR_ID}/${COMMIT_ID}" + elif [[ "${{ env.RUN_DOWNSTREAM }}" == "true" && -n "${PR_ID}" ]]; then + bos_prefix="${PR_ID}/${COMMIT_ID}/test_build" + else + bos_prefix="schedule/$(date +%Y%m%d)" + fi + cd /workspace/PaddleNLP/model_logs + for FILE in /workspace/PaddleNLP/model_logs/*; do + file=$(basename "$FILE") + python ${{ env.bos_file }} $file paddle-github-action/PR/PaddleNLP/llm/${bos_prefix}/logs + echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/llm/${bos_prefix}/logs/$file" + done + cd /workspace/PaddleNLP/ + ${{ env.allure_file }} generate result -o report + tar -czf products.tar.gz report model_logs + python ${{ env.bos_file }} products.tar.gz paddle-github-action/PR/PaddleNLP/llm/${bos_prefix}/logs + echo "products: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/llm/${bos_prefix}/logs/products.tar.gz" + ' + fi + + - name: Terminate And Delete the Container + if: always() + run: | + docker rm -f ${{ env.container_name }} 2>/dev/null || true \ No newline at end of file diff --git a/.github/workflows/unittest-gpu.yml b/.github/workflows/unittest-gpu.yml new file mode 100644 index 000000000000..44536184b000 --- /dev/null +++ b/.github/workflows/unittest-gpu.yml @@ -0,0 +1,203 @@ +name: Unittest GPU CI + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: [develop] + schedule: + - cron: "3 0 * * *" + workflow_call: + inputs: + run_downstream: + required: true + type: string + image_name: + required: true + type: string + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +env: + PR_ID: ${{ 
github.event.pull_request.number }} + COMMIT_ID: ${{ github.event.pull_request.head.sha }} + TASK: paddlenlp-CI-${{ github.event.pull_request.number }}-unit-gpu + ci_scripts: /workspace/PaddleNLP/scripts/unit_test + BRANCH: ${{ github.event.pull_request.base.ref }} + AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }} + CI_name: unittest-gpu-ci + no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn" + HF_ENDPOINT: https://hf-mirror.com + STUDIO_GIT_HOST: http://git.prod.idc-to-cloud.aistudio.baidu-int.com + PPNLP_HOME: /ssd1/paddlenlp + HF_DATASETS_CACHE: /ssd1/paddlenlp/huggingface/datasets + TRANSFORMERS_CACHE: /ssd1/paddlenlp/huggingface + CCACHE_DIR: /home/data/gzcfs/.ccache/gpubox + RUN_DOWNSTREAM: ${{ inputs.run_downstream }} + +defaults: + run: + shell: bash + +jobs: + unittest-gpu-ci: + name: unittest-gpu-ci + runs-on: [self-hosted, ernie-8gpu] + steps: + - name: Determine Image Name + env: + IMAGE_NAME: ${{ inputs.image_name }} + run: | + if [[ -n "${IMAGE_NAME}" ]]; then + echo "IMAGE_NAME=${IMAGE_NAME}" >> "$GITHUB_ENV" + else + echo "IMAGE_NAME=iregistry.baidu-int.com/paddlecloud/base-images:paddlecloud-ubuntu18.04-gcc8.2-cuda11.8-cudnn8.6-nccl2.15.5-paddlenlp-latest" >> "$GITHUB_ENV" + fi + + - name: Run Container + env: + work_dir: ${{ github.workspace }} + CACHE_DIR: /home/data/cfs/.cache + FLAGS_dynamic_static_unified_comm: "True" + python_version: "3.10" + paddle_whl: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + run: | + container_name=${TASK}-$(date +%Y%m%d-%H%M%S) + echo "container_name=${container_name}" >> "$GITHUB_ENV" + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." 
+ else + docker run -d -t --name ${container_name} --net=host -v /dev/shm:/dev/shm --shm-size=32G \ + -v $work_dir/../../..:$work_dir/../../.. \ + -v $work_dir:/workspace \ + -v /home/.cache/pip:/home/.cache/pip \ + -v /ssd1/paddlenlp:/ssd1/paddlenlp \ + -v /home/data/gzcfs/.ccache/gpubox:/home/data/gzcfs/.ccache/gpubox \ + -e BRANCH \ + -e AGILE_COMPILE_BRANCH \ + -e PR_ID \ + -e COMMIT_ID \ + -e work_dir \ + -e ci_scripts \ + -e no_proxy \ + -e CI_name \ + -e paddle_whl \ + -e HF_ENDPOINT \ + -e STUDIO_GIT_HOST \ + -e PPNLP_HOME \ + -e HF_DATASETS_CACHE \ + -e TRANSFORMERS_CACHE \ + -e CACHE_DIR \ + -e FLAGS_dynamic_static_unified_comm \ + -e python_version \ + -w /workspace --runtime=nvidia $IMAGE_NAME + fi + + - name: Download Code + env: + work_dir: ${{ github.workspace }} + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping.." + else + docker exec -t $container_name /bin/bash -c ' + rm -rf * .[^.]* + echo "Downloading PaddleNLP.tar.gz" + wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleNLP.tar --no-check-certificate + echo "Extracting PaddleNLP.tar.gz" + tar xf PaddleNLP.tar && rm -rf PaddleNLP.tar + source $work_dir/../../../proxy + cd PaddleNLP + git config --global user.name "PaddleCI" + git config --global user.email "paddle_ci@example.com" + git pull + git submodule update --init --recursive --force + if [ -n "${PR_ID}" ]; then + git fetch origin pull/${PR_ID}/head + git checkout -b PR_${PR_ID} FETCH_HEAD + git remote add upstream https://github.com/PaddlePaddle/PaddleNLP.git + git fetch upstream ${BRANCH} + git merge ${BRANCH} --no-edit + git diff --numstat ${BRANCH} -- | awk "{print \$NF}" + else + echo "Not in a pull_request event. Skipping PR-specific operations." + fi + git log --pretty=oneline -10 + ' + fi + + - name: Skip For Bug + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." 
+ else + docker exec -t $container_name /bin/bash -c ' + cd /workspace/PaddleNLP + rm -rf tests/utils/test_aistudio_download.py + ' + fi + + - name: Test + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + docker exec -t $container_name /bin/bash -c ' + ldconfig + unlink /usr/bin/python3 + ln -sf $(which python${python_version}) /usr/bin/python3 + pip config set global.cache-dir "/home/.cache/pip" + source $work_dir/../../../proxy + set -e + cd /workspace/PaddleNLP && git config --global --add safe.directory $PWD + timeout 50m bash scripts/unit_test/ci_unit.sh ${paddle_whl} + ' + fi + + - name: Upload Allure-reports & Logs + if: always() + env: + home_path: ${{ github.workspace }}/../../.. + bos_file: ${{ github.workspace }}/../../../bos/BosClient.py + allure_file: ${{ github.workspace }}/../../../allure-2.19.0/bin/allure + run: | + if [[ "$RUN_DOWNSTREAM" == "false" ]]; then + echo "Not in a pull_request or test_build event. Skipping..." + else + docker exec -t $container_name /bin/bash -c ' + unset http_proxy && unset https_proxy + if [ ! -f "${{ env.bos_file }}" ]; then + wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate + mkdir ${{ env.home_path }}/bos + tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos + fi + if [ ! 
-f "${{ env.allure_file }}" ]; then + wget -q --no-proxy -O ${{ env.home_path }}/allure-2.19.0.zip https://xly-devops.bj.bcebos.com/tools/allure-2.19.0.zip --no-check-certificate + unzip -q ${{ env.home_path }}/allure-2.19.0.zip -d ${{ env.home_path }}/ + fi + if [[ "${{ env.RUN_DOWNSTREAM }}" == "" && -n "${PR_ID}" ]]; then + bos_prefix="${PR_ID}/${COMMIT_ID}" + elif [[ "${{ env.RUN_DOWNSTREAM }}" == "true" && -n "${PR_ID}" ]]; then + bos_prefix="${PR_ID}/${COMMIT_ID}/test_build" + else + bos_prefix="schedule/$(date +%Y%m%d)" + fi + cd /workspace/PaddleNLP/unittest_logs + for FILE in /workspace/PaddleNLP/unittest_logs/*; do + file=$(basename "$FILE") + python ${{ env.bos_file }} $file paddle-github-action/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs + echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs/$file" + done + cd /workspace/PaddleNLP/ + ${{ env.allure_file }} generate result -o report + tar -czf products.tar.gz report unittest_logs + python ${{ env.bos_file }} products.tar.gz paddle-github-action/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs + echo "report: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/unittest-gpu/${bos_prefix}/logs/products.tar.gz" + ' + fi + + - name: Terminate And Delete the Container + if: always() + run: | + docker rm -f $container_name 2>/dev/null || true \ No newline at end of file diff --git a/tests/llm/test_grpo.py b/tests/llm/test_grpo.py index 92dd919f06d5..dd58d6c89b20 100644 --- a/tests/llm/test_grpo.py +++ b/tests/llm/test_grpo.py @@ -92,7 +92,7 @@ def test_grpo(self): # 运行主逻辑 cmd = 'python -u -m paddle.distributed.launch \ --devices "$CUDA_VISIBLE_DEVICES" run_rl.py \ - ../../config/qwen/reinforce_plus_plus_argument.yaml \ + ../../config/qwen/grpo_argument.yaml \ --actor_model_name_or_path "Qwen/Qwen2-1.5B" \ --max_dec_len 128 \ --max_steps 3 \ diff --git a/tests/llm/test_reinforce_plus_plus.py b/tests/llm/test_reinforce_plus_plus.py index 8e74e663d886..b0f1e332c64a 
100644 --- a/tests/llm/test_reinforce_plus_plus.py +++ b/tests/llm/test_reinforce_plus_plus.py @@ -92,7 +92,7 @@ def test_reinforce_plus_plus(self): # 运行主逻辑 cmd = 'python -u -m paddle.distributed.launch \ --devices "$CUDA_VISIBLE_DEVICES" run_rl.py \ - ../../config/qwen/reinforce_plus_plus_argument.yaml \ + ../../config/qwen/grpo_argument.yaml \ --rl_algorithm "reinforce_plus_plus" \ --actor_model_name_or_path "Qwen/Qwen2-1.5B" \ --max_dec_len 128 \