[ci] refactor ete testcase #5453
Workflow file for this run
name: pr_ete_test
on:
  pull_request:
    paths:
      - ".github/workflows/pr_ete_test.yml"
      - "cmake/**"
      - "src/**"
      - "autotest/**"
      - "3rdparty/**"
      - "lmdeploy/**"
      - "requirements/**"
      - "requirements_cuda.txt"
      - "CMakeLists.txt"
      - "setup.py"
  workflow_dispatch:
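# Cancel the in-progress run for the same PR (or ref, for manual dispatches)
# when a newer one is triggered.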
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
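# A single job on a self-hosted A100 PR runner, executed inside the lmdeploy dev-cu12.8 container.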
jobs:
  pr_functions_test:
    runs-on: [self-hosted, linux-a100-pr]
    timeout-minutes: 120
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.head_ref }}_${{ github.run_id }}
      SERVER_LOG: /nvme/qa_test_models/server_log/${{ github.head_ref }}_${{ github.run_id }}
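    # Model weights, test reports, the pip cache and offline packages are bind-mounted
    # from the runner host; --pull never expects the image to already be present locally.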
    container:
      image: openmmlab/lmdeploy:dev-cu12.8
      options: --gpus all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never
      volumes:
        - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
        - /nvme/share_data/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
      - name: Install lmdeploy
        run: |
          python3 -m pip install -r requirements/lite.txt
          python3 -m pip install -r requirements/test.txt
          python3 -m pip install -e .
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          mkdir ${{env.REPORT_DIR}} -p
          mkdir ${{env.SERVER_LOG}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Test lmdeploy - func
        run: |
          pytest autotest -m 'pr_test and gpu_num_2' -x --alluredir=${{env.REPORT_DIR}} --clean-alluredir
          pytest autotest -m 'pr_test and gpu_num_1' -n 2 -x --alluredir=${{env.REPORT_DIR}}
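      # Switch to the pinned transformers release before the api_server tests below.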
      - name: Update transformers
        run: |
          pip install transformers==4.57.3
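      # Each serving step below follows the same pattern: start api_server in the
      # background, poll /health every 5 s (up to 180 tries, ~15 min), run the restful
      # test subset once the server is up, then call /terminate. If the server never
      # becomes healthy, the start-up log is dumped and the step fails.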
      - name: Test restful server - turbomind Qwen3-32B
        run: |
          CUDA_VISIBLE_DEVICES=5,6 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-32B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_Qwen3-32B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}}
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}}
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          cat ${{env.SERVER_LOG}}/turbomind_Qwen3-32B_start_restful.log
          exit 1
      - name: Test restful server - turbomind InternVL3-38B
        run: |
          CUDA_VISIBLE_DEVICES=5,6 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}}
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}}
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          cat ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log
          exit 1
      - name: Test restful server - turbomind Qwen3-30B-A3B
        run: |
          CUDA_VISIBLE_DEVICES=5,6 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_Qwen3-30B-A3B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}}
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}}
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          cat ${{env.SERVER_LOG}}/turbomind_Qwen3-30B-A3B_start_restful.log
          exit 1
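      # The pytorch run enables returning routed experts, so the experts-related
      # generate tests are not excluded here.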
      - name: Test restful server - pytorch Qwen3-30B-A3B
        run: |
          CUDA_VISIBLE_DEVICES=5,6 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_Qwen3-30B-A3B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}}
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch' --alluredir=${{env.REPORT_DIR}}
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          cat ${{env.SERVER_LOG}}/pytorch_Qwen3-30B-A3B_start_restful.log
          exit 1
      - name: Test restful server - pytorch Qwen3-VL-30B-A3B-Instruct
        run: |
          CUDA_VISIBLE_DEVICES=5,6 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-VL-30B-A3B-Instruct --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}}
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not experts' --alluredir=${{env.REPORT_DIR}}
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          cat ${{env.SERVER_LOG}}/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log
          exit 1
      - name: Test restful server - pytorch InternVL3_5-30B-A3B
        run: |
          CUDA_VISIBLE_DEVICES=5,6 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}}
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not experts' --alluredir=${{env.REPORT_DIR}}
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          cat ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log
          exit 1
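      # Always runs: mark the report as done and recreate the checkout directory so the
      # next job on this self-hosted runner starts from a clean workspace.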
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir