name: daily_ete_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository (org/repo). Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set the branch, tag, or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend filter: turbomind, pytorch. Default contains both backends'
        type: string
        default: "['turbomind', 'pytorch']"
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: "['llm', 'mllm']"
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether to run in offline mode; if true, prepare the code and whl package yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'Set regression function filter: quant, tools, restful, pipeline, benchmark, evaluation. Default contains all functions'
        type: string
        default: "['quant', 'tools', 'restful', 'pipeline', 'benchmark', 'evaluation']"
  schedule:
    - cron: '00 14 * * 0-4'
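# Note: '00 14 * * 0-4' fires at 14:00 UTC, i.e. 22:00 Asia/Shanghai, Sunday through Thursday.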
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
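  # COV_PARAM (above) points pytest-cov at the installed lmdeploy package, so every test job
  # drops timestamped .coverage files into REPORT_DIR for get_coverage_report to merge later.
  # FAIL_CONFIG (below): on scheduled re-run attempts pytest re-runs only the cached failures,
  # selecting nothing when no failures are cached (--lf --lfnf none); otherwise plain --lf.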
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf' }}
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
jobs:
  linux-build:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode) }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
    steps:
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # Setting this to "true" frees about 6 GB but might remove tools that are actually needed
          tool-cache: false
          docker-images: false
          # All of these default to true; set to "false" if your workflow needs them
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # strip -it so docker run works in the non-interactive CI shell
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
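  # download_pkgs stages the test code and the freshly built wheel into TEST_CODE_PATH for all
  # later jobs; in offline mode both are copied from the pre-staged OFFLINE_CODE_PATH instead.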
  download_pkgs:
    needs: linux-build
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Copy repository
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
      - name: Copy repository - offline
        if: ${{ inputs.offline_mode }}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
      - name: Download Artifacts
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Copy Artifacts - offline
        if: ${{ inputs.offline_mode }}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Mark as start
        run: |
          chmod -R 777 ${{env.TEST_CODE_PATH}}
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
  test_quantization:
    needs: download_pkgs
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant')) }}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 150
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /nvme/qa_test_models/lmdeploy/autotest:/local_case
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install auto_gptq matplotlib attrdict
          python3 -m pip install -r requirements/lite.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
          python3 -m pip install transformers==4.53.1 datasets==3.6.0
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test lmdeploy - quantization w4a16
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - quantization w8a8
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  test_tools:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 300
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
        model: ${{ fromJSON(inputs.model || '["llm", "mllm"]') }}
        function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]') }}
        exclude:
          - backend: turbomind
            model: mllm
            function: chat
          - backend: pytorch
            model: mllm
            function: chat
        include:
          - backend: turbomind
            model: llm
            function: local_case
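        # The excludes drop the chat CLI for mllm models (presumably no command-line chat cases
        # exist for multimodal models); the include adds one extra turbomind/llm combination that
        # runs the locally mounted regression cases (see the local testcase step below).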
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources/lora:/root/lora
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /nvme/qa_test_models/lmdeploy/autotest:/local_case
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          cp -r /root/lora .
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
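      # Cases are sharded by required GPU count so all 8 GPUs stay busy: gpu_num_1 cases run
      # with 8 pytest-xdist workers, gpu_num_2 with 4, gpu_num_4 with 2, and gpu_num_8 serially.
      # Every pytest call ends in "|| true" and each .coverage file is moved aside immediately,
      # so one failing shard cannot abort the rest or clobber coverage data.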
      - name: Test lmdeploy - chat
        continue-on-error: true
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - pipeline
        continue-on-error: true
        if: matrix.function == 'pipeline'
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - restful
        continue-on-error: true
        if: matrix.function == 'restful'
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - local testcase
        if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'local_case'
        run: |
          pytest autotest/toolchain --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest /local_case/issue_regression --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  test_restful:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
        model_path: ['internlm/Intern-S1', 'internlm/internlm2_5-20b-chat', 'internlm/internlm2_5-20b', 'Qwen/Qwen3-8B-Base', 'Qwen/Qwen3-30B-A3B', 'Qwen/Qwen3-32B', 'OpenGVLab/InternVL3_5-30B-A3B', 'OpenGVLab/InternVL3-38B', 'Qwen/Qwen3-VL-8B-Instruct', 'Qwen/Qwen3-VL-30B-A3B-Instruct']
        include:
          - tp: 2
            model: internlm2_5-20b-chat
            model_path: internlm/internlm2_5-20b-chat
            case_info: ['chat_completions_v1', 'generate']
            generate_type: base
          - tp: 2
            model: internlm2_5-20b
            model_path: internlm/internlm2_5-20b
            case_info: ['completions_v1']
            generate_type: base
          - tp: 2
            model: Qwen3-8B-Base
            model_path: Qwen/Qwen3-8B-Base
            case_info: ['completions_v1']
            generate_type: base
          - tp: 8
            model: Intern-S1
            model_path: internlm/Intern-S1
            case_info: ['chat_completions_v1', 'generate']
            generate_type: base
          - tp: 2
            model: Qwen3-30B-A3B
            model_path: Qwen/Qwen3-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: all
            extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
            backend: pytorch
          - tp: 2
            model: Qwen3-30B-A3B
            model_path: Qwen/Qwen3-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
            backend: turbomind
          - tp: 2
            model: InternVL3_5-30B-A3B
            model_path: OpenGVLab/InternVL3_5-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 2
            model: Qwen3-VL-30B-A3B-Instruct
            model_path: Qwen/Qwen3-VL-30B-A3B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 2
            model: Qwen3-32B
            model_path: Qwen/Qwen3-32B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 1
            model: Qwen3-VL-8B-Instruct
            model_path: Qwen/Qwen3-VL-8B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 2
            model: InternVL3-38B
            model_path: OpenGVLab/InternVL3-38B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
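        # Each include entry pins the tensor-parallel degree (tp), the interface cases to run
        # (case_info), and the generate test flavour (generate_type) for one model; entries
        # that also set backend apply only to that backend's combinations.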
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
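      # Launch api_server in the background, then poll /health for up to 15 minutes
      # (180 tries x 5 s); on timeout the step asks the server to terminate and fails the job.
      # The Kill step at the end shuts the server down via /terminate, which is only accepted
      # because the server is started with --allow-terminate-by-client.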
      - name: Start restful api
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} --allow-terminate-by-client > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              echo "health check success"
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          exit 1
      - name: Test lmdeploy - chat_completions_v1
        if: matrix.model != 'internlm2_5-20b-chat' && matrix.model != 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1 - Intern-S1
        if: matrix.model == 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1 - internlm2_5-20b-chat
        if: matrix.model == 'internlm2_5-20b-chat' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - internlm2_5-20b
        if: matrix.model == 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - other
        if: matrix.model != 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - base
        if: matrix.generate_type == 'base' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not logprob and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - logprob
        if: matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - all
        if: matrix.generate_type == 'all' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Kill api server
        if: always()
        run: |
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  test_pipeline:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 240
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test lmdeploy - interface pipeline case
        run: |
          pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_8 and not pr_test' -n 1 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
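  # test_benchmark is the only job here that passes FAIL_CONFIG to pytest, so a scheduled
  # re-run attempt retries only the benchmark cases that failed on the previous attempt.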
  test_benchmark:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 120
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test benchmark script
        run: |
          pytest autotest/benchmark -n 4 --run_id ${{ github.run_id }} -m function ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          chmod -R 777 /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  get_benchmark_result:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark')) }}
    needs: [test_benchmark]
    timeout-minutes: 5
    runs-on: [self-hosted, linux-a100]
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    env:
      BENCHMARK_REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Get overview
        run: |
          pip install pandas fire mmengine
          python3 .github/scripts/action_tools.py generate_benchmark_report $BENCHMARK_REPORT_DIR
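  # get_coverage_report combines the timestamped .coverage fragments that every test job moved
  # into REPORT_DIR into one report and emits coverage.xml alongside it.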
  get_coverage_report:
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, linux-a100]
    needs: [test_tools, test_restful, test_pipeline, test_benchmark]
    timeout-minutes: 5
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Get coverage report
        run: |
          pip install coverage
          coverage combine ${{env.REPORT_DIR}}
          coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
          coverage report -m
          mv .coverage ${{env.REPORT_DIR}}/.coverage
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
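  # When any upstream job failed, post a Feishu card linking back to this run and at-mentioning
  # the configured user; the webhook URL and user id are held in repository secrets.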
  notify_to_feishu:
    if: always() && !cancelled() && (github.ref_name == 'develop' || github.ref_name == 'main')
    needs: [get_benchmark_result, get_coverage_report]
    timeout-minutes: 5
    runs-on: [self-hosted, linux-a100]
    steps:
      - name: notify
        if: contains(needs.*.result, 'failure')
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test finished!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}