name: daily_ete_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository (org/repo). Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set the branch, tag, or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend filter: turbomind, pytorch. Default contains both backends'
        type: string
        default: "['turbomind', 'pytorch']"
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: "['llm', 'mllm']"
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether to run in offline mode; if true, prepare the code and whl package yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'Set regression function filter: quant, tools, restful, pipeline, benchmark, evaluation. Default contains all functions'
        type: string
        default: "['quant', 'tools', 'restful', 'pipeline', 'benchmark', 'evaluation']"
  schedule:
    - cron: '00 14 * * 0-4'
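# Note: '00 14 * * 0-4' fires at 14:00 UTC, i.e. 22:00 Asia/Shanghai, Sunday through Thursday.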
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
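  # COV_PARAM (above) points pytest-cov at the installed lmdeploy package, so every test job
  # drops timestamped .coverage files into REPORT_DIR for get_coverage_report to merge later.
  # FAIL_CONFIG (below): on scheduled re-run attempts pytest re-runs only the cached failures,
  # selecting nothing when no failures are cached (--lf --lfnf none); otherwise plain --lf.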
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf' }}
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
jobs:
  linux-build:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode) }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
    steps:
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # Setting this to "true" frees about 6 GB but might remove tools that are actually needed
          tool-cache: false
          docker-images: false
          # All of these default to true; set to "false" if your workflow needs them
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # strip -it so docker run works in the non-interactive CI shell
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
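  # download_pkgs stages the test code and the freshly built wheel into TEST_CODE_PATH for all
  # later jobs; in offline mode both are copied from the pre-staged OFFLINE_CODE_PATH instead.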
  download_pkgs:
    needs: linux-build
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Copy repository
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
      - name: Copy repository - offline
        if: ${{ inputs.offline_mode }}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
      - name: Download Artifacts
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Copy Artifacts - offline
        if: ${{ inputs.offline_mode }}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Mark as start
        run: |
          chmod -R 777 ${{env.TEST_CODE_PATH}}
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
  test_quantization:
    needs: download_pkgs
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant')) }}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 150
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /nvme/qa_test_models/lmdeploy/autotest:/local_case
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install auto_gptq matplotlib attrdict
          python3 -m pip install -r requirements/lite.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
          python3 -m pip install transformers==4.53.1 datasets==3.6.0
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test lmdeploy - quantization w4a16
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - quantization w8a8
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  test_tools:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 300
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
        model: ${{ fromJSON(inputs.model || '["llm", "mllm"]') }}
        function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]') }}
        exclude:
          - backend: turbomind
            model: mllm
            function: chat
          - backend: pytorch
            model: mllm
            function: chat
        include:
          - backend: turbomind
            model: llm
            function: local_case
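        # The excludes drop the chat CLI for mllm models (presumably no command-line chat cases
        # exist for multimodal models); the include adds one extra turbomind/llm combination that
        # runs the locally mounted regression cases (see the local testcase step below).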
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources/lora:/root/lora
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /nvme/qa_test_models/lmdeploy/autotest:/local_case
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          cp -r /root/lora .
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
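      # Cases are sharded by required GPU count so all 8 GPUs stay busy: gpu_num_1 cases run
      # with 8 pytest-xdist workers, gpu_num_2 with 4, gpu_num_4 with 2, and gpu_num_8 serially.
      # Every pytest call ends in "|| true" and each .coverage file is moved aside immediately,
      # so one failing shard cannot abort the rest or clobber coverage data.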
      - name: Test lmdeploy - chat
        continue-on-error: true
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - pipeline
        continue-on-error: true
        if: matrix.function == 'pipeline'
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - restful
        continue-on-error: true
        if: matrix.function == 'restful'
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - local testcase
        if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'local_case'
        run: |
          pytest autotest/toolchain --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest /local_case/issue_regression --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  test_restful:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
        model_path: ['internlm/Intern-S1', 'internlm/internlm2_5-20b-chat', 'internlm/internlm2_5-20b', 'Qwen/Qwen3-8B-Base', 'Qwen/Qwen3-30B-A3B', 'Qwen/Qwen3-32B', 'OpenGVLab/InternVL3_5-30B-A3B', 'OpenGVLab/InternVL3-38B', 'Qwen/Qwen3-VL-8B-Instruct', 'Qwen/Qwen3-VL-30B-A3B-Instruct']
        include:
          - tp: 2
            model: internlm2_5-20b-chat
            model_path: internlm/internlm2_5-20b-chat
            case_info: ['chat_completions_v1', 'generate']
            generate_type: base
          - tp: 2
            model: internlm2_5-20b
            model_path: internlm/internlm2_5-20b
            case_info: ['completions_v1']
            generate_type: base
          - tp: 2
            model: Qwen3-8B-Base
            model_path: Qwen/Qwen3-8B-Base
            case_info: ['completions_v1']
            generate_type: base
          - tp: 8
            model: Intern-S1
            model_path: internlm/Intern-S1
            case_info: ['chat_completions_v1', 'generate']
            generate_type: base
          - tp: 2
            model: Qwen3-30B-A3B
            model_path: Qwen/Qwen3-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: all
            extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
            backend: pytorch
          - tp: 2
            model: Qwen3-30B-A3B
            model_path: Qwen/Qwen3-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
            backend: turbomind
          - tp: 2
            model: InternVL3_5-30B-A3B
            model_path: OpenGVLab/InternVL3_5-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 2
            model: Qwen3-VL-30B-A3B-Instruct
            model_path: Qwen/Qwen3-VL-30B-A3B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 2
            model: Qwen3-32B
            model_path: Qwen/Qwen3-32B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 1
            model: Qwen3-VL-8B-Instruct
            model_path: Qwen/Qwen3-VL-8B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 2
            model: InternVL3-38B
            model_path: OpenGVLab/InternVL3-38B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
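        # Each include entry pins the tensor-parallel degree (tp), the interface cases to run
        # (case_info), and the generate test flavour (generate_type) for one model; entries
        # that also set backend apply only to that backend's combinations.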
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
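      # Launch api_server in the background, then poll /health for up to 15 minutes
      # (180 tries x 5 s); on timeout the step asks the server to terminate and fails the job.
      # The Kill step at the end shuts the server down via /terminate, which is only accepted
      # because the server is started with --allow-terminate-by-client.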
      - name: Start restful api
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} --allow-terminate-by-client > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              echo "health check success"
              exit 0
            fi
          done
          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          exit 1
      - name: Test lmdeploy - chat_completions_v1
        if: matrix.model != 'internlm2_5-20b-chat' && matrix.model != 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1 - Intern-S1
        if: matrix.model == 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1 - internlm2_5-20b-chat
        if: matrix.model == 'internlm2_5-20b-chat' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - internlm2_5-20b
        if: matrix.model == 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - other
        if: matrix.model != 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - base
        if: matrix.generate_type == 'base' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not logprob and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - logprob
        if: matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - all
        if: matrix.generate_type == 'all' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Kill api server
        if: always()
        run: |
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  test_pipeline:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 240
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test lmdeploy - interface pipeline case
        run: |
          pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_8 and not pr_test' -n 1 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
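  # test_benchmark is the only job here that passes FAIL_CONFIG to pytest, so a scheduled
  # re-run attempt retries only the benchmark cases that failed on the previous attempt.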
  test_benchmark:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark')) }}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 120
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          rm -rf ${{env.DEEPSEEK_VL}}/build
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp logs left by previous testcases
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test benchmark script
        run: |
          pytest autotest/benchmark -n 4 --run_id ${{ github.run_id }} -m function ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          chmod -R 777 /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  get_benchmark_result:
    if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark')) }}
    needs: [test_benchmark]
    timeout-minutes: 5
    runs-on: [self-hosted, linux-a100]
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    env:
      BENCHMARK_REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Get overview
        run: |
          pip install pandas fire mmengine
          python3 .github/scripts/action_tools.py generate_benchmark_report $BENCHMARK_REPORT_DIR
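  # get_coverage_report combines the timestamped .coverage fragments that every test job moved
  # into REPORT_DIR into one report and emits coverage.xml alongside it.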
  get_coverage_report:
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, linux-a100]
    needs: [test_tools, test_restful, test_pipeline, test_benchmark]
    timeout-minutes: 5
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Get coverage report
        run: |
          pip install coverage
          coverage combine ${{env.REPORT_DIR}}
          coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
          coverage report -m
          mv .coverage ${{env.REPORT_DIR}}/.coverage
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
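  # When any upstream job failed, post a Feishu card linking back to this run and at-mentioning
  # the configured user; the webhook URL and user id are held in repository secrets.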
  notify_to_feishu:
    if: always() && !cancelled() && (github.ref_name == 'develop' || github.ref_name == 'main')
    needs: [get_benchmark_result, get_coverage_report]
    timeout-minutes: 5
    runs-on: [self-hosted, linux-a100]
    steps:
      - name: notify
        if: contains(needs.*.result, 'failure')
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test finished!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}