[infer] update lmdeploy version (#5398)

Jintao-Huang · Jintao-Huang · commit c2312fddede9 · 2025-08-15T11:04:13.000+08:00
diff --git a/README.md b/README.md
@@ -131,7 +131,7 @@ Running Environment:
 | deepspeed    | >=0.14       | 0.16.9              | Training                                  |
 | vllm         | >=0.5.1      | 0.10                | Inference/Deployment                      |
 | sglang       | >=0.4.6      | 0.4.9.post6         | Inference/Deployment                      |
-| lmdeploy     | >=0.5,<0.9   | 0.8                 | Inference/Deployment                      |
+| lmdeploy     | >=0.5        | 0.9.2               | Inference/Deployment                      |
 | evalscope    | >=0.11       |                     | Evaluation                                |
 | gradio       |              | 5.32.1              | Web-UI/App                                |
 
diff --git a/README_CN.md b/README_CN.md
@@ -127,7 +127,7 @@ pip install -e .
 | deepspeed    | >=0.14       | 0.16.9              | 训练                 |
 | vllm         | >=0.5.1      | 0.10                | 推理/部署              |
 | sglang       | >=0.4.6      | 0.4.9.post6         | 推理/部署              |
-| lmdeploy     | >=0.5,<0.9   | 0.8                 | 推理/部署              |
+| lmdeploy     | >=0.5        | 0.9.2               | 推理/部署              |
 | evalscope    | >=0.11       |                     | 评测                 |
 | gradio       |              | 5.32.1              | Web-UI/App         |
 
diff --git a/docs/source/GetStarted/SWIFT安装.md b/docs/source/GetStarted/SWIFT安装.md
@@ -96,7 +96,7 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2
 | deepspeed    | >=0.14       | 0.16.9              | 训练                 |
 | vllm         | >=0.5.1      | 0.10                | 推理/部署              |
 | sglang       | >=0.4.6      | 0.4.9.post6         | 推理/部署              |
-| lmdeploy     | >=0.5,<0.9   | 0.8                 | 推理/部署              |
+| lmdeploy     | >=0.5        | 0.9.2               | 推理/部署              |
 | evalscope    | >=0.11       |                     | 评测                 |
 | gradio       |              | 5.32.1              | Web-UI/App         |
 
diff --git a/docs/source_en/GetStarted/SWIFT-installation.md b/docs/source_en/GetStarted/SWIFT-installation.md
@@ -97,7 +97,7 @@ More images can be found [here](https://modelscope.cn/docs/intro/environment-set
 | deepspeed    | >=0.14       | 0.16.9              | Training                                  |
 | vllm         | >=0.5.1      | 0.10                | Inference/Deployment                      |
 | sglang       | >=0.4.6      | 0.4.9.post6         | Inference/Deployment                      |
-| lmdeploy     | >=0.5,<0.9   | 0.8                 | Inference/Deployment                      |
+| lmdeploy     | >=0.5        | 0.9.2               | Inference/Deployment                      |
 | evalscope    | >=0.11       |                     | Evaluation                                |
 | gradio       |              | 5.32.1              | Web-UI/App                                |
 
diff --git a/examples/infer/lmdeploy/batch_ddp.sh b/examples/infer/lmdeploy/batch_ddp.sh
@@ -1,8 +1,8 @@
+# test env: lmdeploy 0.9.2
 NPROC_PER_NODE=4 \
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
 swift infer \
     --model Qwen/Qwen2.5-1.5B-Instruct \
     --infer_backend lmdeploy \
     --val_dataset AI-ModelScope/alpaca-gpt4-data-zh#1000 \
-    --max_batch_size 16 \
     --max_new_tokens 512
diff --git a/requirements/install_all.sh b/requirements/install_all.sh
@@ -2,7 +2,7 @@
 # sh requirements/install_all.sh
 pip install "sglang[all]" -U
 pip install "vllm>=0.5.1" "transformers<4.55" "trl<0.21" -U
-pip install "lmdeploy>=0.5,<0.9" -U --no-deps
+pip install "lmdeploy>=0.5" -U
 pip install autoawq -U --no-deps
 pip install auto_gptq optimum bitsandbytes "gradio<5.33" -U
 pip install git+https://github.com/modelscope/ms-swift.git
diff --git a/swift/llm/infer/infer_engine/lmdeploy_engine.py b/swift/llm/infer/infer_engine/lmdeploy_engine.py
@@ -14,6 +14,7 @@
 from lmdeploy.serve import async_engine
 from packaging import version
 from transformers import GenerationConfig
+from transformers.utils.versions import require_version
 
 from swift.llm import InferRequest, Template, TemplateMeta, get_model_tokenizer
 from swift.plugin import Metric
@@ -105,6 +106,9 @@ def _prepare_engine_kwargs(self,
         pipeline_kwargs = {}
         is_multimodal = self.model_meta.is_multimodal
         if is_multimodal:
+            require_version(
+                'lmdeploy<0.9', 'LmdeployEngine will no longer maintain inference for '
+                'multimodal models in lmdeploy>=0.9.')
             vision_config = VisionConfig(max_batch_size=vision_batch_size)
             pipeline_kwargs['vision_config'] = vision_config
             logger.info(f'vision_config: {vision_config}')