Skip to content

Commit 05719cc

Browse files
authored
fix vlm deploy lora & agent (#1371)
1 parent 465f7e6 commit 05719cc

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

swift/llm/deploy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,13 @@ async def _prepare_request(request: Union[ChatCompletionRequest, CompletionReque
116116
f'the model `{model_or_engine.model_type}` is in text generation format. '
117117
'Please use the `completions` API.')
118118
messages = request.messages
119-
# For agent, check if response is endwith observations and join tool observation
120-
messages_join_observation(messages)
121119
images = request.images
122120
if _args.is_multimodal:
123121
compat_openai(messages, images, template.template_type)
124122
messages = decode_base64(messages=messages)['messages']
125123
images = decode_base64(images=images)['images']
124+
# For agent, check if response is endwith observations and join tool observation
125+
messages_join_observation(messages)
126126
example = messages_to_history(messages)
127127
if len(images) > 0:
128128
example['images'] = images

swift/llm/infer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def prepare_model_template(args: InferArguments,
210210
f'args.max_model_len: {args.max_model_len}, model.max_model_len: {model.max_model_len}')
211211
# Preparing LoRA
212212
if is_adapter(args.sft_type) and args.ckpt_dir is not None:
213-
if is_quant_model(args.model_type, model):
213+
if args.lora_request_list is not None and (is_quant_model(args.model_type, model) or args.is_multimodal):
214214
# gptq awq does not support lora switching
215215
args.lora_request_list = None
216216
logger.warning('The current model does not support LoRA switching. '

0 commit comments

Comments (0)