Merged
2 changes: 1 addition & 1 deletion docs/source/Instruction/Command-line-parameters.md
@@ -800,7 +800,7 @@ qwen2_5_omni, in addition to the model-specific parameters of qwen2_5_vl and qwen2_audio, also
- Tip: ms-swift fine-tunes only the thinker part; setting this to False is recommended to reduce GPU memory usage (only the thinker part of the model structure is created).

### qwen3_vl
The parameter meanings match those in the `qwen_vl_utils>=0.0.14` library; see [here](https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24). The library's global default values can be overridden by passing the following environment variables.
The parameter meanings match those in the `qwen_vl_utils>=0.0.14` library; see [here](https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24). The library's global default values can be overridden by passing the following environment variables. (Environment variables used by `qwen2_5_vl`, such as `MAX_PIXELS` and `VIDEO_MAX_PIXELS`, are also supported and converted automatically.)

- SPATIAL_MERGE_SIZE: defaults to 2.
- IMAGE_MIN_TOKEN_NUM: defaults to `4`, the minimum number of image tokens per image.
3 changes: 2 additions & 1 deletion docs/source_en/Instruction/Command-line-parameters.md
@@ -825,7 +825,8 @@ qwen2_5_omni not only includes the model-specific parameters of qwen2_5_vl and q


### qwen3_vl
The parameter meanings are the same as in the `qwen_vl_utils>=0.0.14` library; see https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24. By passing the following environment variables you can override the library's global default values:
The parameter meanings are the same as in the `qwen_vl_utils>=0.0.14` library; see https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24. By passing the following environment variables you can override the library's global default values. (Environment variables used by `qwen2_5_vl`, such as `MAX_PIXELS` and `VIDEO_MAX_PIXELS`, are also supported and converted automatically.)


- SPATIAL_MERGE_SIZE: default 2.
- IMAGE_MIN_TOKEN_NUM: default `4`, denotes the minimum number of image tokens per image.
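For illustration, a minimal sketch of how these overrides would be applied; the values are made up, and the variables must be set before the model and processor are created:

```python
import os

# Hypothetical values; any variable from the list above works the same way.
os.environ['SPATIAL_MERGE_SIZE'] = '2'
os.environ['IMAGE_MIN_TOKEN_NUM'] = '16'
# qwen2_5_vl-style names are also accepted and converted automatically:
os.environ['MAX_PIXELS'] = str(1280 * 28 * 28)
```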
5 changes: 1 addition & 4 deletions examples/infer/demo_embedding.py
@@ -4,10 +4,7 @@

if __name__ == '__main__':
    engine = PtEngine(
        'Qwen/Qwen3-Embedding-4B',
        task_type='embedding',
        torch_dtype=torch.float16,
        attn_implementation='flash_attention_2')
        'Qwen/Qwen3-Embedding-4B', task_type='embedding', torch_dtype=torch.float16, attn_impl='flash_attention_2')
Reviewer comment (Contributor, severity: medium): For better readability and adherence to PEP 8 style guidelines (which recommend a line length of 79-99 characters), it's better to format the arguments to `PtEngine` across multiple lines, as it was before this change:

        'Qwen/Qwen3-Embedding-4B',
        task_type='embedding',
        torch_dtype=torch.float16,
        attn_impl='flash_attention_2')


infer_requests = [
    InferRequest(messages=[
2 changes: 1 addition & 1 deletion examples/infer/demo_reranker.py
@@ -7,7 +7,7 @@
        'Qwen/Qwen3-Reranker-4B',
        task_type='generative_reranker',
        torch_dtype=torch.float16,
        attn_implementation='flash_attention_2')
        attn_impl='flash_attention_2')

infer_request = InferRequest(
    messages=[{
9 changes: 8 additions & 1 deletion swift/llm/infer/infer_engine/pt_engine.py
@@ -21,6 +21,7 @@
from swift.llm import InferRequest, Template, TemplateMeta, get_model_tokenizer, safe_snapshot_download, to_device
from swift.plugin import Metric
from swift.tuners import Swift
from swift.utils import get_last_valid_indices
from ..protocol import (ChatCompletionResponse, ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
                        ChatCompletionStreamResponse, ChatMessage, DeltaMessage, EmbeddingResponse,
                        EmbeddingResponseData, RequestConfig, random_uuid)
@@ -349,7 +350,13 @@ def _infer_forward(self, template: Template, inputs: Dict[str, Any], adapter_req
        negative_token = os.environ.get('GENERATIVE_RERANKER_NEGATIVE_TOKEN', 'no')
        token_false_id = template.tokenizer.convert_tokens_to_ids(negative_token)
        token_true_id = template.tokenizer.convert_tokens_to_ids(positive_token)
        batch_scores = logits[:, -1, :]
        attention_mask = inputs.get('attention_mask')
        if attention_mask is None:
            batch_scores = logits[:, -1, :]
        else:
            last_valid_indices = get_last_valid_indices(attention_mask)
            batch_indices = torch.arange(attention_mask.shape[0], device=logits.device)
            batch_scores = logits[batch_indices, last_valid_indices, :]
        true_vector = batch_scores[:, token_true_id]
        false_vector = batch_scores[:, token_false_id]
        batch_scores = torch.stack([false_vector, true_vector], dim=1).float()
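The imported `get_last_valid_indices` helper is the crux of this hunk: instead of always reading the logits at the final column, the reranker now reads them at the last non-padding position of each row. A minimal sketch of what the helper plausibly computes (the real implementation lives in `swift.utils` and may differ):

```python
import torch

def get_last_valid_indices(attention_mask: torch.Tensor) -> torch.Tensor:
    """Index of the last position with attention_mask == 1 in each row (sketch)."""
    # Weight every valid position by its index; argmax then returns the
    # largest index whose mask is 1. Works for left- and right-padded batches.
    positions = torch.arange(attention_mask.shape[1], device=attention_mask.device)
    return (attention_mask * positions).argmax(dim=1)

mask = torch.tensor([[1, 1, 1, 0],   # right-padded -> last valid index 2
                     [1, 1, 1, 1]])  # no padding   -> last valid index 3
print(get_last_valid_indices(mask))  # tensor([2, 3])
```

The stacked `[false_vector, true_vector]` logits are then presumably turned into a relevance probability downstream, e.g. via a softmax over the two columns.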
13 changes: 4 additions & 9 deletions swift/llm/model/patcher.py
@@ -9,6 +9,7 @@
import accelerate
import torch
import torch.nn as nn
import torch.nn.functional as F
import transformers
from accelerate.utils import find_device
from packaging import version
@@ -92,15 +93,9 @@ def _output_embedding_hook(module, args, kwargs, output):
    if attention_mask is None:
        attention_mask = output.get('attention_mask', None)
    hidden_states = output.logits
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        embeddings = hidden_states[:, -1]
    else:
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = hidden_states.shape[0]
        embeddings = hidden_states[torch.arange(batch_size, device=hidden_states.device), sequence_lengths]
    embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

    sequence_lengths = get_last_valid_indices(attention_mask)
    embeddings = hidden_states[torch.arange(hidden_states.shape[0], device=hidden_states.device), sequence_lengths]
    embeddings = F.normalize(embeddings, p=2, dim=1)
    return {
        'last_hidden_state': embeddings.contiguous(),
    }
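A side note on the `F.normalize(..., p=2, dim=1)` at the end of the hook: once the pooled embeddings are L2-normalized, a plain matrix product is already their cosine similarity, so callers need no further normalization. A self-contained check:

```python
import torch
import torch.nn.functional as F

emb = F.normalize(torch.randn(4, 8), p=2, dim=1)  # stand-in for pooled hidden states
cos = emb @ emb.t()  # dot products of unit vectors == cosine similarities
assert torch.allclose(cos.diagonal(), torch.ones(4), atol=1e-6)
```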
3 changes: 3 additions & 0 deletions swift/llm/model/utils.py
@@ -9,6 +9,7 @@
import torch.nn.functional as F
from accelerate.utils import find_device
from modelscope.hub.utils.utils import get_cache_dir
from packaging import version
from torch import nn
from transformers import PretrainedConfig
from transformers.utils import strtobool
@@ -549,6 +550,8 @@ def _patch_conv3d():
    nn.Conv3d._original_forward = nn.Conv3d.forward

    def forward(self, x):
        if version.parse(torch.__version__) < version.parse('2.9.0'):
            return self._original_forward(x)
        if any(s != k for s, k in zip(self.stride, self.kernel_size)) or any(p != 0 for p in self.padding) or any(
                d != 1 for d in self.dilation) or self.groups != 1:
            raise NotImplementedError(
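For context on the guard above: the patched forward only claims the patch-embedding case (stride equal to kernel size, zero padding, dilation 1, a single group), where a Conv3d collapses into a reshape plus one matmul. A hedged sketch of that equivalence, as an illustration of the math rather than the code this PR installs:

```python
import torch

def conv3d_as_patch_embed(x: torch.Tensor, conv: torch.nn.Conv3d) -> torch.Tensor:
    """Non-overlapping Conv3d (stride == kernel_size, padding 0) as a matmul."""
    out_c, in_c, kt, kh, kw = conv.weight.shape
    b, c, t, h, w = x.shape
    # Split every axis into (num_patches, patch_size), move the patch content
    # into one flat dimension, then project with the flattened conv kernel.
    x = x.reshape(b, c, t // kt, kt, h // kh, kh, w // kw, kw)
    x = x.permute(0, 2, 4, 6, 1, 3, 5, 7).reshape(b, -1, c * kt * kh * kw)
    out = x @ conv.weight.reshape(out_c, -1).t()
    if conv.bias is not None:
        out = out + conv.bias
    return out.transpose(1, 2).reshape(b, out_c, t // kt, h // kh, w // kw)

conv = torch.nn.Conv3d(3, 8, kernel_size=2, stride=2)
x = torch.randn(1, 3, 4, 4, 4)
assert torch.allclose(conv(x), conv3d_as_patch_embed(x, conv), atol=1e-5)
```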