Skip to content

Commit e67ab9e

Browse files
authored
fix: MiniCPMVPlugin IndexError in process_messages when training with video (#10276)
Co-authored-by: xxddccaa <xxddccaa@users.noreply.github.com>
1 parent 2c4f121 commit e67ab9e

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

src/llamafactory/data/mm_plugin.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1058,7 +1058,9 @@ def _get_mm_inputs(
10581058
chunk_input=True,
10591059
sampling_rate=getattr(processor, "audio_sampling_rate", 16000),
10601060
)
1061-
audio_feature_lens = [torch.tensor(audio_feature_len) for audio_feature_len in audio_feature_lens]
1061+
audio_feature_lens = [
1062+
x.clone().detach() if isinstance(x, torch.Tensor) else torch.tensor(x) for x in audio_feature_lens
1063+
]
10621064
mm_inputs.update({"audio_features": audio_features, "audio_feature_lens": audio_feature_lens})
10631065
if kwargs.get("ret_phs", False):
10641066
mm_inputs.update({"audio_phs": audio_phs})
@@ -1098,7 +1100,7 @@ def process_messages(
10981100
num_image_tokens += 1
10991101

11001102
while VIDEO_PLACEHOLDER in content:
1101-
video_seqlen = len(mm_inputs["pixel_values"][num_video_tokens]) if self.expand_mm_tokens else 1
1103+
video_seqlen = len(mm_inputs["image_sizes"][num_video_tokens]) if self.expand_mm_tokens else 1
11021104
content = content.replace(VIDEO_PLACEHOLDER, "{{image}}" * video_seqlen, 1)
11031105
num_video_tokens += 1
11041106

0 commit comments

Comments
 (0)