1 file changed: +4 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -1058,7 +1058,9 @@ def _get_mm_inputs(
1058 1058 chunk_input=True,
1059 1059 sampling_rate=getattr(processor, "audio_sampling_rate", 16000),
1060 1060 )
1061- audio_feature_lens = [torch.tensor(audio_feature_len) for audio_feature_len in audio_feature_lens]
1061+ audio_feature_lens = [
1062+ x.clone().detach() if isinstance(x, torch.Tensor) else torch.tensor(x) for x in audio_feature_lens
1063+ ]
1062 1064 mm_inputs.update({"audio_features": audio_features, "audio_feature_lens": audio_feature_lens})
1063 1065 if kwargs.get("ret_phs", False):
1064 1066 mm_inputs.update({"audio_phs": audio_phs})
@@ -1098,7 +1100,7 @@ def process_messages(
1098 1100 num_image_tokens += 1
1099 1101
1100 1102 while VIDEO_PLACEHOLDER in content:
1101- video_seqlen = len(mm_inputs["pixel_values"][num_video_tokens]) if self.expand_mm_tokens else 1
1103+ video_seqlen = len(mm_inputs["image_sizes"][num_video_tokens]) if self.expand_mm_tokens else 1
1102 1104 content = content.replace(VIDEO_PLACEHOLDER, "{{image}}" * video_seqlen, 1)
1103 1105 num_video_tokens += 1
1104 1106
You can’t perform that action at this time.
0 commit comments