@@ -366,13 +366,13 @@ def convert_nan_to_none(obj):
366
366
audio_contents .append (content )
367
367
if role == "user" or role == "system" :
368
368
text_tokens = tokenizer .encode (
369
- f "<|audio_bos|><|AUDIO|><|audio_eos|>" ,
369
+ "<|audio_bos|><|AUDIO|><|audio_eos|>" ,
370
370
add_special_tokens = False ,
371
371
)
372
372
input_tokens .extend (text_tokens )
373
373
elif role == "assistant" :
374
374
text_tokens = tokenizer .encode (
375
- f "<|audio_out_bos|><|AUDIO_OUT|><|audio_eos|>" ,
375
+ "<|audio_out_bos|><|AUDIO_OUT|><|audio_eos|>" ,
376
376
add_special_tokens = False ,
377
377
)
378
378
input_tokens .extend (text_tokens )
@@ -587,7 +587,7 @@ def __call__(self, batch: List[ChatMLDatasetSample]):
587
587
# I tried to remove the for-loop from the original implementation,
588
588
# but batching with padding caused problems, so I turned it into a list comprehension.
589
589
lengths = [seg .shape [1 ] for seg in audio_in_ids_l ]
590
- aug_lengths = [l + 2 for l in lengths ]
590
+ aug_lengths = [length + 2 for length in lengths ]
591
591
audio_in_ids_start = torch .cumsum (
592
592
torch .tensor ([0 ] + aug_lengths [:- 1 ], dtype = torch .long ), dim = 0
593
593
)
0 commit comments