@@ -257,6 +257,8 @@ def _process_multimodal(
         mm_processor_kwargs: Optional[Mapping[str, object]],
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> MultiModalInputs:
         """
         Apply the model's multi-modal processor to a multi-modal prompt,
@@ -273,10 +275,13 @@ def _process_multimodal(
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
 
-        return mm_processor.apply(prompt,
-                                  mm_data,
-                                  hf_processor_mm_kwargs=mm_processor_kwargs,
-                                  tokenization_kwargs=tokenization_kwargs)
+        return mm_processor.apply(
+            prompt,
+            mm_data,
+            hf_processor_mm_kwargs=mm_processor_kwargs,
+            tokenization_kwargs=tokenization_kwargs,
+            mm_hash_overrides=mm_hash_overrides,
+        )
 
     async def _process_multimodal_async(
         self,
@@ -285,6 +290,8 @@ async def _process_multimodal_async(
         mm_processor_kwargs: Optional[Mapping[str, object]],
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> MultiModalInputs:
         """
         Async version of
@@ -301,10 +308,13 @@ async def _process_multimodal_async(
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
 
-        return mm_processor.apply(prompt,
-                                  mm_data,
-                                  hf_processor_mm_kwargs=mm_processor_kwargs,
-                                  tokenization_kwargs=tokenization_kwargs)
+        return mm_processor.apply(
+            prompt,
+            mm_data,
+            hf_processor_mm_kwargs=mm_processor_kwargs,
+            tokenization_kwargs=tokenization_kwargs,
+            mm_hash_overrides=mm_hash_overrides,
+        )
 
     def _process_embeds(
         self,
@@ -341,6 +351,8 @@ def _process_tokens(
         parsed_content: TokensPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_token_ids = parsed_content["prompt_token_ids"]
         token_type_ids = parsed_content.get("token_type_ids")
@@ -353,6 +365,7 @@ def _process_tokens(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         else:
             inputs = token_inputs(
@@ -370,6 +383,8 @@ async def _process_tokens_async(
         parsed_content: TokensPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_token_ids = parsed_content["prompt_token_ids"]
         token_type_ids = parsed_content.get("token_type_ids")
@@ -382,6 +397,7 @@ async def _process_tokens_async(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         else:
             inputs = token_inputs(
@@ -399,6 +415,8 @@ def _process_text(
         parsed_content: TextPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_text = parsed_content["prompt"]
 
@@ -410,6 +428,7 @@ def _process_text(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         else:
             prompt_token_ids = self._tokenize_prompt(
@@ -432,6 +451,8 @@ async def _process_text_async(
         parsed_content: TextPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_text = parsed_content["prompt"]
 
@@ -443,6 +464,7 @@ async def _process_text_async(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         else:
             prompt_token_ids = await self._tokenize_prompt_async(
@@ -465,6 +487,8 @@ def _prompt_to_llm_inputs(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> SingletonInputs:
         """
         Extract the singleton inputs from a prompt.
@@ -486,18 +510,21 @@ def _prompt_to_llm_inputs(
             return self._process_tokens(
                 parsed["content"],
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         if parsed["type"] == "text":
             return self._process_text(
                 parsed["content"],
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         if parsed["type"] == "str":
             return self._process_text(
                 TextPrompt(prompt=parsed["content"]),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
 
         assert_never(parsed)
@@ -507,6 +534,8 @@ async def _prompt_to_llm_inputs_async(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> SingletonInputs:
         """
         Async version of
@@ -520,18 +549,21 @@ async def _prompt_to_llm_inputs_async(
             return await self._process_tokens_async(
                 parsed["content"],
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         if parsed["type"] == "text":
             return await self._process_text_async(
                 parsed["content"],
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
         if parsed["type"] == "str":
             return await self._process_text_async(
                 TextPrompt(prompt=parsed["content"]),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
+                mm_hash_overrides=mm_hash_overrides,
             )
 
         assert_never(parsed)
@@ -641,6 +673,8 @@ def _process_encoder_decoder_prompt(
         self,
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> EncoderDecoderInputs:
         """
         For encoder/decoder models only:
@@ -682,6 +716,7 @@ def _process_encoder_decoder_prompt(
             encoder_inputs = self._prompt_to_llm_inputs(
                 prompt["encoder_prompt"],
                 tokenization_kwargs=tokenization_kwargs,
+                mm_hash_overrides=mm_hash_overrides,
             )
             if (decoder_input := prompt["decoder_prompt"]) is None:
                 decoder_inputs = None
@@ -697,6 +732,7 @@ def _process_encoder_decoder_prompt(
             inputs = self._prompt_to_llm_inputs(
                 prompt,
                 tokenization_kwargs=tokenization_kwargs,
+                mm_hash_overrides=mm_hash_overrides,
             )
             if self.model_config.is_multimodal_model:
                 # Encoder-Decoder Multimodal model
@@ -712,6 +748,8 @@ async def _process_encoder_decoder_prompt_async(
         self,
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> EncoderDecoderInputs:
         """
         Async version of
@@ -724,6 +762,7 @@ async def _process_encoder_decoder_prompt_async(
             encoder_task = self._prompt_to_llm_inputs_async(
                 prompt["encoder_prompt"],
                 tokenization_kwargs=tokenization_kwargs,
+                mm_hash_overrides=mm_hash_overrides,
             )
 
             if (decoder_input := prompt["decoder_prompt"]) is None:
@@ -733,6 +772,7 @@ async def _process_encoder_decoder_prompt_async(
                 decoder_task = self._prompt_to_llm_inputs_async(
                     decoder_input,
                     tokenization_kwargs=tokenization_kwargs,
+                    mm_hash_overrides=mm_hash_overrides,
                 )
 
                 encoder_inputs, decoder_inputs = await asyncio.gather(
@@ -748,6 +788,7 @@ async def _process_encoder_decoder_prompt_async(
             inputs = await self._prompt_to_llm_inputs_async(
                 prompt,
                 tokenization_kwargs=tokenization_kwargs,
+                mm_hash_overrides=mm_hash_overrides,
             )
             if self.model_config.is_multimodal_model:
                 # Encoder-Decoder Multimodal model
@@ -774,6 +815,8 @@ def _process_decoder_only_prompt(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> DecoderOnlyInputs:
         """
         For decoder-only models:
@@ -794,6 +837,7 @@ def _process_decoder_only_prompt(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
+            mm_hash_overrides=mm_hash_overrides,
         )
 
         return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -803,6 +847,8 @@ async def _process_decoder_only_prompt_async(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> DecoderOnlyInputs:
         """
         Async version of
@@ -812,6 +858,7 @@ async def _process_decoder_only_prompt_async(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
+            mm_hash_overrides=mm_hash_overrides,
         )
 
         return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -821,6 +868,8 @@ def preprocess(
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
        lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> ProcessorInputs:
         """Preprocess the input prompt."""
         if self.model_config.is_encoder_decoder:
@@ -829,6 +878,7 @@ def preprocess(
             return self._process_encoder_decoder_prompt(
                 prompt,
                 tokenization_kwargs,
+                mm_hash_overrides=mm_hash_overrides,
             )
 
         if is_explicit_encoder_decoder_prompt(prompt):
@@ -840,13 +890,16 @@ def preprocess(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
            lora_request=lora_request,
+            mm_hash_overrides=mm_hash_overrides,
         )
 
     async def preprocess_async(
         self,
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
+        *,
+        mm_hash_overrides: Optional[dict[str, list[str]]] = None,
     ) -> ProcessorInputs:
         """
         Async version of
@@ -858,6 +911,7 @@ async def preprocess_async(
             return await self._process_encoder_decoder_prompt_async(
                 prompt,
                 tokenization_kwargs,
+                mm_hash_overrides=mm_hash_overrides,
             )
 
         if is_explicit_encoder_decoder_prompt(prompt):
@@ -869,6 +923,7 @@ async def preprocess_async(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
+            mm_hash_overrides=mm_hash_overrides,
         )
 
     def clear_cache(self) -> None:
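
Taken together, these hunks thread one new keyword-only parameter, mm_hash_overrides: Optional[dict[str, list[str]]], from the public preprocess()/preprocess_async() entry points through every singleton-prompt path down to mm_processor.apply(). Making it keyword-only (the inserted bare "*") means existing positional callers keep working unchanged. Below is a minimal caller sketch, assuming the surrounding class is vLLM's InputPreprocessor from vllm/inputs/preprocess.py; the helper name, the "image" modality key, and the idea of passing precomputed per-item hashes are illustrative assumptions, not part of this diff.

    from typing import Any, Optional

    from vllm.inputs.preprocess import InputPreprocessor


    def preprocess_with_hash_overrides(
        preprocessor: InputPreprocessor,
        prompt: dict[str, Any],
        image_hashes: Optional[list[str]] = None,
    ):
        # Map each modality to one precomputed hash per multi-modal item
        # in the prompt; None keeps the default behavior, where hashes
        # are derived from the media content itself.
        overrides = {"image": image_hashes} if image_hashes else None
        return preprocessor.preprocess(
            prompt,
            mm_hash_overrides=overrides,  # forwarded down to mm_processor.apply()
        )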