diff --git a/tensorrt_llm/_torch/models/modeling_gemma3vl.py b/tensorrt_llm/_torch/models/modeling_gemma3vl.py
index a5308a3b524..7f020e03b54 100644
--- a/tensorrt_llm/_torch/models/modeling_gemma3vl.py
+++ b/tensorrt_llm/_torch/models/modeling_gemma3vl.py
@@ -42,8 +42,13 @@ def __init__(self,
                  model_path: str,
                  config: PretrainedConfig,
                  tokenizer: AutoTokenizer,
-                 trust_remote_code: bool = True):
-        super().__init__()
+                 trust_remote_code: bool = True,
+                 **kwargs):
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         self._config = config
         self._tokenizer = tokenizer
         self._model_path = model_path
diff --git a/tensorrt_llm/_torch/models/modeling_hyperclovax.py b/tensorrt_llm/_torch/models/modeling_hyperclovax.py
index 9dcf039d175..04e6d975740 100644
--- a/tensorrt_llm/_torch/models/modeling_hyperclovax.py
+++ b/tensorrt_llm/_torch/models/modeling_hyperclovax.py
@@ -572,8 +572,13 @@ def __init__(self,
                  model_path: str,
                  config: PretrainedConfig,
                  tokenizer: AutoTokenizer,
-                 trust_remote_code: bool = True):
-        super().__init__()
+                 trust_remote_code: bool = True,
+                 **kwargs):
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         self._config = config
         self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
             model_path,
diff --git a/tensorrt_llm/_torch/models/modeling_llama.py b/tensorrt_llm/_torch/models/modeling_llama.py
index 38d487a7eae..474ce0f9c8c 100644
--- a/tensorrt_llm/_torch/models/modeling_llama.py
+++ b/tensorrt_llm/_torch/models/modeling_llama.py
@@ -1053,8 +1053,13 @@ def __init__(self,
                  model_path: str,
                  config: PretrainedConfig,
                  tokenizer: AutoTokenizer,
-                 trust_remote_code: bool = True):
-        super().__init__()
+                 trust_remote_code: bool = True,
+                 **kwargs):
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         self._config = config
         self._dtype = self._config.torch_dtype
         self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
diff --git a/tensorrt_llm/_torch/models/modeling_llava_next.py b/tensorrt_llm/_torch/models/modeling_llava_next.py
index 677d67ba9d9..0fd3a9a5104 100644
--- a/tensorrt_llm/_torch/models/modeling_llava_next.py
+++ b/tensorrt_llm/_torch/models/modeling_llava_next.py
@@ -43,8 +43,13 @@ def __init__(self,
                  model_path: str,
                  config: PretrainedConfig,
                  tokenizer: AutoTokenizer,
-                 trust_remote_code: bool = True):
-        super().__init__()
+                 trust_remote_code: bool = True,
+                 **kwargs):
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         self._config = config
         self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
             model_path,
diff --git a/tensorrt_llm/_torch/models/modeling_mistral.py b/tensorrt_llm/_torch/models/modeling_mistral.py
index ec650c804b2..9ade4dee220 100644
--- a/tensorrt_llm/_torch/models/modeling_mistral.py
+++ b/tensorrt_llm/_torch/models/modeling_mistral.py
@@ -224,8 +224,13 @@ def __init__(
         config: PretrainedConfig,
         tokenizer: Optional[AutoTokenizer],
         trust_remote_code: bool = False,
+        **kwargs,
     ):
-        super().__init__()
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         self._config = config
         self._dtype = self._config.torch_dtype
         self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
diff --git a/tensorrt_llm/_torch/models/modeling_nanov2vlm.py b/tensorrt_llm/_torch/models/modeling_nanov2vlm.py
index 5e8f2ecd078..c386b271ec9 100644
--- a/tensorrt_llm/_torch/models/modeling_nanov2vlm.py
+++ b/tensorrt_llm/_torch/models/modeling_nanov2vlm.py
@@ -262,8 +262,13 @@ def __init__(self,
                  model_path: str,
                  config: transformers.PretrainedConfig,
                  tokenizer: transformers.AutoTokenizer,
-                 trust_remote_code: bool = True):
-        super().__init__()
+                 trust_remote_code: bool = True,
+                 **kwargs):
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         if not trust_remote_code:
             raise ValueError("trust_remote_code must be True for Phi4MM")
 
diff --git a/tensorrt_llm/_torch/models/modeling_phi4mm.py b/tensorrt_llm/_torch/models/modeling_phi4mm.py
index f04e6280b64..f10f284c34f 100644
--- a/tensorrt_llm/_torch/models/modeling_phi4mm.py
+++ b/tensorrt_llm/_torch/models/modeling_phi4mm.py
@@ -763,8 +763,13 @@ def __init__(self,
                  model_path: str,
                  config: transformers.PretrainedConfig,
                  tokenizer: transformers.AutoTokenizer,
-                 trust_remote_code: bool = True):
-        super().__init__()
+                 trust_remote_code: bool = True,
+                 **kwargs):
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         if not trust_remote_code:
             raise ValueError("trust_remote_code must be True for Phi4MM")
 
diff --git a/tensorrt_llm/_torch/models/modeling_qwen2vl.py b/tensorrt_llm/_torch/models/modeling_qwen2vl.py
index 008051b4884..0e77f4aa309 100644
--- a/tensorrt_llm/_torch/models/modeling_qwen2vl.py
+++ b/tensorrt_llm/_torch/models/modeling_qwen2vl.py
@@ -95,10 +95,13 @@ def __init__(self,
                  model_path: str,
                  config: PretrainedConfig,
                  tokenizer: AutoTokenizer,
-                 trust_remote_code: bool = True):
-
-        super().__init__()
-        self._config = config
+                 trust_remote_code: bool = True,
+                 **kwargs):
+        super().__init__(model_path=model_path,
+                         config=config,
+                         tokenizer=tokenizer,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         self._dtype = self._config.torch_dtype
         self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
             model_path)
diff --git a/tensorrt_llm/inputs/registry.py b/tensorrt_llm/inputs/registry.py
index 394ebd04532..158878dbfcc 100644
--- a/tensorrt_llm/inputs/registry.py
+++ b/tensorrt_llm/inputs/registry.py
@@ -42,6 +42,9 @@ class InputProcessor(Protocol):
     config: any
     tokenizer: any
 
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
     def __call__(
         self, inputs: TextPrompt, sampling_params: SamplingParams
     ) -> Tuple[List[int], Optional[ExtraProcessedInputs]]:
@@ -127,8 +130,16 @@ class BaseMultimodalInputProcessor(InputProcessor, ABC):
     models. Specific processors can override these methods if they need custom logic.
     """
 
-    def __init__(self, **kwargs):
+    def __init__(self,
+                 model_path,
+                 config,
+                 tokenizer,
+                 trust_remote_code: bool = True,
+                 **kwargs) -> None:
         super().__init__(**kwargs)
+        self._config = config
+        self._model_path = model_path
+        self._tokenizer = tokenizer
         self._use_fast: bool = kwargs.get('use_fast', True)
         self._multimodal_hashing_supported: Optional[bool] = None
 
@@ -142,13 +153,13 @@ def processor(self) -> AutoProcessor:
     @abstractmethod
     def tokenizer(self) -> PreTrainedTokenizerBase:
         """The HF tokenizer for this model."""
-        ...
+        return self._tokenizer
 
     @property
     @abstractmethod
     def config(self) -> PretrainedConfig:
         """The HF pretrained config for this model."""
-        ...
+        return self._config
 
     @property
     @abstractmethod
@@ -306,7 +317,8 @@ def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.image_max_dim = kwargs.get('image_max_dim',
                                         self.DEFAULT_IMAGE_MAX_DIM)
-        self.img_min_dim = kwargs.get('img_min_dim', self.DEFAULT_IMAGE_MIN_DIM)
+        self.image_min_dim = kwargs.get('image_min_dim',
+                                        self.DEFAULT_IMAGE_MIN_DIM)
 
     @property
     @abstractmethod
@@ -331,7 +343,7 @@ def get_dummy_image(self, max_width: int, max_height: int) -> Image.Image:
 
     def get_dummy_prompt(self, input_seq_len: int):
         # TODO(yechank): We use the max resolution as starting point and keep reducing the resolution until the prompt length is less than the input sequence length.
         # Need to find better way to calculate the dummy prompt length as this iteration may not be efficient.
-        while self.image_max_dim >= self.img_min_dim:
+        while self.image_max_dim >= self.image_min_dim:
             image = self.get_dummy_image(max_width=self.image_max_dim,
                                          max_height=self.image_max_dim)
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 8d4dd24aaeb..61d15ca7ead 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -405,7 +405,6 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[multi_gpus_no_cache] S
 examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5606268)
 disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5626197)
 disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-True-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5628952)
-accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype SKIP
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False-False-False] SKIP (https://nvbugs/5629790)
 test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False] SKIP (https://nvbugs/5629791)
 accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5629792)
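
For context, a minimal sketch of the subclass pattern this patch converges on: BaseMultimodalInputProcessor.__init__ now stores model_path, config, and tokenizer, so per-model processors only forward their constructor arguments. The class name ExampleVLMInputProcessor is hypothetical and not part of the patch; only the base class and its new signature come from the diff above.

    from transformers import AutoTokenizer, PretrainedConfig

    from tensorrt_llm.inputs.registry import BaseMultimodalInputProcessor


    class ExampleVLMInputProcessor(BaseMultimodalInputProcessor):
        # Hypothetical processor mirroring the pattern applied to the
        # modeling_*.py files above; real subclasses still provide any
        # remaining abstract members (e.g. the `processor` property).

        def __init__(self,
                     model_path: str,
                     config: PretrainedConfig,
                     tokenizer: AutoTokenizer,
                     trust_remote_code: bool = True,
                     **kwargs):
            # Forward everything to the base class, which now sets
            # self._model_path, self._config, and self._tokenizer.
            super().__init__(model_path=model_path,
                             config=config,
                             tokenizer=tokenizer,
                             trust_remote_code=trust_remote_code,
                             **kwargs)
            # Model-specific setup (dtype, HF processor, etc.) goes here,
            # as in the individual processors touched by this diff.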