5 changes: 3 additions & 2 deletions tensorrt_llm/_torch/models/modeling_gemma3vl.py
@@ -42,8 +42,9 @@ def __init__(self,
model_path: str,
config: PretrainedConfig,
tokenizer: AutoTokenizer,
trust_remote_code: bool = True):
super().__init__()
trust_remote_code: bool = True,
**kwargs):
super().__init__(**kwargs)
self._config = config
self._tokenizer = tokenizer
self._model_path = model_path
5 changes: 3 additions & 2 deletions tensorrt_llm/_torch/models/modeling_hyperclovax.py
@@ -572,8 +572,9 @@ def __init__(self,
model_path: str,
config: PretrainedConfig,
tokenizer: AutoTokenizer,
trust_remote_code: bool = True):
super().__init__()
trust_remote_code: bool = True,
**kwargs):
super().__init__(**kwargs)
self._config = config
self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
model_path,
5 changes: 3 additions & 2 deletions tensorrt_llm/_torch/models/modeling_llama.py
@@ -1053,8 +1053,9 @@ def __init__(self,
model_path: str,
config: PretrainedConfig,
tokenizer: AutoTokenizer,
trust_remote_code: bool = True):
super().__init__()
trust_remote_code: bool = True,
**kwargs):
super().__init__(**kwargs)
self._config = config
self._dtype = self._config.torch_dtype
self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
5 changes: 3 additions & 2 deletions tensorrt_llm/_torch/models/modeling_llava_next.py
@@ -43,8 +43,9 @@ def __init__(self,
model_path: str,
config: PretrainedConfig,
tokenizer: AutoTokenizer,
trust_remote_code: bool = True):
super().__init__()
trust_remote_code: bool = True,
**kwargs):
super().__init__(**kwargs)
self._config = config
self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
model_path,
3 changes: 2 additions & 1 deletion tensorrt_llm/_torch/models/modeling_mistral.py
@@ -224,8 +224,9 @@ def __init__(
config: PretrainedConfig,
tokenizer: Optional[AutoTokenizer],
trust_remote_code: bool = False,
**kwargs,
):
super().__init__()
super().__init__(**kwargs)
self._config = config
self._dtype = self._config.torch_dtype
self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
5 changes: 3 additions & 2 deletions tensorrt_llm/_torch/models/modeling_nanov2vlm.py
@@ -262,8 +262,9 @@ def __init__(self,
model_path: str,
config: transformers.PretrainedConfig,
tokenizer: transformers.AutoTokenizer,
trust_remote_code: bool = True):
super().__init__()
trust_remote_code: bool = True,
**kwargs):
super().__init__(**kwargs)
if not trust_remote_code:
raise ValueError("trust_remote_code must be True for Phi4MM")

5 changes: 3 additions & 2 deletions tensorrt_llm/_torch/models/modeling_phi4mm.py
@@ -763,8 +763,9 @@ def __init__(self,
model_path: str,
config: transformers.PretrainedConfig,
tokenizer: transformers.AutoTokenizer,
trust_remote_code: bool = True):
super().__init__()
trust_remote_code: bool = True,
**kwargs):
super().__init__(**kwargs)
if not trust_remote_code:
raise ValueError("trust_remote_code must be True for Phi4MM")

6 changes: 3 additions & 3 deletions tensorrt_llm/_torch/models/modeling_qwen2vl.py
@@ -95,9 +95,9 @@ def __init__(self,
model_path: str,
config: PretrainedConfig,
tokenizer: AutoTokenizer,
trust_remote_code: bool = True):

super().__init__()
trust_remote_code: bool = True,
**kwargs):
super().__init__(**kwargs)
self._config = config
self._dtype = self._config.torch_dtype
self._tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
3 changes: 3 additions & 0 deletions tensorrt_llm/inputs/registry.py
@@ -42,6 +42,9 @@ class InputProcessor(Protocol):
config: any
tokenizer: any

def __init__(self, **kwargs):
super().__init__(**kwargs)

Comment on lines +45 to +47
@coderabbitai coderabbitai bot (Contributor), Nov 4, 2025

⚠️ Potential issue | 🔴 Critical

Fix Protocol __init__ to avoid super() in MRO (TypeError risk).

InputProcessor is in the MRO of concrete processors; calling super().__init__(**kwargs) here can bubble kwargs to ABC/object and crash. Swallow kwargs instead.

Apply this diff:

 class InputProcessor(Protocol):
@@
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    def __init__(self, **kwargs) -> None:
+        # Protocol initializer intentionally swallows kwargs to terminate
+        # cooperative __init__ chains safely.
+        pass
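For illustration, here is a minimal runnable sketch of the failure mode this comment describes; the class names are hypothetical stand-ins, not the actual TensorRT-LLM classes. When every class keeps forwarding **kwargs and nothing stops the chain before object, instantiation raises a TypeError.

class LeakyProtocolLike:
    # Stand-in for a base that blindly forwards **kwargs up the MRO.
    def __init__(self, **kwargs):
        super().__init__(**kwargs)  # next in line is object, which rejects extra kwargs

class ConcreteProcessor(LeakyProtocolLike):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

try:
    ConcreteProcessor(use_fast=True)
except TypeError as exc:
    print(exc)  # object.__init__() takes exactly one argument (the instance to initialize)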

Collaborator (@jaedeok-nvidia)

Why do we need lines 45-46? I think we can simply skip the definition of __init__ here. Is there any specific reason?

Contributor (coderabbitai bot)

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Search for how InputProcessor is used as a type annotation
rg -n "InputProcessor\[" --type=py

# Search for where InputProcessor or its subclasses are instantiated
rg -nP ":\s*(Type\[)?InputProcessor" --type=py -A 2

# Check how input processors are created/instantiated
rg -n "input_processor_cls\(" --type=py -B 2 -A 2

Length of output: 664


@jaedeok-nvidia You're absolutely right! The __init__ definition in the InputProcessor Protocol is not necessary and can be safely removed.

Here's why:

  1. Protocols don't enforce __init__ signatures: Python's Protocol class is used for structural typing (checking if implementations have the right methods/attributes). Type checkers like mypy don't actually enforce __init__ signatures on Protocol implementations.

  2. It creates MRO complications: When BaseMultimodalInputProcessor (which inherits from both InputProcessor and ABC) calls super().__init__(**kwargs), having an __init__ in the Protocol that also calls super().__init__(**kwargs) can cause issues. The kwargs would bubble up to ABC or object, which don't accept arbitrary kwargs, potentially causing a TypeError.

  3. No practical benefit: The Protocol is used for type annotations (e.g., Type[InputProcessor]), not for enforcing constructor signatures. Concrete implementations like DefaultInputProcessor and BaseMultimodalInputProcessor define their own __init__ methods independently.

Recommendation: Remove lines 45-46 entirely. The Protocol should only define the structural interface (model_path, config, tokenizer attributes and the __call__ method), not the constructor.

class InputProcessor(Protocol):
    """..."""
    
    model_path: any
    config: any
    tokenizer: any

    # No __init__ needed here
    
    def __call__(
        self, inputs: TextPrompt, sampling_params: SamplingParams
    ) -> Tuple[List[int], Optional[ExtraProcessedInputs]]:
        ...

🧬 Code Graph Analysis Results
  • File: tensorrt_llm/inputs/registry.py, lines around the InputProcessor class definition (includes __init__):
class InputProcessor(Protocol):
    """
    Protocol for InputProcessor classes.
    InputProcessor's functions are more relevant to multimodal use cases:
        - Preprocess: extra steps to manipulate the prompts.
        - Forward: the main logic to process the inputs. In multimodal cases, this may run a multimodal encoder model.
        - Postprocess: extra steps to manipulate the outputs
    Model-specific implementation should:
        - Inherit this class and implement the forward() method.
        - Register the inherited class to the model class using @register_input_processor(...)
    """

    model_path: any
    config: any
    tokenizer: any

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __call__(
        self, inputs: TextPrompt, sampling_params: SamplingParams
    ) -> Tuple[List[int], Optional[ExtraProcessedInputs]]:
        ...
  • File: tensorrt_llm/inputs/registry.py, lines around the BaseMultimodalInputProcessor __init__ (shows use of super() and initialization of internal flags):
class BaseMultimodalInputProcessor(InputProcessor, ABC):
    """
    Base class for multimodal input processors with default implementations
    of get_num_tokens_per_image and get_num_tokens_per_video methods.

    This class provides default implementations that work with most AutoProcessor-based
    models. Specific processors can override these methods if they need custom logic.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._use_fast: bool = kwargs.get('use_fast', True)
        self._multimodal_hashing_supported: Optional[bool] = None

Collaborator Author (@yechank-nvidia)

@jaedeok-nvidia, the reason InputProcessor has an __init__ is that the MRO did not work correctly without one. For example, in Qwen2InputProcessor, the super().__init__() chain stopped at InputProcessor because it had no __init__ of its own, so the initialization of BaseMultimodalDummyInputsBuilder was skipped, which caused an error.
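As a rough illustration of this point (hypothetical stand-in names, and leaving out the typing.Protocol machinery): if the class sitting at InputProcessor's position in the MRO does not forward to super().__init__(), the base that follows it in the MRO is never initialized.

class DummyInputsBuilderLike:
    # Stand-in for BaseMultimodalDummyInputsBuilder.
    def __init__(self, **kwargs):
        super().__init__()
        self.builder_ready = True

class NonForwardingProtocolLike:
    # Stand-in for an InputProcessor-like class that stops the cooperative chain.
    def __init__(self, **kwargs):
        pass  # does not call super().__init__()

class ProcessorLike(NonForwardingProtocolLike, DummyInputsBuilderLike):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

p = ProcessorLike(use_fast=True)
print(hasattr(p, "builder_ready"))  # False: DummyInputsBuilderLike.__init__ never ran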

Contributor (coderabbitai bot)

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

Collaborator Author (@yechank-nvidia)

@coderabbitai can you share your thoughts?

Contributor (coderabbitai bot)

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Find classes that inherit from both BaseMultimodalInputProcessor and BaseMultimodalDummyInputsBuilder
rg -n "class.*\(.*BaseMultimodalInputProcessor.*BaseMultimodalDummyInputsBuilder.*\)|class.*\(.*BaseMultimodalDummyInputsBuilder.*BaseMultimodalInputProcessor.*\)" --type=py -A 5

# Search for Qwen2InputProcessor or Qwen2VL input processor classes
rg -n "class.*Qwen2.*InputProcessor" --type=py -A 10

# Check all files that have classes inheriting from BaseMultimodalDummyInputsBuilder
rg -n "class.*BaseMultimodalDummyInputsBuilder" --type=py -A 5

Length of output: 1905


@yechank-nvidia You are absolutely correct! Thank you for the clarification. My initial analysis was wrong.

Here's what's actually happening:

The Multiple Inheritance Chain

Classes like Qwen2VLInputProcessorBase and HCXVisionInputProcessor inherit from both BaseMultimodalInputProcessor and BaseMultimodalDummyInputsBuilder:

class Qwen2VLInputProcessorBase(BaseMultimodalInputProcessor,
                                BaseMultimodalDummyInputsBuilder):

The Method Resolution Order (MRO) for such classes is:

  1. Qwen2VLInputProcessorBase
  2. BaseMultimodalInputProcessor
  3. InputProcessor (Protocol)
  4. BaseMultimodalDummyInputsBuilder
  5. ABC
  6. object

Why InputProcessor.__init__ is Required

With cooperative multiple inheritance (using super().__init__(**kwargs) throughout), each class must participate in the chain:

  1. Qwen2VLInputProcessorBase.__init__ calls super().__init__(**kwargs)
  2. BaseMultimodalInputProcessor.__init__ calls super().__init__(**kwargs)
  3. InputProcessor.__init__ must call super().__init__(**kwargs) to continue
  4. BaseMultimodalDummyInputsBuilder.__init__ gets called and initialized properly

Without __init__ in InputProcessor Protocol:

  • The MRO chain could break or skip BaseMultimodalDummyInputsBuilder
  • The Protocol base class's __init__ might not accept **kwargs, causing TypeError

With pass instead of super().__init__(**kwargs):

  • The chain stops at InputProcessor
  • BaseMultimodalDummyInputsBuilder.__init__ never executes
  • Missing initialization → runtime errors

Conclusion

The current implementation is correct:

def __init__(self, **kwargs):
    super().__init__(**kwargs)

This ensures the cooperative multiple inheritance chain continues properly through all base classes. The suggestion to remove or replace with pass would indeed break the initialization for classes using multiple inheritance.
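For reference, a small runnable sketch of the cooperative chain described above, again with hypothetical stand-in names and without the typing.Protocol machinery: every class forwards **kwargs via super().__init__, and the last cooperative base stops forwarding before object, so each base gets initialized.

class DummyInputsBuilderLike:
    # Last cooperative base: stops forwarding so object receives no kwargs.
    def __init__(self, **kwargs):
        super().__init__()
        self.builder_ready = True

class InputProcessorLike:
    # Plays the InputProcessor role in the MRO: keeps the chain going.
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

class MultimodalProcessorLike(InputProcessorLike):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._use_fast = kwargs.get("use_fast", True)

class QwenLikeProcessor(MultimodalProcessorLike, DummyInputsBuilderLike):
    def __init__(self, model_path, **kwargs):
        super().__init__(**kwargs)
        self._model_path = model_path

proc = QwenLikeProcessor("some/model/path", use_fast=False)
print([c.__name__ for c in type(proc).__mro__])
print(proc.builder_ready, proc._use_fast)  # True False: every base initialized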


🧬 Code Graph Analysis Results

[tensorrt_llm/inputs/registry.py] - Summary of potentially relevant components for understanding multimodal input processing and registry behavior

  • InputProcessor (Protocol)

    • Description: Protocol defining the interface for input processors. Focuses on preprocess/forward/postprocess steps and registering concrete implementations via @register_input_processor.
    • Key aspects to note:
      • Expected attributes: model_path, config, tokenizer
      • __call__(self, inputs: TextPrompt, sampling_params: SamplingParams) -> Tuple[List[int], Optional[ExtraProcessedInputs]]
    • Why relevant: Central to how different model types provide specialized input processing in the registry.
  • DefaultInputProcessor (class)

    • Description: Minimal, default implementation of InputProcessor that tokenizes text prompts.
    • Key behaviors:
      • Requires a tokenizer
      • Encodes inputs["prompt"] (and optional "query") with support for truncation and special tokens
    • Why relevant: Used when no model-specific input processor is registered; baseline behavior in the registry.
  • BaseMultimodalInputProcessor (class)

    • Description: Abstract base for multimodal input processors with default implementations for token-count helpers.
    • Key capabilities:
      • Abstract accessors for processor, tokenizer, config, dtype
      • Methods like get_vocab_size, get_mm_token_ids, get_mm_special_token_ids
      • Helpers: get_num_tokens_per_image, get_num_tokens_per_video with default delegation to processor methods
    • Why relevant: Underpins multimodal handling in the registry; used by support_multimodal_disaggregated and related hashing paths.
  • BaseMultimodalDummyInputsBuilder (class)

    • Description: Utility for constructing dummy inputs (profiling/benchmarking).
    • Why relevant: Ties into multimodal input generation used by the registry’s tooling.
  • MultimodalPlaceholderPlacement (enum)

    • Description: Enum describing where multimodal placeholders appear relative to text (BEFORE_TEXT, AFTER_TEXT, etc.).
    • Why relevant: Placeholder placement is stored and used by the registry to assemble prompts.
  • MultimodalPlaceholderMetadata (dataclass)

    • Description: Metadata container for multimodal placeholder configuration:
      • placeholder_map: Dict[str, str] mapping modality to placeholder
      • placeholder_placement: MultimodalPlaceholderPlacement
      • placeholders_separator: str
    • Why relevant: Core data structure used by the registry to describe how placeholders are inserted for multimodal content.
  • MultimodalPlaceholderRegistry (class)

    • Description: Registry that tracks placeholder metadata per model type.
    • Key methods (summary):
      • __str__: pretty-prints registry contents
      • set_placeholder_metadata(model_type: str, placeholder_metadata: MultimodalPlaceholderMetadata)
      • remove_placeholder_metadata(model_type: str)
      • is_valid(model_type: str, modality: str) -> bool
      • get_placeholder_metadata(model_type: str) -> MultimodalPlaceholderMetadata
      • get_placeholder(model_type: str, modality: str) -> str
      • get_placeholder_placement(model_type: str) -> MultimodalPlaceholderPlacement
      • get_placeholders_separator(model_type: str) -> str
      • get_registered_image_model_types() -> Tuple[str, ...]
      • get_registered_video_model_types() -> Tuple[str, ...]
      • get_registered_audio_model_types() -> Tuple[str, ...]
      • get_registered_model_types() -> Tuple[str, ...]
    • Why relevant: Central store for how multimodal content should be inserted for each model type; used by input processor creation and prompt assembly.
  • MULTIMODAL_PLACEHOLDER_REGISTRY (instance)

    • Description: Shared singleton-like registry instance for multimodal placeholders.
    • Why relevant: Global registry used by other components to query/modify placeholder behavior.
  • InputProcessorRegistry (class)

    • Description: Registry that maps model types to their corresponding InputProcessor classes.
    • Key member: _input_processors_cls_by_model_type: Dict[Type[nn.Module], Type[InputProcessor]]
    • Why relevant: Core mechanism by which the system associates a model class with a specific input processor.
  • INPUT_PROCESSOR_REGISTRY (instance)

    • Description: Shared instance of InputProcessorRegistry.
    • Why relevant: Used by decorator @register_input_processor to register processors for models.
  • support_multimodal_disaggregated(model_cls: Type[nn.Module])

    • Description: Decorator to declare support for multimodal disaggregated inputs on a model class.
    • Behavior:
      • Looks up the registered processor class for the model
      • Validates processor inherits from BaseMultimodalInputProcessor
      • Ensures processor implements get_prompt_token_ids
      • Sets supports_mm_disagg (processor and model class) to True
    • Exceptions: RuntimeError/TypeError if registration prerequisites are not met
    • Why relevant: Mechanism to enable multimodal disaggregation support in models.
  • register_input_processor(processor_cls, model_type, placeholder_metadata=None)

    • Description: Decorator to register an input processor class for a given model type with placeholder metadata.
    • Behavior:
      • Stores processor_cls in INPUT_PROCESSOR_REGISTRY._input_processors_cls_by_model_type
      • Requires non-None placeholder_metadata
      • Updates MULTIMODAL_PLACEHOLDER_REGISTRY with metadata for the given model_type
    • Exceptions: ValueError if placeholder_metadata is missing
    • Why relevant: Primary means of wiring model types to their input processors and placeholder configurations.
  • create_input_processor(model_path_or_dir, tokenizer, checkpoint_format="HF")

    • Description: Factory function to create an input processor for a specific model.
    • Behavior:
      • Optionally loads HF config via ModelConfig if checkpoint_format == "HF"
      • Determines model class via get_model_architecture(config)
      • Looks up registered input processor class for that model
      • Returns an instance of the registered processor or falls back to DefaultInputProcessor
    • Raises/Errors: Falls back gracefully to DefaultInputProcessor if unregistered
    • Why relevant: Central factory used to instantiate the correct input processor at runtime.
  • create_input_processor_with_hash(input_processor, hash_lib=default_hasher)

    • Description: Wraps an input processor to apply multimodal hashing logic to inputs before processing.
    • Returns: A callable wrapper that processes multimodal data, computes hashes, derives mm token lengths/positions, constructs MultimodalInput, and integrates with the underlying input processor.
    • Key behaviors:
      • Requires multi_modal_data in inputs
      • Computes hashes via apply_mm_hashes
      • Determines mm token lengths via find_mm_token_lengths
      • Derives start positions via find_mm_token_positions
      • Validates inputs with validate_mm_inputs
      • Builds MultimodalInput and attaches as extra_processed_inputs
      • Fallback logic to disable hashing on failure and retry with base processor
    • Exceptions/Errors: Raises ValueError if necessary fields are missing; logs warnings on hashing failures
    • Why relevant: Enables multimodal hashing support when available, as an optional wrapper around a base multimodal input processor.
  • Key helper and data flow references (conceptual, tied to this file):

    • TextPrompt, ExtraProcessedInputs types and how they flow through the InputProcessor
    • MultimodalInput.from_components and its to_tensor representation
    • The interplay between get_vocab_size, get_mm_token_ids, get_mm_special_token_ids from the input processor and the hashing utilities (find_mm_token_lengths, find_mm_token_positions, hexdigest_to_int32, validate_mm_inputs)

Note: The above entries summarize the relevant components and their purposes within tensorrt_llm/inputs/registry.py and adjacent code paths that interact with it. If you want, I can extract and present specific small code blocks (from the provided snippets) that map directly to a particular component listed here.


✏️ Learnings added
Learnt from: yechank-nvidia
Repo: NVIDIA/TensorRT-LLM PR: 8916
File: tensorrt_llm/inputs/registry.py:45-47
Timestamp: 2025-11-05T01:53:40.145Z
Learning: In tensorrt_llm/inputs/registry.py, the InputProcessor Protocol must define __init__(self, **kwargs) with super().__init__(**kwargs) to support cooperative multiple inheritance. Classes like Qwen2VLInputProcessorBase inherit from both BaseMultimodalInputProcessor and BaseMultimodalDummyInputsBuilder, and removing or not calling super() in InputProcessor.__init__ would break the MRO chain, preventing BaseMultimodalDummyInputsBuilder from being initialized.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.

Collaborator @jaedeok-nvidia Nov 5, 2025

Oh, here Protocol is typing.Protocol, which means it is a kind of type. I'm not familiar with the best practices for Protocol.

This document explains the usage of the Protocol class, and the section "Explicitly declaring implementation" may help in understanding how Protocol works.

From my understanding, BaseMultimodalInputProcessor shouldn't call super().__init__ (here).

And I think Qwen2VLInputProcessorBase needs to be revised (here).

class Qwen2VLInputProcessorBase(BaseMultimodalInputProcessor, BaseMultimodalDummyInputsBuilder):
    def __init__(self,
                 model_path: str,
                 config: PretrainedConfig,
                 tokenizer: AutoTokenizer,
                 trust_remote_code: bool = True,
                 **kwargs):   # For `use_fast`.
        super().__init__(model_path=model_path, config=config, tokenizer=tokenizer, **kwargs)
        self._dtype = self._config.torch_dtype
        self._model_path = model_path

and accordingly,

class BaseMultimodalInputProcessor(InputProcessor, ABC):
    def __init__(self, 
                 model_path,
                 config,
                 tokenizer,
                 trust_remote_code: bool = True,
                 **kwargs) -> None:
        self.config = config
        self.model_path = model_path
        self.tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(
            model_path)
        self._use_fast: bool = kwargs.get('use_fast', True)
        self._multimodal_hashing_supported: Optional[bool] = None

def __call__(
self, inputs: TextPrompt, sampling_params: SamplingParams
) -> Tuple[List[int], Optional[ExtraProcessedInputs]]: