Commit 730f04d

Override get_input_embeddings in Eagle3 to process text-only inputs
Implement custom get_input_embeddings() in Eagle3LlamaForCausalLM that accepts multimodal parameters but only processes text embeddings. This ensures the Llama3-based Eagle3 drafter correctly handles text inputs while remaining compatible with multimodal verifier interfaces. The drafter receives multimodal context through auxiliary hidden states from the verifier rather than processing multimodal inputs directly. Signed-off-by: rahul-tuli <[email protected]> Signed-off-by: Rahul Tuli <[email protected]>
1 parent 58dfcf6 commit 730f04d

File tree

1 file changed: +9 −2 lines changed


vllm/model_executor/models/llama_eagle3.py

Lines changed: 9 additions & 2 deletions
@@ -21,6 +21,7 @@
 )
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.llama import LlamaDecoderLayer, LlamaForCausalLM
+from vllm.multimodal.inputs import NestedTensors

 from .utils import AutoWeightsLoader, maybe_prefix

@@ -241,8 +242,14 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             requires_grad=False,
         )

-    def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.model.get_input_embeddings(input_ids)
+    def get_input_embeddings(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: Optional[NestedTensors] = None,
+        is_multimodal: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        # The llama3 drafter only processes text embeddings
+        return self.model.embed_tokens(input_ids)

     def forward(
         self,
0 commit comments