
Commit 60501f8

rahul-tuli and claude committed
fix: Update no_rope_layers configuration validation for Eagle3
Fixes an IndexError during draft model initialization by properly padding the no_rope_layers configuration to match the exact pattern from llama4_eagle.py. The configuration validation now correctly handles the layer offset for speculative decoding compatibility.

Co-Authored-By: Claude <[email protected]>
Signed-off-by: Rahul Tuli <[email protected]>
1 parent 94bc6ba commit 60501f8
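To make the failure mode concrete, here is a minimal, hypothetical sketch of the indexing problem the padding addresses. It assumes, following the comments in the diff below, that each draft layer is addressed by an absolute index of `start_layer_id + i` and that `no_rope_layers` is a per-layer list; the concrete numbers are made up for illustration and are not taken from the commit.

```python
# Hypothetical illustration of the IndexError this commit fixes.
# All values here are assumptions for the sketch, not real config values.
start_layer_id = 48         # assumed depth of the target model
draft_no_rope_layers = [1]  # draft config: one entry per draft layer

draft_layer_id = start_layer_id + 0  # absolute index of the first draft layer

# Before the fix: the list only covers the draft's own layers, so indexing
# it with the absolute layer index overruns it and raises IndexError.
try:
    flag = draft_no_rope_layers[draft_layer_id]
except IndexError:
    flag = None  # the failure during draft model initialization

# After the fix: pad the front so absolute layer indices land on the right entry.
draft_no_rope_layers = [0] * start_layer_id + draft_no_rope_layers
flag = draft_no_rope_layers[draft_layer_id]  # -> 1, no error
```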

File tree

1 file changed (+8, -7 lines)


vllm/model_executor/models/llama4_eagle3.py

Lines changed: 8 additions & 7 deletions
@@ -107,12 +107,12 @@ def __init__(
         # Single decoder layer following Eagle3 pattern
         # The layer ID is offset by target model depth to maintain
         # correct parameter naming and quantization mappings
-        self.layer = nn.ModuleList([
+        self.layers = nn.ModuleList([
             Llama4DecoderLayer(
                 self.config,
                 quant_config=quant_config,
-                prefix=maybe_prefix(prefix, f"layers.{start_layer_id}"),
-            )
+                prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"),
+            ) for i in range(self.config.num_hidden_layers)
         ])
 
         # Eagle3 auxiliary hidden state combination layer
@@ -273,10 +273,11 @@ def _validate_and_update_config(
             "Mixture of Experts layers are not supported in Eagle3 draft models"
         )
 
-        # Pad layer-specific configurations for start_layer_id offset
-        # This ensures correct behavior when draft layers have offset indices
-        self.config.no_rope_layers = (
-            [0] * start_layer_id + getattr(self.config, 'no_rope_layers', []))
+        # Draft model layer index is increased by start_layer_id,
+        # so we need to pad relevant configs accordingly
+        self.config.no_rope_layers = [
+            0
+        ] * start_layer_id + self.config.no_rope_layers
 
         # Update quantization configuration for layer offset
         if isinstance(quant_config, TorchAOConfig):
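As a side note on the first hunk, here is a hedged sketch of the parameter prefixes the new `self.layers` comprehension produces. The values of `start_layer_id` and `num_hidden_layers` are illustrative assumptions, and `maybe_prefix` is a simplified stand-in rather than the actual vLLM helper.

```python
# Sketch of the prefix naming produced by the list comprehension in the first hunk.
start_layer_id = 48    # assumed target model depth
num_hidden_layers = 1  # assumed number of Eagle3 draft layers
prefix = "model"

def maybe_prefix(prefix: str, name: str) -> str:
    # Simplified stand-in for vLLM's helper: join with a dot when a prefix is set.
    return f"{prefix}.{name}" if prefix else name

prefixes = [
    maybe_prefix(prefix, f"layers.{i + start_layer_id}")
    for i in range(num_hidden_layers)
]
print(prefixes)  # ['model.layers.48'] -- names continue the target model's numbering
```

Keeping the draft layer names aligned with the target model's numbering is what lets parameter naming and quantization mappings resolve correctly, per the comment in the diff.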
