Review comments

rahul-tuli · rahul-tuli · commit 2158396d0459 · 2025-10-03T08:44:58.000Z
Signed-off-by: Rahul Tuli <rtuli@redhat.com>
diff --git a/vllm/transformers_utils/configs/speculators/algos.py b/vllm/transformers_utils/configs/speculators/algos.py
@@ -17,11 +17,15 @@ def decorator(fn):
 def update_eagle3(config_dict: dict, vllm_config: dict) -> None:
     """
     Apply Eagle-3 specific configuration transformations.
-    
+
     Eagle-3 specific fields:
     - draft_vocab_size: Size of the draft model's vocabulary
     - target_hidden_size: Hidden size of the target model
     - norm_before_residual: Whether to apply norm before residual connection
+    - eagle_aux_hidden_state_layer_ids: List of layer indices from the base
+        model to use as auxiliary inputs for the Eagle3 drafter. These layers
+        provide intermediate hidden states that help the drafter make better
+        predictions. This is the standard field used in Eagle3 checkpoints.
     """
 
     vllm_config["draft_vocab_size"] = config_dict.get("draft_vocab_size")
@@ -33,5 +37,3 @@ def update_eagle3(config_dict: dict, vllm_config: dict) -> None:
     if config_dict.get("eagle_aux_hidden_state_layer_ids"):
         vllm_config["eagle_aux_hidden_state_layer_ids"] = config_dict[
             "eagle_aux_hidden_state_layer_ids"]
-    if config_dict.get("inference_type"):
-        vllm_config["inference_type"] = config_dict["inference_type"]
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -2753,14 +2753,14 @@ def load_model(self, eep_scale_up: bool = False) -> None:
 
                 # Try to get auxiliary layers from speculative config,
                 # otherwise use model's default layers
-                aux_layers = (self._get_eagle3_aux_layers_from_config() or
-                              self.model.get_eagle3_aux_hidden_state_layers())
-
-                if aux_layers != self.model.get_eagle3_aux_hidden_state_layers(
-                ):
+                aux_layers = self._get_eagle3_aux_layers_from_config()
+                if aux_layers:
                     logger.info(
                         "Using auxiliary layers from speculative config: %s",
                         aux_layers)
+                else:
+                    aux_layers = self.model.get_eagle3_aux_hidden_state_layers(
+                    )
 
                 self.model.set_aux_hidden_state_layers(aux_layers)
             time_after_load = time.perf_counter()
@@ -2814,7 +2814,11 @@ def load_model(self, eep_scale_up: bool = False) -> None:
                                            CUDAGraphMode.NONE, self.device)
 
     def _get_eagle3_aux_layers_from_config(self) -> Optional[tuple[int, ...]]:
-        """Extract Eagle3 auxiliary layer IDs from speculative config.
+        """Extract Eagle3 auxiliary layer indices from speculative config.
+
+        These indices specify which hidden states from the base model should
+        be used as auxiliary inputs for the Eagle3 drafter model during
+        speculative decoding.
 
         Returns:
             Tuple of layer indices if found in draft model config,
@@ -2824,18 +2828,13 @@ def _get_eagle3_aux_layers_from_config(self) -> Optional[tuple[int, ...]]:
                 and self.speculative_config.draft_model_config):
             return None
 
-        try:
-            hf_config = self.speculative_config.draft_model_config.hf_config
-            if not hasattr(hf_config, 'eagle_aux_hidden_state_layer_ids'):
-                return None
-
-            layer_ids = hf_config.eagle_aux_hidden_state_layer_ids
-            if layer_ids and isinstance(layer_ids, (list, tuple)):
-                return tuple(layer_ids)
-        except Exception as e:
-            logger.warning(
-                "Failed to read auxiliary layers from speculative config: %s",
-                e)
+        hf_config = self.speculative_config.draft_model_config.hf_config
+        if not hasattr(hf_config, 'eagle_aux_hidden_state_layer_ids'):
+            return None
+
+        layer_ids = hf_config.eagle_aux_hidden_state_layer_ids
+        if layer_ids and isinstance(layer_ids, (list, tuple)):
+            return tuple(layer_ids)
 
         return None