Review comments

rahul-tuli · rahul-tuli · commit 1c1d67949c1a · 2025-10-06T13:07:53.000Z
Signed-off-by: Rahul Tuli <rtuli@redhat.com>
diff --git a/vllm/transformers_utils/configs/speculators/algos.py b/vllm/transformers_utils/configs/speculators/algos.py
@@ -21,6 +21,10 @@ def update_eagle3(config_dict: dict, vllm_config: dict) -> None:
     - draft_vocab_size: Size of the draft model's vocabulary
     - target_hidden_size: Hidden size of the target model
     - norm_before_residual: Whether to apply norm before residual connection
+    - eagle_aux_hidden_state_layer_ids: List of layer indices from the base
+        model to use as auxiliary inputs for the Eagle3 drafter. These layers
+        provide intermediate hidden states that help the drafter make better
+        predictions. This is the standard field used in Eagle3 checkpoints.
     """
 
     vllm_config["draft_vocab_size"] = config_dict.get("draft_vocab_size")
@@ -31,5 +35,3 @@ def update_eagle3(config_dict: dict, vllm_config: dict) -> None:
     if config_dict.get("eagle_aux_hidden_state_layer_ids"):
         vllm_config["eagle_aux_hidden_state_layer_ids"] = config_dict[
             "eagle_aux_hidden_state_layer_ids"]
-    if config_dict.get("inference_type"):
-        vllm_config["inference_type"] = config_dict["inference_type"]
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -2951,19 +2951,14 @@ def load_model(self, eep_scale_up: bool = False) -> None:
 
                 # Try to get auxiliary layers from speculative config,
                 # otherwise use model's default layers
-                aux_layers = (
-                    self._get_eagle3_aux_layers_from_config()
-                    or self.model.get_eagle3_aux_hidden_state_layers()
-                )
-
-                if (
-                    aux_layers
-                    != self.model.get_eagle3_aux_hidden_state_layers()
-                ):
+                aux_layers = self._get_eagle3_aux_layers_from_config()
+                if aux_layers:
                     logger.info(
                         "Using auxiliary layers from speculative config: %s",
                         aux_layers,
                     )
+                else:
+                    aux_layers = self.model.get_eagle3_aux_hidden_state_layers()
 
                 self.model.set_aux_hidden_state_layers(aux_layers)
             time_after_load = time.perf_counter()
@@ -3021,7 +3016,11 @@ def load_model(self, eep_scale_up: bool = False) -> None:
                 )
 
     def _get_eagle3_aux_layers_from_config(self) -> Optional[tuple[int, ...]]:
-        """Extract Eagle3 auxiliary layer IDs from speculative config.
+        """Extract Eagle3 auxiliary layer indices from speculative config.
+
+        These indices specify which hidden states from the base model should
+        be used as auxiliary inputs for the Eagle3 drafter model during
+        speculative decoding.
 
         Returns:
             Tuple of layer indices if found in draft model config,
@@ -3031,18 +3030,13 @@ def _get_eagle3_aux_layers_from_config(self) -> Optional[tuple[int, ...]]:
                 and self.speculative_config.draft_model_config):
             return None
 
-        try:
-            hf_config = self.speculative_config.draft_model_config.hf_config
-            if not hasattr(hf_config, 'eagle_aux_hidden_state_layer_ids'):
-                return None
-
-            layer_ids = hf_config.eagle_aux_hidden_state_layer_ids
-            if layer_ids and isinstance(layer_ids, (list, tuple)):
-                return tuple(layer_ids)
-        except Exception as e:
-            logger.warning(
-                "Failed to read auxiliary layers from speculative config: %s",
-                e)
+        hf_config = self.speculative_config.draft_model_config.hf_config
+        if not hasattr(hf_config, 'eagle_aux_hidden_state_layer_ids'):
+            return None
+
+        layer_ids = hf_config.eagle_aux_hidden_state_layer_ids
+        if layer_ids and isinstance(layer_ids, (list, tuple)):
+            return tuple(layer_ids)
 
         return None