Skip to content

Commit afa9b18

Browse files
committed
Add dynamic Eagle3 auxiliary layer configuration from speculative config
Implement the `_get_eagle3_aux_layers_from_config()` helper method, which extracts auxiliary layer IDs from the draft model's config (`draft_model_config.hf_config`) in the speculative config. The GPU model runner now prefers config-specified layers over the model defaults, falling back to the model's `get_eagle3_aux_hidden_state_layers()` when none are configured.

Changes:
- Refactor auxiliary layer setup to use an early-return pattern for errors
- Add config extraction with proper error handling
- Log only when a non-default layer configuration is used
- Enable dynamic layer configuration per deployment

Signed-off-by: rahul-tuli <[email protected]>
Signed-off-by: Rahul Tuli <[email protected]>
1 parent 7753daf commit afa9b18

File tree

1 file changed

+40
-4
lines changed

1 file changed

+40
-4
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2746,13 +2746,23 @@ def load_model(self, eep_scale_up: bool = False) -> None:
27462746
logger.info("Loading drafter model...")
27472747
self.drafter.load_model(self.model)
27482748
if self.use_aux_hidden_state_outputs:
2749-
if supports_eagle3(self.model):
2750-
self.model.set_aux_hidden_state_layers(
2751-
self.model.get_eagle3_aux_hidden_state_layers())
2752-
else:
2749+
if not supports_eagle3(self.model):
27532750
raise RuntimeError(
27542751
"Model does not support EAGLE3 interface but "
27552752
"aux_hidden_state_outputs was requested")
2753+
2754+
# Try to get auxiliary layers from speculative config,
2755+
# otherwise use model's default layers
2756+
aux_layers = (self._get_eagle3_aux_layers_from_config() or
2757+
self.model.get_eagle3_aux_hidden_state_layers())
2758+
2759+
if aux_layers != self.model.get_eagle3_aux_hidden_state_layers(
2760+
):
2761+
logger.info(
2762+
"Using auxiliary layers from speculative config: %s",
2763+
aux_layers)
2764+
2765+
self.model.set_aux_hidden_state_layers(aux_layers)
27562766
time_after_load = time.perf_counter()
27572767
self.model_memory_usage = m.consumed_memory
27582768
logger.info("Model loading took %.4f GiB and %.6f seconds",
@@ -2803,6 +2813,32 @@ def load_model(self, eep_scale_up: bool = False) -> None:
28032813
self.model = UBatchWrapper(self.model, self.vllm_config,
28042814
CUDAGraphMode.NONE, self.device)
28052815

2816+
def _get_eagle3_aux_layers_from_config(self) -> Optional[tuple[int, ...]]:
    """Extract Eagle3 auxiliary layer IDs from speculative config.

    Reads ``eagle_aux_hidden_state_layer_ids`` from the draft model's
    ``hf_config``. The lookup is best-effort: it never raises because of a
    bad or missing value. Anything unusable is reported with a warning and
    treated as "not configured".

    Returns:
        Tuple of layer indices if a non-empty list/tuple of integers is
        found in the draft model config, None otherwise.
    """
    spec_config = self.speculative_config
    if not (spec_config and spec_config.draft_model_config):
        return None

    attr_name = "eagle_aux_hidden_state_layer_ids"

    # Only the attribute reads can fail here, so keep the guarded region
    # limited to them. The broad catch is deliberate: reading an optional
    # setting must not abort the caller.
    try:
        hf_config = spec_config.draft_model_config.hf_config
        layer_ids = getattr(hf_config, attr_name, None)
    except Exception as e:
        logger.warning(
            "Failed to read auxiliary layers from speculative config: %s",
            e)
        return None

    # Attribute absent or explicitly null: nothing was configured.
    if layer_ids is None:
        return None

    # Check the container type before touching the value in any other way,
    # then require real integers (bool is a subclass of int, so exclude it).
    is_int_sequence = isinstance(layer_ids, (list, tuple)) and all(
        isinstance(layer_id, int) and not isinstance(layer_id, bool)
        for layer_id in layer_ids)
    if not is_int_sequence:
        # Surface the misconfiguration instead of dropping it silently.
        logger.warning(
            "Ignoring %s from speculative config: expected a list or "
            "tuple of integers, got %r", attr_name, layer_ids)
        return None

    # An empty sequence also means "not configured".
    return tuple(layer_ids) or None
2841+
28062842
def reload_weights(self) -> None:
28072843
assert getattr(self, "model", None) is not None, \
28082844
"Cannot reload weights before model is loaded."

0 commit comments

Comments
 (0)