Skip to content

Commit 1695608

Browse files
committed
Add dynamic Eagle3 auxiliary layer configuration from speculative config
Implement the `_get_eagle3_aux_layers_from_config()` helper method to extract auxiliary layer IDs from the draft model's speculative config. The GPU model runner now prefers config-specified layers over the model defaults, falling back to the model's `get_eagle3_aux_hidden_state_layers()` when none are configured.

Changes:
- Refactor auxiliary layer setup with an early-return pattern for errors
- Add config extraction with proper error handling
- Log only when using a non-default layer configuration
- Enable dynamic layer configuration per deployment

Signed-off-by: rahul-tuli <[email protected]>
1 parent 25def1f commit 1695608

File tree

1 file changed

+40
-4
lines changed

1 file changed

+40
-4
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2729,13 +2729,23 @@ def load_model(self, eep_scale_up: bool = False) -> None:
27292729
logger.info("Loading drafter model...")
27302730
self.drafter.load_model(self.model)
27312731
if self.use_aux_hidden_state_outputs:
2732-
if supports_eagle3(self.model):
2733-
self.model.set_aux_hidden_state_layers(
2734-
self.model.get_eagle3_aux_hidden_state_layers())
2735-
else:
2732+
if not supports_eagle3(self.model):
27362733
raise RuntimeError(
27372734
"Model does not support EAGLE3 interface but "
27382735
"aux_hidden_state_outputs was requested")
2736+
2737+
# Try to get auxiliary layers from speculative config,
2738+
# otherwise use model's default layers
2739+
aux_layers = (self._get_eagle3_aux_layers_from_config() or
2740+
self.model.get_eagle3_aux_hidden_state_layers())
2741+
2742+
if aux_layers != self.model.get_eagle3_aux_hidden_state_layers(
2743+
):
2744+
logger.info(
2745+
"Using auxiliary layers from speculative config: %s",
2746+
aux_layers)
2747+
2748+
self.model.set_aux_hidden_state_layers(aux_layers)
27392749
time_after_load = time.perf_counter()
27402750
self.model_memory_usage = m.consumed_memory
27412751
logger.info("Model loading took %.4f GiB and %.6f seconds",
@@ -2786,6 +2796,32 @@ def load_model(self, eep_scale_up: bool = False) -> None:
27862796
self.model = UBatchWrapper(self.model, self.vllm_config,
27872797
CUDAGraphMode.NONE, self.device)
27882798

2799+
def _get_eagle3_aux_layers_from_config(self) -> Optional[tuple[int, ...]]:
    """Extract Eagle3 auxiliary layer IDs from the speculative config.

    Reads ``eagle_aux_hidden_state_layer_ids`` from the draft model's HF
    config. The lookup is best-effort: a read failure or an unusable
    value is logged as a warning and reported as None rather than raised.

    Returns:
        Tuple of layer indices if a non-empty list or tuple is found in
        the draft model config, None otherwise (no speculative config,
        no draft model config, attribute missing or None, empty
        sequence, value of an unexpected type, or a read error).
    """
    spec_config = self.speculative_config
    if not (spec_config and spec_config.draft_model_config):
        return None

    # Keep the guarded region limited to the reads that can actually fail.
    try:
        hf_config = spec_config.draft_model_config.hf_config
        layer_ids = getattr(hf_config, 'eagle_aux_hidden_state_layer_ids',
                            None)
    except Exception as e:
        # Deliberately broad: this lookup is optional and must never
        # abort model loading.
        logger.warning(
            "Failed to read auxiliary layers from speculative config: %s",
            e)
        return None

    if layer_ids is None:
        return None

    if not isinstance(layer_ids, (list, tuple)):
        # The value is set but unusable; say so instead of silently
        # ignoring what is most likely a configuration mistake.
        logger.warning(
            "Ignoring eagle_aux_hidden_state_layer_ids=%r from "
            "speculative config: expected a list or tuple of layer "
            "indices", layer_ids)
        return None

    # An empty sequence is treated the same as "not configured".
    return tuple(layer_ids) or None
2824+
27892825
def reload_weights(self) -> None:
27902826
assert getattr(self, "model", None) is not None, \
27912827
"Cannot reload weights before model is loaded."

0 commit comments

Comments
 (0)