Skip to content

Commit 06c6c93

Browse files
committed
Add dynamic Eagle3 auxiliary layer configuration from speculative config
Implement the `_get_eagle3_aux_layers_from_config()` helper method to extract auxiliary layer IDs from the draft model's speculative config. The GPU model runner now prefers config-specified layers over the model defaults, falling back to the model's `get_eagle3_aux_hidden_state_layers()` when no layers are configured.

Changes:
- Refactor auxiliary layer setup with an early return pattern for errors
- Add config extraction with proper error handling
- Log only when using a non-default layer configuration
- Enable dynamic layer configuration per deployment

Signed-off-by: rahul-tuli <[email protected]>
Signed-off-by: Rahul Tuli <[email protected]>
1 parent 730f04d commit 06c6c93

File tree

1 file changed

+45
-5
lines changed

1 file changed

+45
-5
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2943,15 +2943,29 @@ def load_model(self, eep_scale_up: bool = False) -> None:
29432943
logger.info("Loading drafter model...")
29442944
self.drafter.load_model(self.model)
29452945
if self.use_aux_hidden_state_outputs:
2946-
if supports_eagle3(self.model):
2947-
self.model.set_aux_hidden_state_layers(
2948-
self.model.get_eagle3_aux_hidden_state_layers()
2949-
)
2950-
else:
2946+
if not supports_eagle3(self.model):
29512947
raise RuntimeError(
29522948
"Model does not support EAGLE3 interface but "
29532949
"aux_hidden_state_outputs was requested"
29542950
)
2951+
2952+
# Try to get auxiliary layers from speculative config,
2953+
# otherwise use model's default layers
2954+
aux_layers = (
2955+
self._get_eagle3_aux_layers_from_config()
2956+
or self.model.get_eagle3_aux_hidden_state_layers()
2957+
)
2958+
2959+
if (
2960+
aux_layers
2961+
!= self.model.get_eagle3_aux_hidden_state_layers()
2962+
):
2963+
logger.info(
2964+
"Using auxiliary layers from speculative config: %s",
2965+
aux_layers,
2966+
)
2967+
2968+
self.model.set_aux_hidden_state_layers(aux_layers)
29552969
time_after_load = time.perf_counter()
29562970
self.model_memory_usage = m.consumed_memory
29572971
logger.info(
@@ -3006,6 +3020,32 @@ def load_model(self, eep_scale_up: bool = False) -> None:
30063020
self.model, self.vllm_config, CUDAGraphMode.NONE, self.device
30073021
)
30083022

3023+
def _get_eagle3_aux_layers_from_config(self) -> Optional[tuple[int, ...]]:
    """Extract Eagle3 auxiliary layer IDs from the speculative config.

    Reads ``eagle_aux_hidden_state_layer_ids`` from the draft model's
    ``hf_config``. This lookup is best-effort: any failure is logged as a
    warning and reported as "not configured" instead of being raised.

    Returns:
        Tuple of layer indices if a non-empty list/tuple is found in the
        draft model config, None otherwise (no speculative config, no
        draft model config, attribute missing, empty, or of an
        unexpected type).
    """
    spec_config = self.speculative_config
    if not (spec_config and spec_config.draft_model_config):
        return None

    try:
        hf_config = spec_config.draft_model_config.hf_config
        layer_ids = getattr(
            hf_config, "eagle_aux_hidden_state_layer_ids", None)
        if layer_ids is None:
            # Attribute absent (or explicitly null): nothing configured.
            return None

        if not isinstance(layer_ids, (list, tuple)):
            # A value is present but unusable. Say so explicitly, since
            # the caller receives None and cannot tell this apart from
            # "not configured".
            logger.warning(
                "Ignoring eagle_aux_hidden_state_layer_ids from "
                "speculative config: expected a list or tuple of layer "
                "indices, got %s",
                type(layer_ids).__name__)
            return None

        if layer_ids:
            return tuple(layer_ids)
    except Exception as e:
        # Deliberately broad: the config object's attribute access is not
        # under this method's control, and a failure here must not abort
        # the caller.
        logger.warning(
            "Failed to read auxiliary layers from speculative config: %s",
            e)

    return None
3048+
30093049
def reload_weights(self) -> None:
30103050
assert getattr(self, "model", None) is not None, (
30113051
"Cannot reload weights before model is loaded."

0 commit comments

Comments
 (0)