@@ -1244,73 +1244,6 @@ def forward(self, hidden_states):
12441244 return self .weight * hidden_states .to (input_dtype )
12451245
12461246
class BasicDecoderLayer(nn.Module):
    """A single pre-norm transformer decoder layer.

    Structure: LayerNorm -> self-attention -> residual add, then
    LayerNorm -> MLP -> residual add. The attention implementation is
    chosen at construction time via ``attention_backend``.

    Args:
        config: Model configuration; must provide ``hidden_size`` and
            ``rms_norm_eps``.
        attention_backend: One of ``"sdpa"``, ``"flex_attention"``,
            ``"fa"`` (flash attention), or ``"usp"``.

    Raises:
        ValueError: If ``attention_backend`` is not a recognized name.
    """

    def __init__(self, config, attention_backend: str = "sdpa"):
        super().__init__()
        self.hidden_size = config.hidden_size

        # Pick the attention module for the requested backend.
        # NOTE(review): "sdpa" and "usp" intentionally map to the same
        # LlamaAttention module in the original code.
        if attention_backend == "flex_attention":
            print_with_rank("Using flex attention on draft model training!")
            self.self_attn = LlamaFlexAttention(config=config, fused_input=False)
        elif attention_backend == "fa":
            self.self_attn = LlamaFlashAttention(config=config, fused_input=False)
        elif attention_backend in ("sdpa", "usp"):
            self.self_attn = LlamaAttention(config=config, fused_input=False)
        else:
            raise ValueError(f"Unknown attention backend {attention_backend}")

        self.attention_backend = attention_backend
        self.mlp = LlamaMLP(config)

        # Pre-norms for the attention and MLP sub-blocks respectively.
        self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = LlamaRMSNorm(
            config.hidden_size, eps=config.rms_norm_eps
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
    ) -> Tuple[
        torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]
    ]:
        """Apply self-attention and the MLP, each wrapped in a
        pre-layernorm and a residual connection, and return the
        updated hidden states."""
        # --- Self-attention sub-block ---
        attn_residual = hidden_states
        normed = self.input_layernorm(hidden_states)
        attn_out = self.self_attn(
            hidden_states=normed,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            output_attentions=output_attentions,
            use_cache=use_cache,
        )
        hidden_states = attn_residual + attn_out

        # --- Feed-forward sub-block ---
        mlp_residual = hidden_states
        normed = self.post_attention_layernorm(hidden_states)
        hidden_states = mlp_residual + self.mlp(normed)

        return hidden_states
1313-
13141247class LlamaDecoderLayer (nn .Module ):
13151248 def __init__ (self , config , attention_backend : str = "sdpa" , fused_input = True ):
13161249 super ().__init__ ()
0 commit comments