Skip to content

Commit f85861a

Browse files
authored
fix: liger swiglu for llama4 (axolotl-ai-cloud#2504)
* fix: liger swiglu for llama4
* feat: add liger to deepseek v3
* fix: unpack not found
* fix: spelling
* fix: comment out deepseek v3
* fix: retest deepseek
* fix: map glu
* fix: patch model forward
* chore: add temp code to save
* fix: remove deepseek to move into separate PR
1 parent 630e40d commit f85861a

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

src/axolotl/integrations/liger/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -185,5 +185,7 @@ def _liger_rms_norm_wrapper(dim, **kwargs):
185185
rms_norm=cfg.liger_rms_norm,
186186
layer_norm=cfg.liger_layer_norm,
187187
)
188-
elif cfg.model_config_type in ["deepseek_v3"]:
189-
raise ValueError(f"Unsupported model config type: {cfg.model_config_type}")
188+
else:
189+
logging.warning(
190+
f"Unsupported model config type: {cfg.model_config_type}. Liger not applied."
191+
)

src/axolotl/integrations/liger/models/llama4.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import sys
6+
from copy import deepcopy
67
from typing import List, Optional, Tuple, Union
78

89
import torch
@@ -158,7 +159,16 @@ def apply_liger_kernel_to_llama4(
158159
if rms_norm:
159160
modeling_llama4.Llama4TextRMSNorm = LigerRMSNorm
160161
if glu_activation:
161-
modeling_llama4.Llama4TextMLP = LigerSwiGLUMLP
162+
163+
def _liger_swiglu_mlp_wrapper(config, intermediate_size=None, **kwargs):
164+
"Accepts intermediate_size to pass to LigerSwiGLUMLP"
165+
# clone config to avoid modifying the original
166+
config = deepcopy(config)
167+
if intermediate_size:
168+
setattr(config, "intermediate_size", intermediate_size)
169+
return LigerSwiGLUMLP(config, **kwargs)
170+
171+
modeling_llama4.Llama4TextMLP = _liger_swiglu_mlp_wrapper
162172
if layer_norm:
163173
modeling_llama4.nn.LayerNorm = LigerLayerNorm
164174

0 commit comments

Comments
 (0)