
Commit a6bddfa

Add support for GPT-OSS

1 parent 98d649a commit a6bddfa

6 files changed: +172 additions, -2 deletions

transformer_lens/factories/architecture_adapter_factory.py

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,7 @@
     GPT2ArchitectureAdapter,
     Gpt2LmHeadCustomArchitectureAdapter,
     GptjArchitectureAdapter,
+    GPTOSSArchitectureAdapter,
     LlamaArchitectureAdapter,
     MingptArchitectureAdapter,
     MistralArchitectureAdapter,
@@ -40,6 +41,7 @@
     "Gemma2ForCausalLM": Gemma2ArchitectureAdapter,
     "Gemma3ForCausalLM": Gemma3ArchitectureAdapter,
     "GPT2LMHeadModel": GPT2ArchitectureAdapter,
+    "GptOssForCausalLM": GPTOSSArchitectureAdapter,
     "GPT2LMHeadCustomModel": Gpt2LmHeadCustomArchitectureAdapter,
     "GPTJForCausalLM": GptjArchitectureAdapter,
     "LlamaForCausalLM": LlamaArchitectureAdapter,

transformer_lens/model_bridge/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -23,6 +23,7 @@
     EmbeddingBridge,
     NormalizationBridge,
     JointQKVAttentionBridge,
+    JointGateUpMLPBridge,
     LinearBridge,
     MLPBridge,
     MoEBridge,
@@ -49,7 +50,8 @@
     "EmbeddingBridge",
     "NormalizationBridge",
     "JointQKVAttentionBridge",
-    "LinearBridge",
+    "JointGateUpMLPBridge",
+    "LinearBridge",
     "MLPBridge",
     "MoEBridge",
     "UnembeddingBridge",

transformer_lens/model_bridge/generalized_components/__init__.py

Lines changed: 4 additions & 1 deletion
@@ -24,6 +24,9 @@
 from transformer_lens.model_bridge.generalized_components.joint_qkv_attention import (
     JointQKVAttentionBridge,
 )
+from transformer_lens.model_bridge.generalized_components.joint_gate_up_mlp import (
+    JointGateUpMLPBridge,
+)
 from transformer_lens.model_bridge.generalized_components.unembedding import (
     UnembeddingBridge,
 )
@@ -34,7 +37,8 @@
     "EmbeddingBridge",
     "NormalizationBridge",
     "JointQKVAttentionBridge",
-    "LinearBridge",
+    "JointGateUpMLPBridge",
+    "LinearBridge",
     "MLPBridge",
     "MoEBridge",
     "JointQKVAttentionBridge",
transformer_lens/model_bridge/generalized_components/joint_gate_up_mlp.py (new file)

Lines changed: 77 additions & 0 deletions

"""MLP bridge component.

This module contains the bridge component for MLP layers with a joint (fused) gate and up projection.
"""

from typing import Any, Dict, Optional

import torch

# NOTE: the LinearBridge import path below is assumed from the package's naming
# convention (each bridge lives in its own module); adjust if it differs.
from transformer_lens.model_bridge.generalized_components.linear import LinearBridge
from transformer_lens.model_bridge.generalized_components.mlp import MLPBridge


class JointGateUpMLPBridge(MLPBridge):
    """Bridge component for MLP layers with joint gate and up projections.

    This component wraps an MLP layer whose gate and up projections are fused into a
    single matrix, so that both the activations of the joint projection and of the
    separate gate and up projections are hooked and accessible.
    """

    def __init__(
        self,
        name: str,
        model_config: Optional[Any] = None,
        submodules: Optional[Dict[str, Any]] = None,
        gate_up_config: Optional[Dict[str, Any]] = None,
    ):
        """Initialize the JointGateUpMLP bridge.

        Args:
            name: The name of the component in the model.
            model_config: Optional configuration (unused for MLPBridge).
            submodules: Dictionary of submodules to register (e.g., gate_proj, up_proj, down_proj).
            gate_up_config: Gate/up-specific configuration holding the function that splits
                the joint projection into separate gate and up projections.
        """
        super().__init__(name, model_config, submodules=submodules or {})
        self.gate_up_config = gate_up_config or {}
        self.gate = LinearBridge(name="gate", config=model_config)
        self.up = LinearBridge(name="up", config=model_config)

    def set_original_component(self, original_component: torch.nn.Module) -> None:
        """Set the original MLP component and initialize LinearBridges for the gate and up projections.

        Args:
            original_component: The original MLP component to wrap.
        """
        super().set_original_component(original_component)

        gate_projection, up_projection = self.gate_up_config["split_gate_up_matrix"](
            original_component
        )

        # Initialize the LinearBridges for the separated gate and up projections.
        self.gate.set_original_component(gate_projection)
        self.up.set_original_component(up_projection)

    def forward(self, *args, **kwargs) -> torch.Tensor:
        """Forward pass through the JointGateUpMLP bridge.

        Args:
            *args: Positional arguments for the original component.
            **kwargs: Keyword arguments for the original component.

        Returns:
            Output hidden states.
        """
        output = super().forward(*args, **kwargs)

        # Extract the input tensor and run it through the separated gate and up
        # projections so that their hook points capture the split activations.
        input_tensor = (
            args[0] if len(args) > 0 else kwargs.get("input", kwargs.get("hidden_states"))
        )
        if input_tensor is not None:
            # Both projections read the same hidden states; the gate output is not
            # fed into the up projection.
            self.gate(input_tensor)
            self.up(input_tensor)

        return output
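As a plain-PyTorch sketch of the pattern this bridge implements (no transformer_lens classes involved; `FusedGateUpMLP` and the hook below are illustrative, not part of the commit): the wrapped module's output is returned unchanged, while the same input is additionally pushed through a projection sliced out of the fused matrix so that a forward hook can observe its activation.

    import torch
    import torch.nn as nn

    class FusedGateUpMLP(nn.Module):
        """Toy MLP with an interleaved fused gate/up projection (illustrative stand-in)."""

        def __init__(self, d_model: int, d_mlp: int):
            super().__init__()
            self.gate_up_proj = nn.Linear(d_model, 2 * d_mlp)
            self.down_proj = nn.Linear(d_mlp, d_model)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            fused = self.gate_up_proj(x)
            gate, up = fused[..., ::2], fused[..., 1::2]  # de-interleave gate / up
            return self.down_proj(torch.nn.functional.silu(gate) * up)

    d_model, d_mlp = 8, 16
    mlp = FusedGateUpMLP(d_model, d_mlp)

    # Separate gate projection, sliced out of the fused weight so it can be hooked on its own.
    gate_proj = nn.Linear(d_model, d_mlp)
    gate_proj.weight = nn.Parameter(mlp.gate_up_proj.weight[::2, :].clone())
    gate_proj.bias = nn.Parameter(mlp.gate_up_proj.bias[::2].clone())

    captured = {}
    gate_proj.register_forward_hook(lambda mod, inp, out: captured.__setitem__("gate", out))

    x = torch.randn(2, d_model)
    y = mlp(x)       # normal output of the wrapped module, unchanged
    gate_proj(x)     # extra pass only to expose the gate activation via the hook

    torch.testing.assert_close(captured["gate"], mlp.gate_up_proj(x)[..., ::2])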

transformer_lens/model_bridge/supported_architectures/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
 from transformer_lens.model_bridge.supported_architectures.gpt2 import (
     GPT2ArchitectureAdapter,
 )
+from transformer_lens.model_bridge.supported_architectures.gpt_oss import GPTOSSArchitectureAdapter
 from transformer_lens.model_bridge.supported_architectures.gpt2_lm_head_custom import (
     Gpt2LmHeadCustomArchitectureAdapter,
 )
transformer_lens/model_bridge/supported_architectures/gpt_oss.py (new file)

Lines changed: 86 additions & 0 deletions

"""GPT-OSS architecture adapter."""

from typing import Any

import torch

from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter
from transformer_lens.model_bridge.generalized_components import (
    AttentionBridge,
    BlockBridge,
    EmbeddingBridge,
    JointGateUpMLPBridge,
    LinearBridge,
    MLPBridge,
    NormalizationBridge,
    UnembeddingBridge,
)


class GPTOSSArchitectureAdapter(ArchitectureAdapter):
    """Architecture adapter for the GPT-OSS model."""

    def __init__(self, cfg: Any) -> None:
        """Initialize the GPT-OSS architecture adapter."""
        super().__init__(cfg)

        self.component_mapping = {
            "embed": EmbeddingBridge(name="model.embed_tokens"),
            "rotary_emb": EmbeddingBridge(name="model.rotary_emb"),
            "blocks": BlockBridge(
                name="model.layers",
                submodules={
                    "ln1": NormalizationBridge(name="input_layernorm"),
                    "attn": AttentionBridge(
                        name="self_attn",
                        submodules={
                            "q": LinearBridge(name="q_proj"),
                            "k": LinearBridge(name="k_proj"),
                            "v": LinearBridge(name="v_proj"),
                            "o": LinearBridge(name="o_proj"),
                        },
                    ),
                    "ln2": NormalizationBridge(name="post_attention_layernorm"),
                    "mlp": MLPBridge(
                        name="mlp",
                        submodules={
                            "router": LinearBridge(name="router"),
                            "experts": BlockBridge(
                                name="experts",
                                submodules={
                                    # Pass the split function so the bridge can expose
                                    # separate gate and up projections.
                                    "gate_up": JointGateUpMLPBridge(
                                        name="gate_up_proj",
                                        gate_up_config={
                                            "split_gate_up_matrix": self.split_gate_up_matrix
                                        },
                                    ),
                                    "down": LinearBridge(name="down_proj"),
                                },
                            ),
                        },
                    ),
                },
            ),
            "ln_final": NormalizationBridge(name="model.norm"),
            "unembed": UnembeddingBridge(name="lm_head"),
        }

    def split_gate_up_matrix(
        self, original_mlp_component: Any
    ) -> tuple[torch.nn.Linear, torch.nn.Linear]:
        """Split the fused gate/up projection of a GPT-OSS expert into two Linear modules."""
        gate_up_weight = original_mlp_component.gate_up_proj
        gate_up_bias = original_mlp_component.gate_up_proj_bias

        # In GPT-OSS, the gate projection weights lie at even indices and the
        # up projection weights lie at odd indices of the fused matrix.
        gate_weight = gate_up_weight[..., ::2]
        up_weight = gate_up_weight[..., 1::2]

        gate_bias = gate_up_bias[..., ::2]
        up_bias = gate_up_bias[..., 1::2]

        gate_projection = torch.nn.Linear(gate_weight.shape[0], gate_weight.shape[1], bias=True)
        gate_projection.weight = torch.nn.Parameter(gate_weight)
        gate_projection.bias = torch.nn.Parameter(gate_bias)

        up_projection = torch.nn.Linear(up_weight.shape[0], up_weight.shape[1], bias=True)
        up_projection.weight = torch.nn.Parameter(up_weight)
        up_projection.bias = torch.nn.Parameter(up_bias)

        return gate_projection, up_projection
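A quick, self-contained check of the interleaving assumption described in the comment above (purely illustrative; it does not load real GPT-OSS weights): columns at even indices belong to the gate projection and columns at odd indices to the up projection, so slicing with `[..., ::2]` and `[..., 1::2]` recovers the two matrices.

    import torch

    num_experts, d_model, d_expert = 2, 4, 3

    # Build a fused gate/up matrix whose columns are explicitly interleaved:
    # [g0, u0, g1, u1, ...] along the last dimension.
    gate = torch.randn(num_experts, d_model, d_expert)
    up = torch.randn(num_experts, d_model, d_expert)
    fused = torch.stack((gate, up), dim=-1).reshape(num_experts, d_model, 2 * d_expert)

    # Even columns recover the gate projection, odd columns recover the up projection.
    torch.testing.assert_close(fused[..., ::2], gate)
    torch.testing.assert_close(fused[..., 1::2], up)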
