
Commit 5babde2

cleaned up a lot of things (#1113)
* cleaned up a lot of things
* removed extra function
* fixed typing
* fixed index bug
* removed extra stuff
* fixed main demo
* removed bad chunk
* removed attention check
* fixed cache
* fixed type check
* fixed demo issue
* fixed test
* fixed typing
* updated type
* restored patched function
* fixed gemma 3
* fixed test
* fixed typing
* continued working through gemma compat
* fixed more issues
* got closer
* ran format
* fixed extra config
* grouped results by phase
* cleaned up adapter
* fixed weight processing issue
* revised hooks
* fixed phase 2
* set flags correctly
* revised benchmarks for granularity
* improved gemma compatibility
* cleaned up memory
* revised architecture adapters
* cleaned up memory
* improved some models
* used correct component
* verified more architectures
* fixed more models
* ran format
* fixed typing
* fixed typing
* fixed test
* fixed t5
* fixed format
* fixed test
1 parent 69db6a3 commit 5babde2

35 files changed (+2039, -502 lines)

tests/integration/model_bridge/generalized_components/test_joint_qkv_attention_bridge_integration.py

Lines changed: 9 additions & 6 deletions
@@ -49,9 +49,9 @@ def dummy_hook(x, hook):
         assert not hook_point.has_hooks()
 
     def test_architecture_imports(self):
-        """Test that architecture files can be imported and reference JointQKVAttentionBridge."""
+        """Test that architecture files can be imported and use appropriate attention bridges."""
         # Test that we can import the architecture files without errors
-        # Test that JointQKVAttentionBridge is referenced in the source files
+        # Test that appropriate attention bridges are referenced in the source files
         import inspect
 
         from transformer_lens.model_bridge.supported_architectures import (
@@ -65,15 +65,18 @@ def test_architecture_imports(self):
             "JointQKVAttentionBridge" in gpt2_source
         ), "GPT-2 architecture should reference JointQKVAttentionBridge"
 
+        # BLOOM uses BloomAttentionBridge instead of JointQKVAttentionBridge
+        # because it requires alibi bias and residual connections
         bloom_source = inspect.getsource(bloom)
         assert (
-            "JointQKVAttentionBridge" in bloom_source
-        ), "BLOOM architecture should reference JointQKVAttentionBridge"
+            "BloomAttentionBridge" in bloom_source
+        ), "BLOOM architecture should reference BloomAttentionBridge"
 
+        # NeoX uses JointQKVPositionEmbeddingsAttentionBridge for rotary embeddings
        neox_source = inspect.getsource(neox)
         assert (
-            "JointQKVAttentionBridge" in neox_source
-        ), "NeoX architecture should reference JointQKVAttentionBridge"
+            "JointQKVPositionEmbeddingsAttentionBridge" in neox_source
+        ), "NeoX architecture should reference JointQKVPositionEmbeddingsAttentionBridge"
 
     @pytest.mark.slow
     def test_distilgpt2_integration(self):
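
The per-architecture expectations above can be read as a small table-driven check. A minimal sketch, assuming the architecture modules are importable as gpt2, bloom, and neox (bloom and neox appear in the diff; the gpt2 module name is inferred from gpt2_source):

import inspect

from transformer_lens.model_bridge.supported_architectures import bloom, gpt2, neox

# Expected attention bridge per architecture, taken from the assertions above
EXPECTED_BRIDGE = {
    gpt2: "JointQKVAttentionBridge",                    # fused QKV, learned positions
    bloom: "BloomAttentionBridge",                      # needs alibi bias and residual
    neox: "JointQKVPositionEmbeddingsAttentionBridge",  # rotary position embeddings
}

for module, bridge_name in EXPECTED_BRIDGE.items():
    source = inspect.getsource(module)
    assert bridge_name in source, f"{module.__name__} should reference {bridge_name}"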

tests/integration/model_bridge/test_weight_processing_integration.py

Lines changed: 5 additions & 8 deletions
@@ -88,9 +88,7 @@ def head_ablation_hook(value: Float[torch.Tensor, "batch pos head_index d_head"]
     # ===========================================
     print("\n3. Loading TransformerBridge without processing...")
     try:
-        bridge_unprocessed = TransformerBridge.boot_transformers(
-            model_name, device=device, apply_weight_processing=False
-        )
+        bridge_unprocessed = TransformerBridge.boot_transformers(model_name, device=device)
 
         print("\n Testing baseline performance...")
         bridge_unprocessed_baseline = bridge_unprocessed(tokens, return_type="loss")
@@ -122,9 +120,9 @@ def head_ablation_hook(value: Float[torch.Tensor, "batch pos head_index d_head"]
     # ===========================================
     print("\n4. Loading TransformerBridge with processing...")
     try:
-        bridge_processed = TransformerBridge.boot_transformers(
-            model_name, device=device, apply_weight_processing=True
-        )
+        bridge_processed = TransformerBridge.boot_transformers(model_name, device=device)
+
+        bridge_processed.process_weights()
 
         print("\n Testing baseline performance...")
         bridge_processed_baseline = bridge_processed(tokens, return_type="loss")
@@ -288,10 +286,9 @@ def head_ablation_hook(value: Float[torch.Tensor, "batch pos head_index d_head"]
     if overall_success:
         print("\n🎉🎉🎉 FULL INTEGRATION COMPATIBILITY ACHIEVED! 🎉🎉🎉")
         print("TransformerBridge is fully compatible with HookedTransformer!")
-        return True
     else:
         print("\n⚠️ Integration compatibility issues detected")
-        return False
+        pytest.fail("Integration compatibility issues detected")
 
 
 @pytest.mark.skip(
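
The apply_weight_processing flag disappears from these tests: the bridge is always booted with raw weights, and processing becomes an explicit second step. A minimal sketch of the two paths, with the model name and device as placeholders:

from transformer_lens.model_bridge import TransformerBridge

model_name, device = "gpt2", "cpu"  # placeholders

# Unprocessed: raw HuggingFace weights, nothing folded at load time
bridge_unprocessed = TransformerBridge.boot_transformers(model_name, device=device)

# Processed: same load path, then opt in to weight processing explicitly
bridge_processed = TransformerBridge.boot_transformers(model_name, device=device)
bridge_processed.process_weights()

loss = bridge_processed("Hello world", return_type="loss")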

tests/integration/model_bridge/test_weight_processing_perfect_match.py

Lines changed: 1 addition & 2 deletions
@@ -127,10 +127,9 @@ def head_ablation_hook(value, hook):
         print("\n🎉🎉🎉 PERFECT MATCH ACHIEVED! 🎉🎉🎉")
         print("The corrected processing matches HookedTransformer exactly!")
         print("This solution can be applied to TransformerBridge for perfect ablation matching.")
-        return True
     else:
         print("\n⚠️ Not quite perfect yet, but very close!")
-        return False
+        pytest.fail("Not quite perfect yet, but very close!")
 
 
 if __name__ == "__main__":
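
Both test files swap return True / return False for pytest.fail(...), since pytest ignores a test's return value (newer versions also warn about non-None returns): a test only fails by raising. A minimal illustration, with a hypothetical outputs_match() standing in for the real bridge-vs-HookedTransformer comparison:

import pytest


def outputs_match() -> bool:
    # Hypothetical stand-in for the real ablation comparison
    return False


def test_silently_passes():
    return outputs_match()  # return value is discarded, so this "passes" even on mismatch


def test_fails_properly():
    if not outputs_match():
        pytest.fail("Outputs do not match")  # raises, so pytest reports a real failure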

transformer_lens/benchmarks/component_outputs.py

Lines changed: 106 additions & 16 deletions
@@ -265,31 +265,44 @@ def benchmark_all_components(
 
         results: List[ComponentTestResult] = []
 
+        # Block-type components that need to be tested recursively by layer
+        # (they are ModuleLists that don't have direct forward methods)
+        block_components = {"blocks", "encoder_blocks", "decoder_blocks"}
+
         # Test top-level components (embed, pos_embed, ln_final, unembed)
         for comp_name, component in component_mapping.items():
             if comp_name in skip_components:
                 continue
 
-            if comp_name == "blocks":
-                # Handle blocks separately
+            if comp_name in block_components:
+                # Handle blocks separately - test their subcomponents by layer
                 continue
 
             result = self._test_component(comp_name, component, test_inputs)
             if result is not None:
                 results.append(result)
 
         # Test block components recursively
-        if "blocks" in component_mapping and "blocks" not in skip_components:
-            blocks_component = component_mapping["blocks"]
-            n_layers = self.cfg.n_layers
-
-            for layer_idx in range(n_layers):
-                # Recursively test each subcomponent and its nested subcomponents
-                for subcomp_name, subcomponent in blocks_component.submodules.items():
-                    comp_path = f"blocks.{layer_idx}.{subcomp_name}"
-                    self._test_component_recursive(
-                        comp_path, subcomponent, test_inputs, results, skip_components
-                    )
+        for block_type in block_components:
+            if block_type in component_mapping and block_type not in skip_components:
+                blocks_component = component_mapping[block_type]
+                n_layers = self.cfg.n_layers
+
+                for layer_idx in range(n_layers):
+                    # Recursively test each subcomponent and its nested subcomponents
+                    for subcomp_name, subcomponent in blocks_component.submodules.items():
+                        comp_path = f"{block_type}.{layer_idx}.{subcomp_name}"
+                        self._test_component_recursive(
+                            comp_path, subcomponent, test_inputs, results, skip_components
+                        )
+
+        # Clean up test inputs to free memory
+        if test_inputs is not None:
+            for key in list(test_inputs.keys()):
+                tensor = test_inputs[key]
+                if tensor is not None and isinstance(tensor, torch.Tensor):
+                    del tensor
+            test_inputs.clear()
 
         # Create report
         passed = sum(1 for r in results if r.passed)
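
The block loop now covers decoder-only and encoder-decoder layouts with the same path construction. A small standalone sketch of the component paths it produces, using hypothetical submodule names ("attn", "mlp") and a two-layer model:

block_components = {"blocks", "encoder_blocks", "decoder_blocks"}

# Hypothetical mapping: a decoder-only model would expose "blocks",
# an encoder-decoder model exposes "encoder_blocks" and "decoder_blocks"
component_mapping = {"embed": None, "encoder_blocks": None, "decoder_blocks": None}
submodules = ["attn", "mlp"]  # hypothetical per-layer subcomponents
n_layers = 2

for block_type in sorted(block_components):
    if block_type in component_mapping:
        for layer_idx in range(n_layers):
            for subcomp_name in submodules:
                print(f"{block_type}.{layer_idx}.{subcomp_name}")
# prints paths like decoder_blocks.0.attn, decoder_blocks.1.mlp, encoder_blocks.0.attn, ...
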
@@ -333,6 +346,58 @@ def _test_component_recursive(
         if component_path in skip_components:
             return
 
+        # Skip MLP components that don't exist as separate modules in HF (name=None)
+        # These are virtual components where fc1/fc2 are directly on the layer
+        # Component testing doesn't work for these because get_component returns the parent layer
+        if "mlp" in component_path and hasattr(component, "name") and component.name is None:
+            return
+
+        # Skip MLP components with custom forward signatures (e.g., BLOOM requires residual)
+        # These can't be tested in isolation without full model context
+        if "mlp" in component_path and hasattr(component, "hf_component"):
+            import inspect
+
+            try:
+                sig = inspect.signature(component.hf_component.forward)
+                params = list(sig.parameters.keys())
+                # Standard MLP only needs hidden_states (or self + hidden_states)
+                # If there are more required params, skip testing
+                if len(params) > 2:  # self + hidden_states + other required params
+                    return
+            except Exception:
+                # If we can't inspect, proceed with testing
+                pass
+
+        # Skip attention components that require position embeddings in Phase 3
+        # These can't be tested in isolation without full model context for position embeddings
+        if (
+            "attn" in component_path
+            and hasattr(component, "requires_position_embeddings")
+            and component.requires_position_embeddings
+        ):
+            return
+
+        # Skip attention components that use native HF attention (maintain_native_attention=True)
+        # These have custom forward signatures (e.g., BLOOM requires residual, alibi, attention_mask)
+        # and can't be tested in isolation without full model context
+        if (
+            "attn" in component_path
+            and hasattr(component, "maintain_native_attention")
+            and component.maintain_native_attention
+        ):
+            return
+
+        # Skip BLOOM and T5 attention and MLP components - they have custom signatures that require
+        # residual connections, alibi bias, or cache_position from the full model context
+        if "attn" in component_path or "mlp" in component_path:
+            # Check if this is a BLOOM or T5 model by looking at the HF model config
+            hf_model_config = getattr(self.hf_model, "config", None)
+            if hf_model_config and hasattr(hf_model_config, "model_type"):
+                # BLOOM requires residual and alibi bias
+                # T5 requires cache_position for relative position embeddings
+                if hf_model_config.model_type in ["bloom", "t5"]:
+                    return
+
         # Skip components that require specific shaped inputs from their parent modules
         # These components expect intermediate outputs from their parent attention/MLP
         # modules and can't be tested with generic hidden state inputs
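
The signature probe in that hunk can be reproduced in isolation. A minimal standalone sketch of the idea with toy modules standing in for the HF components; here the threshold is > 1 because inspect.signature on a bound method already excludes self:

import inspect

import torch
import torch.nn as nn


class StandardMLP(nn.Module):
    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states


class BloomStyleMLP(nn.Module):
    # BLOOM-style MLPs also need the residual stream, so they cannot be driven
    # with a single generic hidden-states tensor
    def forward(self, hidden_states: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
        return hidden_states + residual


def needs_extra_inputs(module: nn.Module) -> bool:
    # Bound methods exclude `self`, so anything beyond one parameter is "extra"
    params = inspect.signature(module.forward).parameters
    return len(params) > 1


print(needs_extra_inputs(StandardMLP()))    # False
print(needs_extra_inputs(BloomStyleMLP()))  # True
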
@@ -402,7 +467,10 @@ def _test_component(
         """
         try:
             # Get bridge component
-            bridge_component = self.adapter.get_component(self.bridge_model, component_path)
+            # The adapter returns nn.Module, but for bridge models it's actually GeneralizedComponent
+            bridge_component = cast(
+                GeneralizedComponent, self.adapter.get_component(self.bridge_model, component_path)
+            )
 
             # Get HuggingFace component
             hf_component = self.adapter.get_component(self.hf_model, component_path)
@@ -412,7 +480,14 @@ def _test_component(
             if test_input is None:
                 return None
 
-            # For embedding components, generate token indices once to use for both
+            # Get input args/kwargs from the Bridge component
+            # All bridge components inherit from GeneralizedComponent and have get_dummy_inputs()
+            batch, seq_len, _ = test_input.shape
+            pos_indices = (
+                torch.arange(seq_len, device=test_input.device).unsqueeze(0).expand(batch, -1)
+            )
+
+            # For embedding components, generate token indices once
             shared_token_indices = None
             if component_path == "embed":
                 batch, seq_len, _ = test_input.shape
@@ -490,13 +565,28 @@ def _test_component(
                 bridge_tensor, hf_tensor
             )
 
+            # Get output shape before deleting tensors
+            output_shape = tuple(bridge_tensor.shape)
+
+            # Clean up output tensors immediately to free memory
+            del bridge_output, hf_output, bridge_tensor, hf_tensor
+            if shared_inputs is not None:
+                # Clean up shared inputs
+                for key in list(shared_inputs.keys()):
+                    val = shared_inputs[key]
+                    if val is not None and isinstance(val, torch.Tensor):
+                        del val
+                        shared_inputs[key] = None
+            if shared_token_indices is not None:
+                del shared_token_indices
+
             return ComponentTestResult(
                 component_path=component_path,
                 component_type=type(component).__name__,
                 passed=passed,
                 max_diff=max_diff,
                 mean_diff=mean_diff,
-                output_shape=tuple(bridge_tensor.shape),
+                output_shape=output_shape,
                 percentile_diffs=percentile_diffs,
             )
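
The pos_indices construction added above is the usual pattern for building per-batch position ids. A quick standalone check of the shapes involved, with hypothetical batch/sequence sizes:

import torch

batch, seq_len = 2, 5
test_input = torch.zeros(batch, seq_len, 16)  # hypothetical [batch, pos, d_model] activations

pos_indices = (
    torch.arange(seq_len, device=test_input.device).unsqueeze(0).expand(batch, -1)
)

print(pos_indices.shape)  # torch.Size([2, 5])
print(pos_indices[0])     # tensor([0, 1, 2, 3, 4]) - same positions for every batch element
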
transformer_lens/benchmarks/forward_pass.py

Lines changed: 44 additions & 2 deletions
@@ -14,6 +14,29 @@
 from transformer_lens.model_bridge import TransformerBridge
 
 
+def _is_encoder_decoder(model: torch.nn.Module) -> bool:
+    """Check if a model is an encoder-decoder architecture."""
+    config = getattr(model, "config", None)
+    if config is None:
+        return False
+    return getattr(config, "is_encoder_decoder", False)
+
+
+def _get_decoder_input_ids(model: torch.nn.Module, batch_size: int = 1) -> torch.Tensor:
+    """Get decoder_input_ids for encoder-decoder models.
+
+    Args:
+        model: The model to get decoder_start_token_id from
+        batch_size: Batch size for the decoder_input_ids
+
+    Returns:
+        Tensor of shape [batch_size, 1] with decoder_start_token_id
+    """
+    config = getattr(model, "config", None)
+    decoder_start_token_id = getattr(config, "decoder_start_token_id", 0) if config else 0
+    return torch.tensor([[decoder_start_token_id]] * batch_size)
+
+
 def benchmark_forward_pass(
     bridge: TransformerBridge,
     test_text: str,
@@ -34,8 +57,20 @@ def benchmark_forward_pass(
         BenchmarkResult with comparison details
     """
     try:
+        # Check if this is an encoder-decoder model
+        is_enc_dec = _is_encoder_decoder(bridge.original_model)
+
+        # Prepare extra kwargs for encoder-decoder models
+        extra_kwargs = {}
+        if is_enc_dec:
+            tokens = bridge.to_tokens(test_text)
+            batch_size = tokens.shape[0]
+            decoder_input_ids = _get_decoder_input_ids(bridge.original_model, batch_size)
+            decoder_input_ids = decoder_input_ids.to(tokens.device)
+            extra_kwargs["decoder_input_ids"] = decoder_input_ids
+
         # Run bridge forward pass
-        bridge_output = bridge(test_text, return_type="logits")
+        bridge_output = bridge(test_text, return_type="logits", **extra_kwargs)
 
         if reference_model is None:
             # No reference model - just verify output shape and validity
@@ -69,7 +104,14 @@ def benchmark_forward_pass(
         # HuggingFace model
         tokens = bridge.to_tokens(test_text)
         with torch.no_grad():
-            hf_output = reference_model(tokens)
+            if is_enc_dec:
+                # Encoder-decoder models need decoder_input_ids
+                batch_size = tokens.shape[0]
+                decoder_input_ids = _get_decoder_input_ids(reference_model, batch_size)
+                decoder_input_ids = decoder_input_ids.to(tokens.device)
+                hf_output = reference_model(tokens, decoder_input_ids=decoder_input_ids)
+            else:
+                hf_output = reference_model(tokens)
         reference_output = hf_output.logits
 
         return compare_tensors(
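
With these helpers, the same benchmark drives decoder-only and encoder-decoder models; the only difference is an extra decoder_input_ids tensor of shape [batch_size, 1] filled with decoder_start_token_id. A minimal sketch of the reference-model side in plain HuggingFace terms, using "t5-small" purely as a placeholder encoder-decoder model:

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

tokens = tokenizer("Hello world", return_tensors="pt").input_ids

if getattr(model.config, "is_encoder_decoder", False):
    # Seed the decoder with a single start token per batch element
    start_id = getattr(model.config, "decoder_start_token_id", 0)
    decoder_input_ids = torch.tensor([[start_id]] * tokens.shape[0])
    with torch.no_grad():
        output = model(tokens, decoder_input_ids=decoder_input_ids)
else:
    with torch.no_grad():
        output = model(tokens)

print(output.logits.shape)  # [batch, decoder positions, vocab]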
