"""Integration tests for QKV hook compatibility in TransformerBridge."""

import torch

from transformer_lens.model_bridge import TransformerBridge


class TestQKVHookCompatibility:
    """Test that QKV bridge hooks are compatible with overall model hook access."""
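
    # In compatibility mode the bridge exposes TransformerLens-style hook names as
    # aliases for the underlying bridge hook points. Concretely, the tests below
    # assert that the following names all resolve to the same HookPoint object:
    #
    #   bridge.blocks[0].attn.hook_v            (compatibility alias)
    #   bridge.blocks[0].attn.qkv.v_hook_out    (QKV bridge hook)
    #   bridge.blocks[0].attn.v.hook_out        (per-projection bridge hook)
    #
    # so a hook registered under any of these names runs on the same object.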

    def test_v_hook_out_equals_blocks_attn_hook_v(self):
        """Test that v_hook_out in QKV bridge equals blocks.0.attn.hook_v on the overall model."""
        # Load GPT-2 in TransformerBridge
        bridge = TransformerBridge.boot_transformers("gpt2", device="cpu")

        # Turn on compatibility mode
        bridge.enable_compatibility_mode(disable_warnings=True)

        # Create test input
        test_input = torch.tensor([[1, 2, 3, 4, 5]])  # Simple test sequence

        # Get the QKV bridge from the first attention layer
        qkv_bridge = bridge.blocks[0].attn.qkv

        # Verify that qkv_bridge is indeed a QKVBridge
        from transformer_lens.model_bridge.generalized_components.qkv_bridge import (
            QKVBridge,
        )

        assert isinstance(qkv_bridge, QKVBridge), "First attention layer should have a QKVBridge"

        # Run a forward pass to populate the hooks
        with torch.no_grad():
            _ = bridge(test_input)

        # Assert that v_hook_out in the QKV bridge is the same object as
        # blocks.0.attn.hook_v on the overall model
        assert (
            qkv_bridge.v_hook_out is bridge.blocks[0].attn.hook_v
        ), "v_hook_out in QKV bridge should be the same object as blocks.0.attn.hook_v"

        # Also test that the hook points have the same properties
        assert (
            qkv_bridge.v_hook_out.has_hooks() == bridge.blocks[0].attn.hook_v.has_hooks()
        ), "Hook points should have the same hook status"

    def test_q_hook_out_equals_blocks_attn_hook_q(self):
        """Test that q_hook_out in QKV bridge equals blocks.0.attn.hook_q on the overall model."""
        # Load GPT-2 in TransformerBridge
        bridge = TransformerBridge.boot_transformers("gpt2", device="cpu")

        # Turn on compatibility mode
        bridge.enable_compatibility_mode(disable_warnings=True)

        # Create test input
        test_input = torch.tensor([[1, 2, 3, 4, 5]])  # Simple test sequence

        # Get the QKV bridge from the first attention layer
        qkv_bridge = bridge.blocks[0].attn.qkv

        # Run a forward pass to populate the hooks
        with torch.no_grad():
            _ = bridge(test_input)

        # Assert that q_hook_out in the QKV bridge is the same object as
        # blocks.0.attn.hook_q on the overall model
        assert (
            qkv_bridge.q_hook_out is bridge.blocks[0].attn.hook_q
        ), "q_hook_out in QKV bridge should be the same object as blocks.0.attn.hook_q"

    def test_k_hook_out_equals_blocks_attn_hook_k(self):
        """Test that k_hook_out in QKV bridge equals blocks.0.attn.hook_k on the overall model."""
        # Load GPT-2 in TransformerBridge
        bridge = TransformerBridge.boot_transformers("gpt2", device="cpu")

        # Turn on compatibility mode
        bridge.enable_compatibility_mode(disable_warnings=True)

        # Create test input
        test_input = torch.tensor([[1, 2, 3, 4, 5]])  # Simple test sequence

        # Get the QKV bridge from the first attention layer
        qkv_bridge = bridge.blocks[0].attn.qkv

        # Run a forward pass to populate the hooks
        with torch.no_grad():
            _ = bridge(test_input)

        # Assert that k_hook_out in the QKV bridge is the same object as
        # blocks.0.attn.hook_k on the overall model
        assert (
            qkv_bridge.k_hook_out is bridge.blocks[0].attn.hook_k
        ), "k_hook_out in QKV bridge should be the same object as blocks.0.attn.hook_k"

    def test_hook_aliases_work_correctly(self):
        """Test that hook aliases work correctly in compatibility mode."""
        # Load GPT-2 in TransformerBridge
        bridge = TransformerBridge.boot_transformers("gpt2", device="cpu")

        # Turn on compatibility mode
        bridge.enable_compatibility_mode(disable_warnings=True)

        # Create test input
        test_input = torch.tensor([[1, 2, 3, 4, 5]])  # Simple test sequence

        # Get the QKV bridge from the first attention layer
        qkv_bridge = bridge.blocks[0].attn.qkv

        # Run a forward pass to populate the hooks
        with torch.no_grad():
            _ = bridge(test_input)

        # Test that hook aliases work correctly
        # These should all reference the same hook points
        assert qkv_bridge.q_hook_out is bridge.blocks[0].attn.hook_q, "Q hook alias should work"
        assert qkv_bridge.k_hook_out is bridge.blocks[0].attn.hook_k, "K hook alias should work"
        assert qkv_bridge.v_hook_out is bridge.blocks[0].attn.hook_v, "V hook alias should work"

        # Test that the hook points are accessible through the attention bridge properties
        assert qkv_bridge.q_hook_out is bridge.blocks[0].attn.q.hook_out, "Q property should work"
        assert qkv_bridge.k_hook_out is bridge.blocks[0].attn.k.hook_out, "K property should work"
        assert qkv_bridge.v_hook_out is bridge.blocks[0].attn.v.hook_out, "V property should work"

    def test_head_ablation_hook_works_correctly(self):
        """Test that head ablation hook works correctly with TransformerBridge."""
        # Load GPT-2 in TransformerBridge
        bridge = TransformerBridge.boot_transformers("gpt2", device="cpu")

        # Turn on compatibility mode
        bridge.enable_compatibility_mode(disable_warnings=True)

        # Create test tokens (same as in the demo)
        gpt2_tokens = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])

        layer_to_ablate = 0
        head_index_to_ablate = 8
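
        # The value tensor passed to the hook is expected to have shape
        # [batch, pos, head_index, d_head] (the TransformerLens convention),
        # so the hook below zeroes one head by indexing the third dimension.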

        # Test both hook names
        hook_names_to_test = [
            f"blocks.{layer_to_ablate}.attn.hook_v",  # Compatibility mode alias
            f"blocks.{layer_to_ablate}.attn.v.hook_out",  # Direct property access
        ]

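        # Since both names resolve to the same HookPoint (see the alias tests above),
        # the ablation should behave identically under either name.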
        for hook_name in hook_names_to_test:
            print(f"\nTesting hook name: {hook_name}")

            # Track if the hook was called
            hook_called = False
            mutation_applied = False

            # We define a head ablation hook
            def head_ablation_hook(value, hook):
                nonlocal hook_called, mutation_applied
                hook_called = True
                print(f"Shape of the value tensor: {value.shape}")

                # Apply the ablation (out-of-place to avoid view modification error)
                result = value.clone()
                result[:, :, head_index_to_ablate, :] = 0.0

                # Check if the mutation was applied (the result should be zero for the ablated head)
                if torch.all(result[:, :, head_index_to_ablate, :] == 0.0):
                    mutation_applied = True

                return result

            # Get original loss
            original_loss = bridge(gpt2_tokens, return_type="loss")

            # Run with head ablation hook
            ablated_loss = bridge.run_with_hooks(
                gpt2_tokens, return_type="loss", fwd_hooks=[(hook_name, head_ablation_hook)]
            )

            print(f"Original Loss: {original_loss.item():.3f}")
            print(f"Ablated Loss: {ablated_loss.item():.3f}")

            # Assert that the hook was called
            assert hook_called, f"Head ablation hook should have been called for {hook_name}"

            # Assert that the mutation was applied
            assert (
                mutation_applied
            ), f"Mutation should have been applied to the value tensor for {hook_name}"

            # Assert that ablated loss is higher than original loss (ablation should hurt performance)
            assert (
                ablated_loss.item() > original_loss.item()
            ), f"Ablated loss should be higher than original loss for {hook_name}"

            print(f"✅ Hook {hook_name} works correctly!")