Commit ee9b44b

Split weights instead of logits for models with joint QKV matrix (#1043)
* Split weights instead of logits for models with joint QKV activation
* Adjust tests accordingly
* Set split_qkv_matrix function inside init
* Remove debugging print statements
* added model fixture
* attempted memory reduction
* fixed test
* fixed test
* removed extra test
* cleaned up test
* applied input hook to attention

---------

Co-authored-by: Bryce Meyer <[email protected]>
1 parent 58e788f commit ee9b44b
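
The core idea of the change: in architectures like GPT-2, BLOOM, and NeoX, attention stores Q, K, and V as a single joint matrix, and the previous approach computed the joint projection first and then split its output (the "logits" in the commit title) into q/k/v. Splitting the weight matrix itself instead means each projection is computed independently, so q/k/v hooks intervene on genuinely separate computations. A minimal sketch of the weight split, for illustration only: the shapes assume GPT-2-style packing, and the helper name echoes the commit's split_qkv_matrix but its real signature may differ.

import torch

def split_qkv_matrix(w_qkv: torch.Tensor, b_qkv: torch.Tensor):
    """Illustrative sketch: split a joint QKV weight/bias into separate projections.

    Assumes w_qkv has shape [d_model, 3 * d_model] with Q, K, V
    concatenated along the last dimension (GPT-2-style c_attn packing).
    """
    d_model = w_qkv.shape[0]
    w_q, w_k, w_v = torch.split(w_qkv, d_model, dim=-1)
    b_q, b_k, b_v = torch.split(b_qkv, d_model, dim=-1)
    return (w_q, b_q), (w_k, b_k), (w_v, b_v)

# Usage sketch: three independent projections instead of one joint one.
d_model = 768
w_qkv = torch.randn(d_model, 3 * d_model)
b_qkv = torch.randn(3 * d_model)
x = torch.randn(2, 5, d_model)  # [batch, pos, d_model]

(w_q, b_q), (w_k, b_k), (w_v, b_v) = split_qkv_matrix(w_qkv, b_qkv)
q = x @ w_q + b_q  # matches slicing the joint output, up to float error
k = x @ w_k + b_k
v = x @ w_v + b_v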

File tree

18 files changed, +873 -615 lines changed

tests/conftest.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+"""Global pytest configuration for memory management and test optimization."""
+
+import gc
+
+import pytest
+import torch
+
+
+@pytest.fixture(autouse=True, scope="function")
+def cleanup_memory():
+    """Automatically clean up memory after each test."""
+    yield
+    # Clear torch cache
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    # Force garbage collection multiple times for better cleanup
+    for _ in range(3):
+        gc.collect()
+
+
+@pytest.fixture(autouse=True, scope="class")
+def cleanup_class_memory():
+    """Clean up memory after each test class."""
+    yield
+    # More aggressive cleanup after test classes
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    gc.collect()
+
+
+# Configure pytest to be more memory-efficient
+def pytest_configure(config):
+    """Configure pytest for better memory usage."""
+    # Set torch to use less memory
+    torch.set_num_threads(1)  # Reduce threading overhead
+
+    # Configure garbage collection to be more aggressive
+    gc.set_threshold(700, 10, 10)
+
+
+def pytest_sessionfinish(session, exitstatus):
+    """Clean up at the end of test session."""
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    gc.collect()
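
Because both fixtures are marked autouse, every test in the suite gets the cleanup without requesting it by name. A hypothetical test file (not part of this commit) illustrating the effect:

import torch

def test_allocates_a_large_tensor():
    # No fixture argument is needed: the autouse cleanup_memory fixture
    # in conftest.py still runs after this test finishes, emptying the
    # CUDA cache (when available) and forcing garbage collection.
    x = torch.zeros(1024, 1024)
    assert x.sum().item() == 0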

tests/integration/model_bridge/compatibility/test_hooks.py

Lines changed: 14 additions & 9 deletions
@@ -6,10 +6,15 @@
 MODEL = "gpt2"  # Use a model supported by TransformerBridge

 prompt = "Hello World!"
-model = TransformerBridge.boot_transformers(MODEL, device="cpu")
 embed = lambda name: name == "hook_embed"


+@pytest.fixture(scope="module")
+def model():
+    """Load model once per test module to reduce memory usage."""
+    return TransformerBridge.boot_transformers(MODEL, device="cpu")
+
+
 class Counter:
     def __init__(self):
         self.count = 0
@@ -18,7 +23,7 @@ def inc(self, *args, **kwargs):
         self.count += 1


-def test_hook_attaches_normally():
+def test_hook_attaches_normally(model):
     """Test that hooks can be attached and removed normally with TransformerBridge."""
     c = Counter()
     _ = model.run_with_hooks(prompt, fwd_hooks=[(embed, c.inc)])
@@ -40,7 +45,7 @@ def test_hook_attaches_normally():
         pass


-def test_perma_hook_attaches_normally():
+def test_perma_hook_attaches_normally(model):
     """Test that permanent hooks can be attached with TransformerBridge."""
     c = Counter()
@@ -66,7 +71,7 @@ def test_perma_hook_attaches_normally():
         pass


-def test_hook_context_manager():
+def test_hook_context_manager(model):
     """Test that hook context manager works with TransformerBridge."""
     c = Counter()
@@ -95,7 +100,7 @@ def test_hook_context_manager():
         pass


-def test_run_with_cache_functionality():
+def test_run_with_cache_functionality(model):
     """Test that run_with_cache works with TransformerBridge."""
     try:
         output, cache = model.run_with_cache(prompt)
@@ -122,7 +127,7 @@ def test_run_with_cache_functionality():
         pytest.skip(f"run_with_cache not working on TransformerBridge: {e}")


-def test_hook_dict_access():
+def test_hook_dict_access(model):
     """Test that hook_dict property works with TransformerBridge."""
     try:
         hook_dict = model.hook_dict
@@ -141,7 +146,7 @@ def test_hook_dict_access():
         pytest.skip(f"hook_dict not working on TransformerBridge: {e}")


-def test_basic_forward_with_hooks():
+def test_basic_forward_with_hooks(model):
     """Test basic forward pass with hooks on TransformerBridge."""

     def simple_hook(tensor, hook):
@@ -168,7 +173,7 @@ def simple_hook(tensor, hook):
         pytest.skip(f"Forward with hooks not working on TransformerBridge: {e}")


-def test_hook_names_consistency():
+def test_hook_names_consistency(model):
     """Test that hook names are consistent and follow expected patterns."""
     try:
         hook_dict = model.hook_dict
@@ -194,7 +199,7 @@ def test_hook_names_consistency():
         pytest.skip(f"Hook names check failed on TransformerBridge: {e}")


-def test_caching_with_names_filter():
+def test_caching_with_names_filter(model):
     """Test that caching with names filter works with TransformerBridge."""
     try:
         hook_dict = model.hook_dict
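
The change above replaces a module-level model load, which ran at import time during test collection, with a module-scoped fixture. The pattern is standard pytest: the fixture body runs once on first use and every test in the module receives the same object. A minimal standalone illustration with a hypothetical resource:

import pytest

@pytest.fixture(scope="module")
def expensive_resource():
    # Created once per module on first use, then shared by every test
    # below; the costly setup is not repeated for each test function.
    return {"loaded": True}

def test_first(expensive_resource):
    assert expensive_resource["loaded"]

def test_second(expensive_resource):
    # Receives the exact same dict instance as test_first.
    assert expensive_resource["loaded"]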

tests/integration/model_bridge/compatibility/test_match_huggingface.py

Lines changed: 16 additions & 22 deletions
@@ -13,13 +13,20 @@ class TestMatchHuggingFace:
     def model_name(self, request):
         return request.param

+    @pytest.fixture(scope="class")
+    def bridge_model(self, model_name):
+        """Load TransformerBridge once per test class."""
+        return TransformerBridge.boot_transformers(model_name, device="cpu")
+
+    @pytest.fixture(scope="class")
+    def hf_model(self, model_name):
+        """Load HuggingFace model once per test class."""
+        return AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
+
     # tests
-    def test_compare_huggingface_mlp_match_local_implementation(self, model_name):
+    def test_compare_huggingface_mlp_match_local_implementation(self, bridge_model, hf_model):
         """Test that TransformerBridge MLP outputs match HuggingFace MLP outputs."""
         try:
-            bridge_model = TransformerBridge.boot_transformers(model_name, device="cpu")
-            hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
-
             tensor_shape = (3, 5, bridge_model.cfg.d_model)
             test_tensor = torch.randn(tensor_shape)
@@ -48,12 +55,9 @@ def test_compare_huggingface_mlp_match_local_implementation(self, model_name):
         except Exception as e:
             pytest.fail(f"Unexpected error in MLP comparison: {e}")

-    def test_compare_huggingface_attention_match_local_implementation(self, model_name):
+    def test_compare_huggingface_attention_match_local_implementation(self, bridge_model, hf_model):
         """Test that TransformerBridge attention outputs match HuggingFace attention outputs."""
         try:
-            bridge_model = TransformerBridge.boot_transformers(model_name, device="cpu")
-            hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
-
             batch, pos, d_model = 3, 5, bridge_model.cfg.d_model
             input_tensor = torch.randn(batch, pos, d_model)
@@ -113,12 +117,9 @@ def test_compare_huggingface_attention_match_local_implementation(self, model_na
         except Exception as e:
             pytest.fail(f"Unexpected error in attention comparison: {e}")

-    def test_full_model_output_match(self, model_name):
+    def test_full_model_output_match(self, bridge_model, hf_model):
         """Test that full TransformerBridge model output matches HuggingFace model output."""
         try:
-            bridge_model = TransformerBridge.boot_transformers(model_name, device="cpu")
-            hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
-
             # Test with a simple prompt
             prompt = "The capital of France is"
@@ -144,11 +145,9 @@
         except Exception as e:
             pytest.fail(f"Unexpected error in full model comparison: {e}")

-    def test_tokenizer_consistency(self, model_name):
+    def test_tokenizer_consistency(self, bridge_model):
         """Test that TransformerBridge tokenizer matches HuggingFace tokenizer."""
         try:
-            bridge_model = TransformerBridge.boot_transformers(model_name, device="cpu")
-
             # Test tokenization
             prompt = "Hello, world! This is a test."
             bridge_tokens = bridge_model.to_tokens(prompt)
@@ -171,12 +170,9 @@ def test_tokenizer_consistency(self, model_name):
         except Exception as e:
             pytest.fail(f"Unexpected error in tokenizer consistency: {e}")

-    def test_config_consistency(self, model_name):
+    def test_config_consistency(self, bridge_model, hf_model):
         """Test that TransformerBridge config matches HuggingFace config."""
         try:
-            bridge_model = TransformerBridge.boot_transformers(model_name, device="cpu")
-            hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
-
             bridge_cfg = bridge_model.cfg
             hf_cfg = hf_model.config
@@ -199,11 +195,9 @@
         except Exception as e:
             pytest.fail(f"Unexpected error in config consistency: {e}")

-    def test_weight_access_consistency(self, model_name):
+    def test_weight_access_consistency(self, bridge_model):
         """Test that TransformerBridge weight access provides expected values."""
         try:
-            bridge_model = TransformerBridge.boot_transformers(model_name, device="cpu")
-
             # Test basic weight access patterns
             weight_checks = []

tests/integration/model_bridge/generalized_components/test_qkv_bridge_integration.py renamed to tests/integration/model_bridge/generalized_components/test_joint_qkv_attention_bridge_integration.py

Lines changed: 19 additions & 46 deletions
@@ -1,4 +1,4 @@
-"""Lightweight integration tests for QKVBridge.
+"""Lightweight integration tests for JointQKVAttentionBridge.

 Tests the core functionality without loading large models to keep CI fast.
 """
@@ -7,11 +7,10 @@
 import torch

 import transformer_lens.utils as utils
-from transformer_lens.model_bridge.generalized_components.qkv_bridge import QKVBridge


-class TestQKVBridgeIntegration:
-    """Minimal integration tests for QKVBridge."""
+class TestJointQKVAttentionBridgeIntegration:
+    """Minimal integration tests for JointQKVAttentionBridge."""

     def test_hook_alias_resolution(self):
         """Test that hook aliases are properly resolved."""
@@ -25,38 +24,6 @@ def test_hook_alias_resolution(self):
         assert utils.get_act_name("q", 1) == "blocks.1.attn.hook_q"
         assert utils.get_act_name("k", 2) == "blocks.2.attn.hook_k"

-    def test_joint_qkv_attention_bridge_properties(self):
-        """Test that JointQKVAttentionBridge properties are properly resolved."""
-        from transformer_lens.model_bridge.generalized_components.joint_qkv_attention import (
-            JointQKVAttentionBridge,
-        )
-
-        class TestConfig:
-            n_heads = 12
-
-        qkv_bridge = QKVBridge(name="qkv", config=TestConfig())
-
-        qkv_attention_bridge = JointQKVAttentionBridge(
-            name="blocks.0.attn",
-            config=TestConfig(),
-            submodules={"qkv": qkv_bridge},
-        )
-
-        assert qkv_attention_bridge.q.hook_in == qkv_bridge.q_hook_in
-        assert qkv_attention_bridge.q.hook_out == qkv_bridge.q_hook_out
-        assert qkv_attention_bridge.k.hook_in == qkv_bridge.k_hook_in
-        assert qkv_attention_bridge.k.hook_out == qkv_bridge.k_hook_out
-        assert qkv_attention_bridge.v.hook_in == qkv_bridge.v_hook_in
-        assert qkv_attention_bridge.v.hook_out == qkv_bridge.v_hook_out
-
-    def test_component_class_exists(self):
-        """Test that QKVBridge class can be imported."""
-
-        # Verify the class exists and has expected methods
-        assert hasattr(QKVBridge, "forward")
-        assert hasattr(QKVBridge, "_create_qkv_conversion_rule")
-        assert hasattr(QKVBridge, "_create_qkv_separation_rule")
-
     def test_hook_point_has_hooks_method(self):
         """Test that HookPoint.has_hooks method works correctly."""
         from transformer_lens.hook_points import HookPoint
@@ -82,9 +49,9 @@ def dummy_hook(x, hook):
         assert not hook_point.has_hooks()

     def test_architecture_imports(self):
-        """Test that architecture files can be imported and reference QKVBridge."""
+        """Test that architecture files can be imported and reference JointQKVAttentionBridge."""
         # Test that we can import the architecture files without errors
-        # Test that QKVBridge is referenced in the source files
+        # Test that JointQKVAttentionBridge is referenced in the source files
         import inspect

         from transformer_lens.model_bridge.supported_architectures import (
@@ -94,13 +61,19 @@ def test_architecture_imports(self):
         )

         gpt2_source = inspect.getsource(gpt2)
-        assert "QKVBridge" in gpt2_source, "GPT-2 architecture should reference QKVBridge"
+        assert (
+            "JointQKVAttentionBridge" in gpt2_source
+        ), "GPT-2 architecture should reference JointQKVAttentionBridge"

         bloom_source = inspect.getsource(bloom)
-        assert "QKVBridge" in bloom_source, "BLOOM architecture should reference QKVBridge"
+        assert (
+            "JointQKVAttentionBridge" in bloom_source
+        ), "BLOOM architecture should reference JointQKVAttentionBridge"

         neox_source = inspect.getsource(neox)
-        assert "QKVBridge" in neox_source, "NeoX architecture should reference QKVBridge"
+        assert (
+            "JointQKVAttentionBridge" in neox_source
+        ), "NeoX architecture should reference JointQKVAttentionBridge"

     @pytest.mark.skip(reason="Requires model loading - too slow for CI")
     def test_distilgpt2_integration(self):
@@ -112,15 +85,15 @@ def test_distilgpt2_integration(self):
         torch.set_grad_enabled(False)
         model = TransformerBridge.boot_transformers("distilgpt2", device="cpu")

-        # Verify QKVBridge usage
-        qkv_bridge_modules = [
+        # Verify JointQKVAttentionBridge usage
+        joint_qkv_attention_bridge_modules = [
             name
             for name, module in model.named_modules()
-            if "QKVBridge" in getattr(module, "__class__", {}).get("__name__", "")
+            if "JointQKVAttentionBridge" in getattr(module, "__class__", {}).get("__name__", "")
         ]
         assert (
-            len(qkv_bridge_modules) == 6
-        ), f"Expected 6 QKVBridge modules, got {len(qkv_bridge_modules)}"
+            len(joint_qkv_attention_bridge_modules) == 6
+        ), f"Expected 6 JointQKVAttentionBridge modules, got {len(joint_qkv_attention_bridge_modules)}"

         # Test basic functionality
         tokens = model.to_tokens("Test")
