Enable setting cached hooks (#1048)

degenfabian · bryce13950 · web-flow · commit 840dc44a77b0 · 2025-09-16T16:40:14.000+02:00
* Enable setting which hooks to cache

* Scan aliases when compatibility mode is enabled

* Add bridge aliases to hook_registry again

* Simplifications

* Add new hooks to hooks cached by default

* Restore proper GPT-2 configuration

* ran format

* Proper access of mlp.in with getattr

---------

Co-authored-by: Bryce Meyer <bryce13950@gmail.com>
diff --git a/transformer_lens/model_bridge/bridge.py b/transformer_lens/model_bridge/bridge.py
@@ -105,6 +105,9 @@ def __init__(
         # Initialize hook registry after components are set up
         self._initialize_hook_registry()
 
+        # Initialize dictionary containing hooks that will be cached
+        self._initialize_hooks_to_cache()
+
     def __setattr__(self, name: str, value: Any) -> None:
         """Override setattr to track HookPoint objects dynamically."""
         # Call parent setattr first
@@ -141,8 +144,37 @@ def _initialize_hook_registry(self) -> None:
         # Scan existing components for hooks
         self._scan_existing_hooks(self, "")
 
+        # Add bridge aliases if compatibility mode is enabled
+        if self.compatibility_mode:
+            self._add_aliases_to_hooks(self._hook_registry)
+
         self._hook_registry_initialized = True
 
+    def _add_aliases_to_hooks(self, hooks: Dict[str, HookPoint]) -> None:
+        """Resolve every entry of ``self.hook_aliases`` and store it in ``hooks``.
+
+        ``hooks`` is mutated in place and nothing is returned. An alias whose
+        resolution yields ``None`` is left out of ``hooks``.
+
+        Args:
+            hooks: Mapping of hook name to HookPoint that receives the alias entries.
+        """
+        alias_map = self.hook_aliases
+        if not alias_map:
+            return
+
+        for alias_name, target in alias_map.items():
+            if not isinstance(target, list):
+                # Single target: resolve_alias is called unguarded here, so any
+                # exception it raises propagates to the caller.
+                # NOTE(review): the list branch below tolerates AttributeError
+                # but this branch does not - confirm that asymmetry is intended.
+                resolved = resolve_alias(self, alias_name, {alias_name: target})
+                if resolved is not None:
+                    hooks[alias_name] = resolved
+                continue
+
+            # List target: candidates are tried in order and the first one that
+            # resolves wins. resolve_alias is handed a one-entry mapping per try.
+            for candidate in target:
+                try:
+                    resolved = resolve_alias(self, alias_name, {alias_name: candidate})
+                except AttributeError:
+                    # Resolution failed for this candidate; move on to the next.
+                    continue
+                if resolved is not None:
+                    hooks[alias_name] = resolved
+                    break
+
     def _scan_existing_hooks(self, module: nn.Module, prefix: str = "") -> None:
         """Scan existing modules for hooks and add them to registry."""
         visited = set()
@@ -210,8 +242,13 @@ def scan_module(mod: nn.Module, path: str = "") -> None:
     @property
     def hook_dict(self) -> dict[str, HookPoint]:
         """Get all HookPoint objects in the model for compatibility with HookedTransformer."""
-        # Start with the current registry
-        return self._hook_registry.copy()
+        # Work on a copy so alias entries added below never mutate the registry itself
+        hooks = self._hook_registry.copy()
+
+        # Add aliases if compatibility mode is enabled
+        if self.compatibility_mode:
+            self._add_aliases_to_hooks(hooks)
+
+        return hooks
 
     def _discover_hooks(self) -> dict[str, HookPoint]:
         """Get all HookPoint objects from the registry (deprecated, use hook_dict)."""
@@ -226,6 +263,108 @@ def clear_hook_registry(self) -> None:
         self._hook_registry.clear()
         self._hook_registry_initialized = False
 
+    def _initialize_hooks_to_cache(self) -> None:
+        """Populate ``self.hooks_to_cache`` with the default set of cached hooks.
+
+        A fixed list of model-level hook names is combined with a per-block
+        list, expanded once for each of the ``self.cfg.n_layers`` blocks. Every
+        resulting name that is present in ``self._hook_registry`` is copied into
+        ``self.hooks_to_cache``; names missing from the registry are skipped.
+        """
+        self.hooks_to_cache = {}
+
+        # Hooks that exist once per model, outside the transformer blocks.
+        model_level_names = (
+            "embed.hook_in",
+            "embed.hook_out",
+            "pos_embed.hook_in",
+            "pos_embed.hook_out",
+            "rotary_embed.hook_in",
+            "rotary_embed.hook_out",
+            "ln_final.hook_in",
+            "ln_final.hook_scale",
+            "ln_final.hook_normalized",
+            "ln_final.hook_out",
+            "unembed.hook_in",
+            "unembed.hook_out",
+        )
+
+        # Hook names relative to "blocks.<idx>."; order matters because it
+        # determines the insertion order of hooks_to_cache.
+        per_block_suffixes = (
+            "hook_in",
+            "ln1.hook_in",
+            "ln1.hook_scale",
+            "ln1.hook_normalized",
+            "ln1.hook_out",
+            "ln1_post.hook_in",
+            "ln1_post.hook_scale",
+            "ln1_post.hook_normalized",
+            "ln1_post.hook_out",
+            "attn.hook_in",
+            "attn.q.hook_in",
+            "attn.q.hook_out",
+            "attn.q_norm.hook_in",
+            "attn.q_norm.hook_out",
+            "attn.k.hook_in",
+            "attn.k.hook_out",
+            "attn.k_norm.hook_in",
+            "attn.k_norm.hook_out",
+            "attn.v.hook_in",
+            "attn.v.hook_out",
+            "attn.o.hook_in",
+            "attn.o.hook_out",
+            "attn.hook_attn_scores",
+            "attn.hook_pattern",
+            "attn.hook_hidden_states",
+            "attn.hook_out",
+            "ln2.hook_in",
+            "ln2.hook_scale",
+            "ln2.hook_normalized",
+            "ln2.hook_out",
+            "ln2_post.hook_in",
+            "ln2_post.hook_scale",
+            "ln2_post.hook_normalized",
+            "ln2_post.hook_out",
+            "mlp.hook_in",
+            "mlp.in.hook_in",
+            "mlp.in.hook_out",
+            "mlp.out.hook_in",
+            "mlp.out.hook_out",
+            "mlp.gate.hook_in",
+            "mlp.gate.hook_out",
+            "mlp.hook_out",
+            "hook_out",
+        )
+
+        candidate_names = list(model_level_names)
+        for block_idx in range(self.cfg.n_layers):
+            candidate_names.extend(
+                f"blocks.{block_idx}.{suffix}" for suffix in per_block_suffixes
+            )
+
+        # Keep only the candidates the registry actually knows about.
+        registry = self._hook_registry
+        self.hooks_to_cache.update(
+            (name, registry[name]) for name in candidate_names if name in registry
+        )
+
+    def set_hooks_to_cache(
+        self, hook_names: Optional[List[str]] = None, include_all: bool = False
+    ) -> None:
+        """Set the hooks to cache when running the model with cache.
+
+        Names are looked up in ``self.hook_dict``, i.e. the hook registry plus,
+        when compatibility mode is enabled, the alias names. You can therefore
+        specify hook names that were only available in the old HookedTransformer,
+        but in this case you need to make sure to enable compatibility mode.
+
+        Args:
+            hook_names (Optional[List[str]]): List of hook names to cache.
+                Ignored when ``include_all`` is True.
+            include_all (bool): Whether to cache all hooks
+
+        Raises:
+            ValueError: If ``include_all`` is False and ``hook_names`` is None,
+                or if any requested name is not a known hook.
+        """
+        # hook_dict builds a fresh dict on every access, so it can be stored
+        # directly or filtered without touching the registry.
+        available_hooks = self.hook_dict
+
+        if include_all:
+            self.hooks_to_cache = available_hooks
+            return
+
+        if hook_names is None:
+            raise ValueError("hook_names must be provided if include_all is False")
+
+        hooks_to_cache = {}
+        for hook_name in hook_names:
+            if hook_name not in available_hooks:
+                raise ValueError(
+                    f"Hook {hook_name} does not exist. If you are using a hook name used with the old HookedTransformer, make sure to enable compatibility mode."
+                )
+            hooks_to_cache[hook_name] = available_hooks[hook_name]
+
+        # Assign only after every name validated, so a failed call leaves the
+        # previous selection untouched.
+        self.hooks_to_cache = hooks_to_cache
+
     def __getattr__(self, name: str) -> Any:
         """Provide a clear error message for missing attributes."""
         if name in self.__dict__:
@@ -1543,7 +1682,7 @@ def cache_hook(tensor: torch.Tensor, *, hook: Any) -> torch.Tensor:
             return cache_hook
 
         # Use cached hooks instead of re-discovering them
-        hook_dict = self.hook_dict
+        hook_dict = self.hooks_to_cache
 
         # Filter hooks based on names_filter
         for hook_name, hook in hook_dict.items():