|
| 1 | +"""Utilities for traversing and applying functions to every component in a TransformerBridge model.""" |
| 2 | + |
| 3 | +from typing import Any, Callable |
| 4 | + |
| 5 | +import torch.nn as nn |
| 6 | + |
| 7 | +from transformer_lens.model_bridge.bridge import TransformerBridge |
| 8 | +from transformer_lens.model_bridge.generalized_components.base import ( |
| 9 | + GeneralizedComponent, |
| 10 | +) |
| 11 | + |
| 12 | + |
def collect_all_submodules_of_component(
    model: TransformerBridge,
    component: GeneralizedComponent,
    submodules: dict,
    block_prefix: str = "",
) -> dict:
    """Walk the submodule tree below ``component`` and register every node.

    Each entry of ``component.submodules`` is stored in ``submodules`` under
    the key ``block_prefix + <submodule name>`` (plain string concatenation;
    no separator is inserted by this function). The walk is depth-first and
    the same ``block_prefix`` is forwarded unchanged to every nested level.

    Args:
        model: The TransformerBridge model the component belongs to. It is
            only forwarded to the helpers called from here.
        component: The component whose ``submodules`` mapping is traversed.
        submodules: Dictionary that receives the entries (modified in-place).
        block_prefix: String prepended to every submodule name when building
            the dictionary key. Defaults to the empty string.

    Returns:
        The populated dictionary mapping (prefixed) names to submodules.
    """
    for child in component.submodules.values():
        key = block_prefix + child.name
        submodules[key] = child

        # A list-item child stands for a whole list of blocks, so the
        # individual blocks are gathered through the block-bridge helper.
        if child.is_list_item:
            submodules = collect_components_of_block_bridge(model, child, submodules)

        # Nothing nested below this child: move on to the next sibling.
        if not child.submodules:
            continue

        submodules = collect_all_submodules_of_component(
            model, child, submodules, block_prefix
        )

    return submodules
| 41 | + |
| 42 | + |
def collect_components_of_block_bridge(
    model: TransformerBridge, component: GeneralizedComponent, components: dict
) -> dict:
    """Register every block of a list-style component, plus each block's submodules.

    The remote object for ``component.name`` is looked up through
    ``model.adapter.get_remote_component``. When that object is an
    ``nn.ModuleList``, each block is stored under ``block.name`` and its
    submodules are collected with ``block.name`` as the key prefix. When it
    is anything else, ``components`` is returned untouched.

    Args:
        model: The TransformerBridge model providing ``adapter`` and
            ``original_model``.
        component: The component whose name identifies the remote module list.
        components: Dictionary that receives the entries (modified in-place).

    Returns:
        The populated dictionary mapping names to components.
    """
    remote = model.adapter.get_remote_component(model.original_model, component.name)

    # Only a ModuleList can be iterated block by block; otherwise do nothing.
    if not isinstance(remote, nn.ModuleList):
        return components

    for block in remote:
        components[block.name] = block
        components = collect_all_submodules_of_component(
            model, block, components, block.name
        )

    return components
| 64 | + |
| 65 | + |
def collect_all_components(model: TransformerBridge, components: dict) -> dict:
    """Gather every component of a TransformerBridge into one dictionary.

    Iterates over the values of ``model.adapter.get_component_mapping()``.
    Each top-level component is stored under its ``name``, followed by all of
    its nested submodules. For components flagged ``is_list_item``, the
    individual blocks (and their submodules) are collected as well.

    Args:
        model: The TransformerBridge model to collect components from.
        components: Dictionary that receives the entries (modified in-place).

    Returns:
        The populated dictionary mapping names to components.
    """
    mapping = model.adapter.get_component_mapping()

    for top_level in mapping.values():
        components[top_level.name] = top_level
        components = collect_all_submodules_of_component(model, top_level, components)

        # List-style components expand into one entry per block.
        if top_level.is_list_item:
            components = collect_components_of_block_bridge(model, top_level, components)

    return components
| 85 | + |
| 86 | + |
def apply_fn_to_all_components(
    model: TransformerBridge,
    fn: Callable[[GeneralizedComponent], Any],
    components: dict | None = None,
) -> dict[str, Any]:
    """Call ``fn`` on every component and gather the results by component name.

    Args:
        model: The TransformerBridge model to apply the function to. It is
            only used to collect components when ``components`` is None.
        fn: The function to call with each component.
        components: Optional dictionary of components to process. When None,
            all components of ``model`` are collected first. Only the
            dictionary's values are used; its keys are ignored.

    Returns:
        A dictionary mapping each component's ``name`` attribute to the value
        ``fn`` returned for it. Components that share the same ``name``
        overwrite one another, so only the last result for that name is kept.
    """
    targets = collect_all_components(model, {}) if components is None else components

    results: dict[str, Any] = {}
    for target in targets.values():
        # Call fn first, then read the name, so a rename done by fn is honoured.
        outcome = fn(target)
        results[target.name] = outcome

    return results
0 commit comments