feat(language-model): introduce Matrix and Node classes for enhanced matrix operations

Frankstein73 · Frankstein73 · commit 8a2803cc260c · 2026-03-09T20:47:26.000+08:00
- Add `Matrix` class to manage a dynamic matrix with source and target node mappings.
- Introduce `Node` dataclass to encapsulate node information and indices.
- Implement methods for adding source and target nodes, updating the matrix, and retrieving submatrices.
- Enhance type hints and docstrings for improved clarity and maintainability.
- Add unit tests for `Matrix` class to validate functionality and ensure correctness.
diff --git a/src/lm_saes/backend/language_model.py b/src/lm_saes/backend/language_model.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from dataclasses import dataclass
 import json
 import os
 import re
@@ -8,7 +9,7 @@
 from contextlib import contextmanager
 from functools import partial
 from itertools import accumulate
-from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Self, Union, cast
 
 import einops
 import torch
@@ -348,6 +349,199 @@ def detach_hook_fn(x: torch.Tensor, hook: HookPoint):
     return detach_hooks
 
 
+@dataclass
+class Node:
+    """Rows or columns of a ``Matrix`` that are stored under one key.
+
+    Attributes:
+        key: Dictionary key the node is registered under in ``Matrix.source``
+            or ``Matrix.target``.
+        indices: Caller-supplied indices of the node's elements.
+        matrix_indices: Row (source) or column (target) positions inside
+            ``Matrix.matrix``; entry ``i`` is where ``indices[i]`` lives.
+    """
+
+    key: Any
+    indices: torch.Tensor
+    matrix_indices: torch.Tensor
+
+
+# A ``(key, indices)`` pair naming a node and the elements of it to add or select.
+NodeInfo = tuple[Any, torch.Tensor]
+
+
+class Matrix:
+    """A growable 2-D tensor whose rows and columns are grouped into keyed nodes.
+
+    Rows belong to ``source`` nodes and columns to ``target`` nodes. Both
+    dictionaries map a node key to the :class:`Node` recording which
+    rows/columns of ``matrix`` hold that node's elements.
+    """
+
+    def __init__(self):
+        """Create an empty ``0 x 0`` float32 CPU matrix with no nodes."""
+        self.matrix = torch.zeros(0, 0, dtype=torch.float32, device="cpu")
+        self.source: dict[Any, Node] = {}
+        self.target: dict[Any, Node] = {}
+
+    def _add_elements(self, node_infos: list[NodeInfo], dim: int) -> None:
+        """Append one zero-filled row (``dim == 0``) or column (``dim == 1``) per new element.
+
+        Args:
+            node_infos: ``(key, indices)`` pairs; each contributes
+                ``indices.shape[0]`` consecutive rows/columns, in list order.
+            dim: ``0`` to grow rows and register source nodes, ``1`` to grow
+                columns and register target nodes.
+        """
+        node_dict = self.source if dim == 0 else self.target
+
+        next_index = self.matrix.shape[dim]
+        # The zero block to append matches the matrix everywhere except along `dim`.
+        padding_shape = list(self.matrix.shape)
+        padding_shape[dim] = sum(indices.shape[0] for _, indices in node_infos)
+        self.matrix = torch.cat(
+            [self.matrix, torch.zeros(padding_shape, dtype=self.matrix.dtype, device=self.matrix.device)],
+            dim=dim,
+        )
+        for key, indices in node_infos:
+            node_length = indices.shape[0]
+            matrix_indices = torch.arange(next_index, next_index + node_length, device=self.matrix.device)
+            self._update_node(Node(key, indices, matrix_indices), node_dict)
+            next_index += node_length
+
+    def add_source(self, node_infos: list[NodeInfo] | NodeInfo) -> None:
+        """Append rows for one ``(key, indices)`` pair or a list of such pairs."""
+        node_infos = node_infos if isinstance(node_infos, list) else [node_infos]
+        self._add_elements(node_infos, 0)
+
+    def add_target(self, node_infos: list[NodeInfo] | NodeInfo) -> None:
+        """Append columns for one ``(key, indices)`` pair or a list of such pairs."""
+        node_infos = node_infos if isinstance(node_infos, list) else [node_infos]
+        self._add_elements(node_infos, 1)
+
+    def update_matrix(self, matrix: torch.Tensor) -> None:
+        """Overwrite every entry of the stored matrix in place.
+
+        The stored tensor keeps its shape, dtype and device, so ``matrix`` must
+        be assignable (broadcastable) to the current shape.
+        """
+        self.matrix[:, :] = matrix
+
+    @staticmethod
+    def _update_node(node: Node, node_dict: dict[Any, Node]) -> None:
+        """Register ``node`` in ``node_dict``, extending the entry already stored under its key.
+
+        A new key stores ``node`` itself; an existing key gets ``node``'s
+        ``indices`` and ``matrix_indices`` concatenated after its own.
+        """
+        existing = node_dict.get(node.key)
+        if existing is None:
+            node_dict[node.key] = node
+            return
+        existing.matrix_indices = torch.cat([existing.matrix_indices, node.matrix_indices], dim=0)
+        existing.indices = torch.cat([existing.indices, node.indices], dim=0)
+
+    def _get_sublines(self, node_infos: list[NodeInfo] | None, dim: int) -> tuple[dict[Any, Node], torch.Tensor]:
+        """Resolve a node selection into a node mapping and the rows/columns it picks.
+
+        Args:
+            node_infos: ``(key, indices)`` pairs to select, or ``None`` to keep
+                every row/column in its current order.
+            dim: ``0`` to select among source rows, otherwise target columns.
+
+        Returns:
+            ``(node_dict, old_matrix_indices)``: the nodes renumbered for the
+            selection, and the positions in ``self.matrix`` to gather, in order.
+
+        Raises:
+            KeyError: If a requested key is not a node along ``dim``.
+            IndexError: If a requested index is not an element of its node.
+        """
+        full_node_dict = self.source if dim == 0 else self.target
+        device = self.matrix.device
+        if node_infos is None:
+            # Hand out copies of the Node objects: `_update_node` rebinds their
+            # attributes, so sharing them would let edits to the submatrix
+            # corrupt this matrix's bookkeeping.
+            node_copies = {
+                key: Node(node.key, node.indices, node.matrix_indices) for key, node in full_node_dict.items()
+            }
+            return (node_copies, torch.arange(self.matrix.shape[dim], device=device))
+
+        new_node_dict: dict[Any, Node] = {}
+        selected: list[torch.Tensor] = []
+        offset = 0
+        for key, requested in node_infos:
+            node = full_node_dict[key]
+            # position_of[v] is the slot of index value v inside the node, or -1 when
+            # v is absent. Filling with -1 (rather than leaving the table
+            # uninitialised) lets unknown indices be rejected instead of silently
+            # selecting arbitrary rows/columns.
+            table_size = int(node.indices.max()) + 1 if node.indices.numel() > 0 else 0
+            position_of = torch.full((table_size,), -1, dtype=torch.long, device=device)
+            position_of[node.indices] = torch.arange(node.indices.shape[0], device=device)
+
+            out_of_range = (requested < 0) | (requested >= table_size)
+            if out_of_range.any():
+                raise IndexError(f"indices {requested[out_of_range].tolist()} are not part of node {key!r}")
+            positions = position_of[requested]
+            absent = (positions < 0).to(requested.device)
+            if absent.any():
+                raise IndexError(f"indices {requested[absent].tolist()} are not part of node {key!r}")
+
+            matrix_indices = node.matrix_indices[positions]
+            selected.append(matrix_indices)
+            new_positions = torch.arange(offset, offset + matrix_indices.shape[0], device=device)
+            self._update_node(Node(key, requested, new_positions), new_node_dict)
+            offset += matrix_indices.shape[0]
+
+        # Leading empty tensor keeps the result a valid long index when nothing is selected.
+        empty = torch.zeros(0, dtype=torch.long, device=device)
+        return (new_node_dict, torch.cat([empty, *selected], dim=0))
+
+    @classmethod
+    def _build_submatrix(
+        cls, matrix: torch.Tensor, source_node_dict: dict[Any, Node], target_node_dict: dict[Any, Node]
+    ) -> Self:
+        """Wrap an existing tensor and node mappings in a new instance without copying them."""
+        submatrix = cls()
+        submatrix.matrix = matrix
+        submatrix.source = source_node_dict
+        submatrix.target = target_node_dict
+        return submatrix
+
+    def get_submatrix(
+        self,
+        source_node_infos: NodeInfo | list[NodeInfo] | None = None,
+        target_node_infos: NodeInfo | list[NodeInfo] | None = None,
+    ) -> Matrix:
+        """Return a new ``Matrix`` restricted to the requested source and target elements.
+
+        Args:
+            source_node_infos: ``(key, indices)`` pair(s) choosing rows; ``None``
+                keeps all rows.
+            target_node_infos: ``(key, indices)`` pair(s) choosing columns;
+                ``None`` keeps all columns.
+
+        Returns:
+            A ``Matrix`` holding a copy of the selected entries, with rows and
+            columns ordered as requested.
+
+        Raises:
+            KeyError: If a requested key is unknown.
+            IndexError: If a requested index is not an element of its node.
+        """
+        if source_node_infos is not None and not isinstance(source_node_infos, list):
+            source_node_infos = [source_node_infos]
+        if target_node_infos is not None and not isinstance(target_node_infos, list):
+            target_node_infos = [target_node_infos]
+        source_node_dict, source_matrix_indices = self._get_sublines(source_node_infos, 0)
+        target_node_dict, target_matrix_indices = self._get_sublines(target_node_infos, 1)
+        # Index rows first, then columns, so the result is the full cross product.
+        selected = self.matrix[source_matrix_indices][:, target_matrix_indices]
+        return Matrix._build_submatrix(selected, source_node_dict, target_node_dict)
+
+
 class AdjacencyMatrix(torch.Tensor):
     matrix: torch.Tensor
     source_list: list[tuple[torch.Tensor, Any]]
diff --git a/tests/unit/test_matrix.py b/tests/unit/test_matrix.py
@@ -0,0 +1,92 @@
+"""Unit tests for the ``Matrix`` container in ``lm_saes.backend.language_model``."""
+
+import torch
+
+from lm_saes.backend.language_model import Matrix
+
+
+def _assert_values(actual: torch.Tensor, expected: list[int]) -> None:
+    """Assert that ``actual`` equals the integer tensor built from ``expected``."""
+    assert torch.equal(actual, torch.tensor(expected))
+
+
+def _build_sample_matrix() -> Matrix:
+    """Return a 3x3 matrix holding 1..9 row-major, with sources s1/s2 and targets t1/t2."""
+    sample = Matrix()
+    sample.add_source([("s1", torch.tensor([0, 2])), ("s2", torch.tensor([1]))])
+    sample.add_target([("t1", torch.tensor([10, 11])), ("t2", torch.tensor([12]))])
+    values = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]], dtype=torch.float32)
+    sample.update_matrix(values)
+    return sample
+
+
+def test_add_source_and_target_shapes_and_node_mappings():
+    """A single source and target node define the matrix shape and index maps."""
+    matrix = Matrix()
+    matrix.add_source(("src", torch.tensor([3, 7])))
+    matrix.add_target(("tgt", torch.tensor([11, 13, 17])))
+
+    assert matrix.matrix.shape == (2, 3)
+    source_node, target_node = matrix.source["src"], matrix.target["tgt"]
+    _assert_values(source_node.indices, [3, 7])
+    _assert_values(source_node.matrix_indices, [0, 1])
+    _assert_values(target_node.indices, [11, 13, 17])
+    _assert_values(target_node.matrix_indices, [0, 1, 2])
+
+
+def test_add_source_merges_same_key():
+    """Adding to an existing key appends to that node rather than replacing it."""
+    matrix = Matrix()
+    for new_indices in ([0, 2], [5]):
+        matrix.add_source(("src", torch.tensor(new_indices)))
+
+    assert matrix.matrix.shape == (3, 0)
+    merged = matrix.source["src"]
+    _assert_values(merged.indices, [0, 2, 5])
+    _assert_values(merged.matrix_indices, [0, 1, 2])
+
+
+def test_update_matrix_overwrites_values():
+    """``update_matrix`` replaces the zero-initialised entries with the given values."""
+    matrix = Matrix()
+    matrix.add_source(("src", torch.tensor([0, 1])))
+    matrix.add_target(("tgt", torch.tensor([0, 1])))
+
+    new_values = torch.tensor([[1.5, -2.0], [3.25, 4.0]], dtype=torch.float32)
+    matrix.update_matrix(new_values)
+
+    assert torch.equal(matrix.matrix, new_values)
+
+
+def test_get_submatrix_selects_expected_rows_and_columns():
+    """Selecting by node indices picks the matching entries and renumbers the nodes."""
+    source_selection = [("s1", torch.tensor([2]))]
+    target_selection = [("t1", torch.tensor([11])), ("t2", torch.tensor([12]))]
+
+    submatrix = _build_sample_matrix().get_submatrix(
+        source_node_infos=source_selection,
+        target_node_infos=target_selection,
+    )
+
+    assert torch.equal(submatrix.matrix, torch.tensor([[5.0, 6.0]], dtype=torch.float32))
+    # (node, original indices, position inside the submatrix)
+    expected_nodes = [
+        (submatrix.source["s1"], [2], [0]),
+        (submatrix.target["t1"], [11], [0]),
+        (submatrix.target["t2"], [12], [1]),
+    ]
+    for node, indices, matrix_indices in expected_nodes:
+        _assert_values(node.indices, indices)
+        _assert_values(node.matrix_indices, matrix_indices)
+
+
+def test_add_multiple_nodes_assigns_contiguous_matrix_indices():
+    """Nodes added in one call receive consecutive, non-overlapping positions."""
+    matrix = Matrix()
+    matrix.add_source([("s1", torch.tensor([0, 2])), ("s2", torch.tensor([1]))])
+    matrix.add_target([("t1", torch.tensor([10, 11])), ("t2", torch.tensor([12]))])
+
+    _assert_values(matrix.source["s1"].matrix_indices, [0, 1])
+    _assert_values(matrix.source["s2"].matrix_indices, [2])
+    _assert_values(matrix.target["t1"].matrix_indices, [0, 1])
+    _assert_values(matrix.target["t2"].matrix_indices, [2])