|
33 | 33 | # SOFTWARE. |
34 | 34 | # |
35 | 35 |
|
36 | | -from typing import Union |
| 36 | +"""Jacobian computation utilities for attribution analysis. |
| 37 | +
|
| 38 | +This module provides core functionality for computing Jacobian matrices, which are |
| 39 | +essential for many attribution methods. The Jacobian matrix represents the first-order |
| 40 | +partial derivatives of a model's output with respect to its input features. |
| 41 | +
|
| 42 | +The module includes: |
| 43 | +- Utility functions for tensor manipulation |
| 44 | +- Core Jacobian computation function |
| 45 | +- Helper functions for matrix operations |
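| | +
| | +A typical call, as a minimal sketch (assuming a simple linear model): |
| | +
| | +>>> import torch |
| | +>>> model = torch.nn.Linear(10, 5) |
| | +>>> x = torch.randn(32, 10) |
| | +>>> compute_jacobian(model, x).shape |
| | +torch.Size([32, 5, 10]) |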
| 46 | +""" |
| 47 | + |
| 48 | +from typing import Union, Tuple, Optional |
37 | 49 |
|
38 | 50 | import numpy as np |
39 | 51 | import torch |
@@ -102,86 +114,135 @@ def tensors_to_cuda(vars_: list[torch.Tensor], |
102 | 114 |
|
103 | 115 | def compute_jacobian( |
104 | 116 | model: torch.nn.Module, |
105 | | - input_vars: list[torch.Tensor], |
106 | | - mode: str = "autograd", |
107 | | - cuda_device: str = "cuda", |
108 | | - double_precision: bool = False, |
109 | | - convert_to_numpy: bool = True, |
110 | | - hybrid_solver: bool = False, |
111 | | -) -> Union[torch.Tensor, np.ndarray]: |
| 117 | + x: torch.Tensor, |
| 118 | + output_dim: Optional[int] = None, |
| 119 | + device: Optional[torch.device] = None |
| 120 | +) -> torch.Tensor: |
112 | 121 | """Compute the Jacobian matrix for a given model and input. |
113 | 122 |
|
114 | | - This function computes the Jacobian matrix using PyTorch's autograd functionality. |
115 | | - The Jacobian represents the first-order partial derivatives of the model's output |
116 | | - with respect to its input parameters. It supports both CPU and CUDA computation, |
117 | | - as well as single and double precision. |
| 123 | + The Jacobian matrix J is defined as: |
| 124 | + J[i,j] = ∂f(x)[i]/∂x[j] |
| 125 | + where f is the model function and x is the input tensor. |
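| | +
| | +    For a linear model f(x) = W x + b, for instance, the Jacobian of every |
| | +    batch element is simply the weight matrix W. |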
| 126 | +
|
| 127 | + Parameters |
| 128 | + ---------- |
| 129 | + model : torch.nn.Module |
| 130 | + The neural network model for which to compute the Jacobian. |
| 131 | + x : torch.Tensor |
| 132 | +        Input tensor of shape (batch_size, input_dim). Gradients are enabled on an internal copy. |
| 133 | +    output_dim : Optional[int] |
| 134 | +        The dimension of the model's output. If None, it is inferred from a forward pass. |
| 135 | +    device : Optional[torch.device] |
| 136 | +        The device on which to perform computations. If None, the input tensor's device is used. |
| 137 | +
|
| 138 | + Returns |
| 139 | + ------- |
| 140 | + torch.Tensor |
| 141 | + Jacobian matrix of shape (batch_size, output_dim, input_dim). |
| 142 | +
|
| 143 | + Raises |
| 144 | + ------ |
| 145 | + ValueError |
| 146 | + If the input tensor is not 2D or if the model's output is not compatible |
| 147 | + with the specified output_dim. |
| 148 | +
|
| 149 | + Examples |
| 150 | + -------- |
| 151 | + >>> model = torch.nn.Linear(10, 5) |
| 152 | + >>> x = torch.randn(32, 10) |
| 153 | + >>> jacobian = compute_jacobian(model, x) |
| 154 | +    >>> jacobian.shape  # torch.Size([32, 5, 10]) |
| 155 | + """ |
| 156 | +    if device is None: |
| 157 | +        device = x.device |
| 158 | +    if x.ndim != 2: |
| 159 | +        raise ValueError("Input tensor must be 2D") |
| 160 | +    # Differentiating w.r.t. x needs gradient tracking; work on a detached copy. |
| 161 | +    model = model.to(device) |
| 162 | +    x = x.to(device).detach().requires_grad_(True) |
| 163 | +    # Run the forward pass once and reuse its graph for every output index. |
| 164 | +    output = model(x) |
| 165 | +    if output_dim is None: |
| 166 | +        output_dim = output.shape[1] |
| 167 | +    elif output.shape[1] != output_dim: |
| 168 | +        raise ValueError("Model's output dimension must match the specified output_dim") |
| 169 | + |
| 170 | +    jacobian = [] |
| 171 | +    for i in range(output_dim): |
| 172 | +        grads = torch.autograd.grad( |
| 173 | +            output[:, i:i + 1], |
| 174 | +            x, |
| 175 | +            retain_graph=True, |
| 176 | +            create_graph=False, |
| 177 | +            grad_outputs=torch.ones_like(output[:, i:i + 1]), |
| 178 | +        ) |
| 179 | +        jacobian.append(torch.cat(grads, dim=1)) |
| 180 | + |
| 181 | +    return torch.stack(jacobian, dim=1) |
118 | 182 |
|
119 | | - Args: |
120 | | - model: PyTorch model to compute Jacobian for. The model should be callable |
121 | | - with the input variables. |
122 | | - input_vars: List of input tensors to compute the Jacobian with respect to. |
123 | | - Each tensor should have requires_grad=True if gradients are needed. |
124 | | - mode: Computation mode. Currently only "autograd" is supported, which uses |
125 | | - PyTorch's automatic differentiation. |
126 | | - cuda_device: CUDA device identifier to use for computation (e.g., "cuda:0"). |
127 | | - Ignored if double_precision is True. |
128 | | - double_precision: If True, use double precision (float64) for computation. |
129 | | - This moves all tensors to CPU. |
130 | | - convert_to_numpy: If True, convert the output Jacobian to a numpy array. |
131 | | - If False, returns a PyTorch tensor. |
132 | | - hybrid_solver: If True, concatenate multiple model outputs along dimension 1 |
133 | | - before computing the Jacobian. Useful for models with multiple outputs. |
134 | 183 |
|
135 | | - Returns: |
136 | | - Jacobian matrix as either a PyTorch tensor or numpy array. The shape is |
137 | | - (batch_size, output_dim, input_dim), where: |
138 | | - - batch_size is the size of the input batch |
139 | | - - output_dim is the dimension of the model's output |
140 | | - - input_dim is the total dimension of all input variables |
| 184 | +def _reshape_for_jacobian(tensor: torch.Tensor) -> torch.Tensor: |
| 185 | + """Reshape a tensor to be compatible with Jacobian computation. |
141 | 186 |
|
142 | | - Example: |
143 | | - >>> model = torch.nn.Linear(10, 5) |
144 | | - >>> x = torch.randn(3, 10, requires_grad=True) |
145 | | - >>> jacobian = compute_jacobian(model, [x]) |
146 | | - >>> jacobian.shape |
147 | | - (3, 5, 10) |
| 187 | + Parameters |
| 188 | + ---------- |
| 189 | + tensor : torch.Tensor |
| 190 | + Input tensor of any shape. |
| 191 | +
|
| 192 | + Returns |
| 193 | + ------- |
| 194 | + torch.Tensor |
| 195 | + Reshaped tensor of shape (batch_size, -1). |
| 196 | +
|
| 197 | + Notes |
| 198 | + ----- |
| 199 | + This function ensures that the input tensor is properly flattened for |
| 200 | + Jacobian computation while preserving the batch dimension. |
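| | +
| | +    Examples |
| | +    -------- |
| | +    A quick shape check (illustrative only): |
| | +
| | +    >>> t = torch.randn(4, 3, 2) |
| | +    >>> _reshape_for_jacobian(t).shape |
| | +    torch.Size([4, 6]) |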
148 | 201 | """ |
149 | | - if double_precision: |
150 | | - model = model.to("cpu").double() |
151 | | - input_vars = tensors_to_cpu_and_double(input_vars) |
152 | | - if hybrid_solver: |
153 | | - output = model(*input_vars) |
154 | | - output_vars = torch.cat(output, dim=1).to("cpu").double() |
155 | | - else: |
156 | | - output_vars = model(*input_vars).to("cpu").double() |
157 | | - else: |
158 | | - model = model.to(cuda_device).float() |
159 | | - input_vars = tensors_to_cuda(input_vars, cuda_device=cuda_device) |
160 | | - |
161 | | - if hybrid_solver: |
162 | | - output = model(*input_vars) |
163 | | - output_vars = torch.cat(output, dim=1) |
164 | | - else: |
165 | | - output_vars = model(*input_vars) |
166 | | - |
167 | | - if mode == "autograd": |
168 | | - jacob = [] |
169 | | - for i in range(output_vars.shape[1]): |
170 | | - grads = torch.autograd.grad( |
171 | | - output_vars[:, i:i + 1], |
172 | | - input_vars, |
173 | | - retain_graph=True, |
174 | | - create_graph=False, |
175 | | - grad_outputs=torch.ones(output_vars[:, i:i + 1].shape).to( |
176 | | - output_vars.device), |
177 | | - ) |
178 | | - jacob.append(torch.cat(grads, dim=1)) |
179 | | - |
180 | | - jacobian = torch.stack(jacob, dim=1) |
181 | | - |
182 | | - jacobian = jacobian.detach().cpu() |
183 | | - |
184 | | - if convert_to_numpy: |
185 | | - jacobian = jacobian.numpy() |
| 202 | +    # reshape() also handles non-contiguous tensors, unlike view(). |
| | +    return tensor.reshape(tensor.shape[0], -1) |
186 | 203 |
|
| 204 | + |
| 205 | +def _compute_jacobian_columns( |
| 206 | + model: torch.nn.Module, |
| 207 | + x: torch.Tensor, |
| 208 | + output_dim: int, |
| 209 | + device: torch.device |
| 210 | +) -> torch.Tensor: |
| 211 | + """Compute Jacobian matrix column by column using automatic differentiation. |
| 212 | +
|
| 213 | + Parameters |
| 214 | + ---------- |
| 215 | + model : torch.nn.Module |
| 216 | + The neural network model. |
| 217 | + x : torch.Tensor |
| 218 | + Input tensor of shape (batch_size, input_dim). |
| 219 | + output_dim : int |
| 220 | + The dimension of the model's output. |
| 221 | + device : torch.device |
| 222 | + The device on which to perform computations. |
| 223 | +
|
| 224 | + Returns |
| 225 | + ------- |
| 226 | + torch.Tensor |
| 227 | + Jacobian matrix of shape (batch_size, output_dim, input_dim). |
| 228 | +
|
| 229 | + Notes |
| 230 | + ----- |
| 231 | +    This function computes the Jacobian by iterating over output dimensions and |
| 232 | +    using automatic differentiation to compute partial derivatives. It is more |
| 233 | +    memory-efficient than computing the full Jacobian at once but may be slower |
| 234 | +    for large output dimensions. |
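| | +
| | +    Examples |
| | +    -------- |
| | +    A minimal sketch; the input must already require gradients: |
| | +
| | +    >>> model = torch.nn.Linear(10, 5) |
| | +    >>> x = torch.randn(32, 10, requires_grad=True) |
| | +    >>> _compute_jacobian_columns(model, x, 5, x.device).shape |
| | +    torch.Size([32, 5, 10]) |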
| 235 | + """ |
| 236 | +    output = model(x)  # forward once; the input must already require gradients |
| 237 | +    jacobian = [] |
| 238 | +    for i in range(output_dim): |
| 239 | +        grads = torch.autograd.grad( |
| 240 | +            output[:, i:i + 1], |
| 241 | +            x, |
| 242 | +            retain_graph=True, |
| 243 | +            create_graph=False, |
| 244 | +            grad_outputs=torch.ones(output[:, i:i + 1].shape, device=device), |
| 245 | +        ) |
| 246 | +        jacobian.append(torch.cat(grads, dim=1)) |
| 247 | +    jacobian = torch.stack(jacobian, dim=1) |
187 | 248 | return jacobian |