SDP-CROWN_auto-verify/utils.py at main · henba1/SDP-CROWN_auto-verify · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import torch
import torch.nn as nn
import numpy as np
import gc
from collections import OrderedDict

from pathlib import Path
from models import *
from fractions import Fraction

def parse_float_or_fraction(x: str) -> float:
    """Convert a numeric string to ``float``.

    Plain float literals (e.g. ``"0.5"``, ``"1e-3"``) are parsed directly.
    Anything ``float`` rejects is retried as a ``fractions.Fraction`` literal
    (e.g. ``"2/255"``).

    Args:
        x: Text holding either a float literal or a fraction.

    Returns:
        The parsed value as a float.

    Raises:
        ValueError: If ``x`` is neither a float nor a fraction literal.
    """
    try:
        parsed = float(x)
    except ValueError:
        # Not a plain float literal; fall back to "numerator/denominator".
        parsed = float(Fraction(x))
    return parsed

# Helper function that generates the C matrix used to calculate the margins.
def build_C(label, classes):
    """
    Build the margin matrix C for a batch of ground-truth labels.

    Args:
        label: integer tensor of shape (B,). Each label[b] in [0..classes-1].
        classes: total number of classes K.

    Returns:
        C: tensor of shape (B, classes-1, classes) on ``label.device``.
        For each sample b, each row corresponds to one "negative class",
        i.e. every class in [0..classes-1] except label[b], in increasing order.
        Each row has +1 at column=label[b] and -1 at its negative class column;
        all other entries are 0.
    """
    device = label.device
    batch_size = label.size(0)
    num_neg = classes - 1

    # scatter_ and advanced indexing need int64 indices; accept any integer dtype.
    label = label.long()

    # 1) Initialize
    C = torch.zeros((batch_size, num_neg, classes), device=device)

    # 2) All class indices, shape: (1, K) -> (B, K)
    all_cls = torch.arange(classes, device=device).unsqueeze(0).expand(batch_size, -1)

    # 3) Negative classes only, shape (B, K-1): mask out the ground-truth.
    #    The explicit target shape (instead of -1) keeps this valid for B == 0.
    mask = all_cls != label.unsqueeze(1)
    neg_cls = all_cls[mask].view(batch_size, num_neg)

    # 4) Scatter +1 at each sample's ground-truth label
    #    index shape needed: (B, K-1, 1)
    pos_idx = label.unsqueeze(1).expand(-1, num_neg).unsqueeze(-1)
    C.scatter_(dim=2, index=pos_idx, value=1.0)

    # 5) Write -1 at each row's negative label: for row j of sample b the
    #    column is neg_cls[b, j]. All index tensors live on the same device
    #    as C, so no implicit host-to-device copy is needed.
    batch_idx = torch.arange(batch_size, device=device).unsqueeze(1)  # (B, 1)
    row_idx = torch.arange(num_neg, device=device).unsqueeze(0)       # (1, K-1)
    C[batch_idx, row_idx, neg_cls] = -1.0

    return C

def infer_model_architecture(model_name: str) -> nn.Module:
    """
    Infer PyTorch model architecture from model filename.

    Matching is case-insensitive and substring-based, in this order:

    1. MNIST models (``"mnist"`` in the name); default ``MNIST_ConvLarge``.
    2. Explicitly named CIFAR-10 models (``"cifar"`` plus an architecture tag).
    3. JAIR CIFAR-10 architectures (``conv_big``, ``cifar_7_1024``,
       ``resnet_4b``/``resnet4b``).
    4. Any remaining name containing ``"cifar"`` defaults to
       ``CIFAR10_ConvLarge``.

    Args:
        model_name: Model filename (without extension)

    Returns:
        Instantiated model architecture

    Raises:
        ValueError: If no known architecture matches ``model_name``.
    """
    name = model_name.lower()
    is_cifar = "cifar" in name  # also covers "cifar10"

    # MNIST models
    if "mnist" in name:
        if "mlp" in name:
            return MNIST_MLP()
        if "convsmall" in name:
            return MNIST_ConvSmall()
        return MNIST_ConvLarge()

    # CIFAR-10 models with an explicit architecture tag
    if is_cifar:
        if "cnn_a" in name:
            return CIFAR10_CNN_A()
        if "cnn_b" in name:
            return CIFAR10_CNN_B()
        if "cnn_c" in name:
            return CIFAR10_CNN_C()
        if "convsmall" in name:
            return CIFAR10_ConvSmall()
        if "convdeep" in name:
            return CIFAR10_ConvDeep()
        if "convlarge" in name or "conv_large" in name:
            return CIFAR10_ConvLarge()

    # JAIR CIFAR-10 architectures. These must be tested before the generic
    # CIFAR default: a name such as "cifar_7_1024" itself contains "cifar",
    # so it would otherwise always resolve to CIFAR10_ConvLarge.
    if "conv_big" in name:
        return CONV_BIG()
    if "cifar_7_1024" in name:
        return CIFAR_7_1024()
    if "resnet_4b" in name or "resnet4b" in name:
        return ResNet4B(bn=False)

    if is_cifar:
        # Default to ConvLarge for CIFAR-10
        return CIFAR10_ConvLarge()

    raise ValueError(
        f"Could not infer architecture from model name '{model_name}'. "
        "Please use one of the known SDP-CROWN architectures."
    )


def _to_batched_chw(image: np.ndarray) -> np.ndarray:
    """Return a copy of a single CIFAR-10 image with a leading batch axis.

    Accepted layouts: flattened (3072,) interpreted as CHW, CHW (3, 32, 32)
    and HWC (32, 32, 3). Arrays with any other number of dimensions (e.g. an
    already batched 4-D array) are returned as an unchanged copy.

    Raises:
        ValueError: For a 1-D array whose size is not 3072 or a 3-D array
            whose shape is neither (3, 32, 32) nor (32, 32, 3).
    """
    arr = image.copy()

    if arr.ndim == 1:
        if arr.size != 3072:  # CIFAR-10: 3*32*32
            raise ValueError(f"Unexpected flattened image size: {arr.size}")
        # Interpret as CHW; the image is already preprocessed, so only reshape.
        arr = arr.reshape(3, 32, 32)

    if arr.ndim == 3:
        if arr.shape == (32, 32, 3):
            # HWC -> CHW
            arr = np.transpose(arr, (2, 0, 1))
        elif arr.shape != (3, 32, 32):
            raise ValueError(f"Unexpected 3D image shape: {arr.shape}")
        # Add batch dimension: (C, H, W) -> (1, C, H, W)
        arr = arr[np.newaxis, ...]

    return arr


def load_model_and_dataset(args, device, image: np.ndarray, data_std: float = 0.225):
    """
    Load a PyTorch model from a checkpoint path and wrap a single image
    instance into tensors usable by SDP-CROWN.

    The checkpoint may be a full ``nn.Module``, a state_dict, or a dict that
    wraps the state_dict under a ``"state_dict"`` key. For state_dicts the
    architecture is inferred from the checkpoint filename.

    Verona stores CIFAR-10 images in CHW format (C,H,W), while the original
    SDP-CROWN utilities assumed HWC. To avoid channel mismatches like
    [1, 32, 3, 32] (seen in conv2d error), the image is explicitly normalized
    to (1, 3, 32, 32) here. Pixel normalization is already done in VERONA
    before flattening, so no further preprocessing is applied.

    Args:
        args: Argument namespace, with args.model (path to a .pth checkpoint)
              and args.radius already set.
        device: Torch device.
        image: Numpy array representing a single input instance (flattened or shaped).
        data_std: Dataset standard deviation used to rescale the radius into
              the normalized input space. Must be positive. Defaults to 0.225.

    Returns:
        model: nn.Module on the correct device, in eval mode.
        image_tensor: Tensor of shape (1, C, H, W) containing the image.
        radius_rescale: Float radius used for the perturbation
              (``args.radius / data_std``).
        classes: Integer number of output classes inferred from the model.

    Raises:
        ValueError: If the checkpoint format or the image layout is
            unsupported, or if ``data_std`` is not positive.
    """
    if data_std <= 0:
        raise ValueError(f"data_std must be positive, got {data_std}")

    model_path = Path(args.model)

    # NOTE(security): weights_only=False unpickles arbitrary Python objects,
    # which is required for full-model checkpoints but executes code embedded
    # in the file. Only load checkpoints from trusted sources.
    loaded = torch.load(model_path, map_location=device, weights_only=False)

    if isinstance(loaded, nn.Module):
        # It's a full model object
        model = loaded
    elif isinstance(loaded, dict):  # OrderedDict is a dict subclass
        # It's a state_dict, need to instantiate the model architecture first
        state_dict = loaded
        # Some checkpoints wrap the weights: {"state_dict": {...}, ...}. A real
        # state_dict maps names to tensors, so a dict value here is unambiguous.
        nested = loaded.get("state_dict")
        if isinstance(nested, dict):
            state_dict = nested
        print(f"[SDP-CROWN] Detected state_dict in {model_path.name}, inferring architecture from filename")
        model = infer_model_architecture(model_path.stem)
        model.load_state_dict(state_dict)
    else:
        raise ValueError(
            f"Unsupported checkpoint format in {model_path}. "
            "Expected either a state_dict (OrderedDict/dict) or a full model (nn.Module)."
        )
    model = model.to(device)
    model.eval()

    image_arr = _to_batched_chw(image)

    # Because of the normalization in VERONA, the radius must be rescaled by
    # the dataset std to live in the same (normalized) space as the image.
    radius_rescale = args.radius / data_std

    # Convert to tensor; already in (1, C, H, W).
    image_tensor = torch.from_numpy(image_arr).float().to(device)

    # One forward pass reveals the number of output classes.
    with torch.no_grad():
        logits = model(image_tensor)
    classes = int(logits.shape[-1])

    return model, image_tensor, radius_rescale, classes


# GPU memory management utility functions
def get_gpu_memory_info(device):
    """
    Get current GPU memory usage in GB and percentage.

    Args:
        device: CUDA device

    Returns:
        dict: Contains memory_allocated_gb, memory_reserved_gb, total_memory_gb
        and memory_percent (allocated memory as a percentage of total memory).
        Returns None when CUDA is unavailable or ``device`` is not a CUDA device.
    """
    if not torch.cuda.is_available():
        return None

    # A non-CUDA device (e.g. "cpu") has no CUDA statistics; report "no info"
    # instead of letting the torch.cuda queries below raise.
    if isinstance(device, (str, torch.device)) and torch.device(device).type != "cuda":
        return None

    # Wait for pending work on the queried device (not merely the current one)
    # so the numbers reflect completed allocations.
    torch.cuda.synchronize(device)

    bytes_per_gb = 1024**3
    memory_allocated = torch.cuda.memory_allocated(device) / bytes_per_gb
    memory_reserved = torch.cuda.memory_reserved(device) / bytes_per_gb
    total_memory = torch.cuda.get_device_properties(device).total_memory / bytes_per_gb
    memory_percent = (memory_allocated / total_memory) * 100

    return {
        "memory_allocated_gb": memory_allocated,
        "memory_reserved_gb": memory_reserved,
        "total_memory_gb": total_memory,
        "memory_percent": memory_percent,
    }


def cleanup_gpu_memory(model):
    """Release GPU memory held after processing a sample.

    Does nothing when CUDA is unavailable. Otherwise it drops every parameter
    gradient of ``model``, runs the Python garbage collector, empties the CUDA
    caching allocator and synchronizes the current CUDA device.

    Args:
        model: Module whose parameter gradients should be discarded.
    """
    if not torch.cuda.is_available():
        return

    # Drop gradient tensors so their storage becomes collectable.
    for p in model.parameters():
        if p.grad is None:
            continue
        p.grad.detach_()
        p.grad = None

    gc.collect()

    # Hand cached blocks back to the driver, then wait for the device.
    torch.cuda.empty_cache()
    torch.cuda.synchronize()