9 | 9 | import torch |
10 | 10 | import numpy as np |
11 | 11 |  |
12 | | -unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME') |
13 | | - |
14 | | -parser = argparse.ArgumentParser(description='Process model with specified path') |
15 | | -parser.add_argument('--model-path', '-m', help='Path to the model') |
| 12 | +### If you want to dump RoPE activations, apply this monkey patch to the model |
| 13 | +### class from Transformers that you are running (replace apertus.modeling_apertus |
| 14 | +### with the proper package and class for your model). |
| 15 | +### === START ROPE DEBUG === |
| 16 | +# from transformers.models.apertus.modeling_apertus import apply_rotary_pos_emb |
| 17 | + |
| 18 | +# orig_rope = apply_rotary_pos_emb |
| 19 | +# torch.set_printoptions(threshold=float('inf')) |
| 20 | +# torch.set_printoptions(precision=6, sci_mode=False) |
| 21 | + |
| 22 | +# def debug_rope(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): |
| 23 | +# # log inputs |
| 24 | +# summarize(q, "RoPE.q_in") |
| 25 | +# summarize(k, "RoPE.k_in") |
| 26 | + |
| 27 | +# # call original |
| 28 | +# q_out, k_out = orig_rope(q, k, cos, sin, position_ids, unsqueeze_dim) |
| 29 | + |
| 30 | +# # log outputs |
| 31 | +# summarize(q_out, "RoPE.q_out") |
| 32 | +# summarize(k_out, "RoPE.k_out") |
| 33 | + |
| 34 | +# return q_out, k_out |
| 35 | + |
| 36 | +# # Patch it |
| 37 | +# import transformers.models.apertus.modeling_apertus as apertus_mod # noqa: E402 |
| 38 | +# apertus_mod.apply_rotary_pos_emb = debug_rope |
| 39 | +### === END ROPE DEBUG === |
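The commented patch above targets the Apertus classes; the same pattern applies to any model family whose attention calls a module-level `apply_rotary_pos_emb`. A minimal sketch for a stock Llama checkpoint (module path taken from the standard `transformers` layout; adjust for your model):

```python
# Sketch: the same RoPE patch for Llama-family models.
import transformers.models.llama.modeling_llama as llama_mod

orig_rope = llama_mod.apply_rotary_pos_emb

def debug_rope(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
    summarize(q, "RoPE.q_in")
    summarize(k, "RoPE.k_in")
    q_out, k_out = orig_rope(q, k, cos, sin, position_ids, unsqueeze_dim)
    summarize(q_out, "RoPE.q_out")
    summarize(k_out, "RoPE.k_out")
    return q_out, k_out

llama_mod.apply_rotary_pos_emb = debug_rope  # patch the module-level symbol
```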
| 40 | + |
| 41 | + |
| 42 | +def summarize(tensor: torch.Tensor, name: str, max_seq: int = 3, max_vals: int = 3): |
| 43 | +    """ |
| 44 | +    Print a tensor in llama.cpp debug style. |
| 45 | + |
| 46 | +    Supports: |
| 47 | +    - 2D tensors (seq, hidden) |
| 48 | +    - 3D tensors (batch, seq, hidden) |
| 49 | +    - 4D tensors (batch, seq, heads, dim_per_head) via flattening heads × dim_per_head |
| 50 | + |
| 51 | +    Shows first and last max_vals of each vector per sequence position. |
| 52 | +    """ |
| 53 | + t = tensor.detach().to(torch.float32).cpu() |
| 54 | + |
| 55 | + # Determine dimensions |
| 56 | + if t.ndim == 3: |
| 57 | + _, s, _ = t.shape |
| 58 | + elif t.ndim == 2: |
| 59 | + _, s = 1, t.shape[0] |
| 60 | + t = t.unsqueeze(0) |
| 61 | + elif t.ndim == 4: |
| 62 | + _, s, _, _ = t.shape |
| 63 | + else: |
| 64 | + print(f"Skipping tensor due to unsupported dimensions: {t.ndim}") |
| 65 | + return |
| 66 | + |
| 67 | + ten_shape = t.shape |
| 68 | + |
| 69 | + print(f"ggml_debug: {name} = (f32) ... = {{{ten_shape}}}") |
| 70 | + print(" [") |
| 71 | + print(" [") |
| 72 | + |
| 73 | + # Determine indices for first and last sequences |
| 74 | + first_indices = list(range(min(s, max_seq))) |
| 75 | + last_indices = list(range(max(0, s - max_seq), s)) |
| 76 | + |
| 77 | + # Check if there's an overlap between first and last indices or if we're at the edge case of s = 2 * max_seq |
| 78 | + has_overlap = bool(set(first_indices) & set(last_indices)) or (max_seq * 2 == s) |
| 79 | + |
| 80 | + # Combine indices |
| 81 | + if has_overlap: |
| 82 | + # If there's overlap, just use the combined unique indices |
| 83 | + indices = sorted(list(set(first_indices + last_indices))) |
| 84 | + separator_index = None |
| 85 | + else: |
| 86 | + # If no overlap, we'll add a separator between first and last sequences |
| 87 | + indices = first_indices + last_indices |
| 88 | + separator_index = len(first_indices) |
| 89 | + |
| 90 | + for i, si in enumerate(indices): |
| 91 | + # Add separator if needed |
| 92 | + if separator_index is not None and i == separator_index: |
| 93 | + print(" ...") |
| 94 | + |
| 95 | + # Extract appropriate slice |
| 96 | + vec = t[0, si] |
| 97 | + if vec.ndim == 2: # 4D case: flatten heads × dim_per_head |
| 98 | + flat = vec.flatten().tolist() |
| 99 | + else: # 2D or 3D case |
| 100 | + flat = vec.tolist() |
| 101 | + |
| 102 | + # First and last slices |
| 103 | + first = flat[:max_vals] |
| 104 | + last = flat[-max_vals:] if len(flat) >= max_vals else flat |
| 105 | + first_str = ", ".join(f"{v:12.4f}" for v in first) |
| 106 | + last_str = ", ".join(f"{v:12.4f}" for v in last) |
| 107 | + |
| 108 | + print(f" [{first_str}, ..., {last_str}]") |
| 109 | + |
| 110 | + print(" ],") |
| 111 | + print(" ]") |
| 112 | + print(f" sum = {t.sum().item():.6f}\n") |
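For a quick sanity check of the printer itself, `summarize` can be called directly on random tensors (shapes here are arbitrary):

```python
# Standalone check of summarize(); values are random, the output format
# mimics llama.cpp's ggml_debug dumps.
x = torch.randn(1, 10, 16)  # (batch, seq, hidden)
summarize(x, "sanity.3d")
summarize(torch.randn(1, 10, 4, 8), "sanity.4d")  # heads flattened to 4 x 8 = 32 values
```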
| 113 | + |
| 114 | + |
| 115 | +def debug_hook(name): |
| 116 | + def fn(_m, input, output): |
| 117 | + if isinstance(input, torch.Tensor): |
| 118 | + summarize(input, name + "_in") |
| 119 | +        elif isinstance(input, (tuple, list)) and input and isinstance(input[0], torch.Tensor): |
| 120 | + summarize(input[0], name + "_in") |
| 121 | + if isinstance(output, torch.Tensor): |
| 122 | + summarize(output, name + "_out") |
| 123 | +        elif isinstance(output, (tuple, list)) and output and isinstance(output[0], torch.Tensor): |
| 124 | + summarize(output[0], name + "_out") |
| 125 | + |
| 126 | + return fn |
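`debug_hook` is registered on every leaf module further below; if that full dump is too noisy, the same factory can be attached selectively once `model` is loaded. A sketch (the `*_proj` names are typical of Llama-style attention and may differ in your architecture):

```python
# Sketch: hook only the attention projections instead of every leaf module.
for name, module in model.named_modules():
    if name.endswith(("q_proj", "k_proj", "v_proj", "o_proj")):
        module.register_forward_hook(debug_hook(name))
```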
| 127 | + |
| 128 | + |
| 129 | +unreleased_model_name = os.getenv("UNRELEASED_MODEL_NAME") |
| 130 | + |
| 131 | +parser = argparse.ArgumentParser(description="Process model with specified path") |
| 132 | +parser.add_argument("--model-path", "-m", help="Path to the model") |
16 | 133 | args = parser.parse_args() |
17 | 134 |  |
18 | | -model_path = os.environ.get('MODEL_PATH', args.model_path) |
| 135 | +model_path = os.environ.get("MODEL_PATH", args.model_path) |
19 | 136 | if model_path is None: |
20 | | - parser.error("Model path must be specified either via --model-path argument or MODEL_PATH environment variable") |
| 137 | + parser.error( |
| 138 | + "Model path must be specified either via --model-path argument or MODEL_PATH environment variable" |
| 139 | + ) |
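Note the precedence here: because of `os.environ.get("MODEL_PATH", args.model_path)`, a set `MODEL_PATH` environment variable wins over the `--model-path` flag, which only serves as the fallback.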
21 | 140 |  |
22 | 141 | config = AutoConfig.from_pretrained(model_path) |
23 | 142 |  |

34 | 153 |  |
35 | 154 | if unreleased_model_name: |
36 | 155 | model_name_lower = unreleased_model_name.lower() |
37 | | - unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}" |
| 156 | + unreleased_module_path = ( |
| 157 | + f"transformers.models.{model_name_lower}.modular_{model_name_lower}" |
| 158 | + ) |
38 | 159 | class_name = f"{unreleased_model_name}ForCausalLM" |
39 | 160 | print(f"Importing unreleased model module: {unreleased_module_path}") |
40 | 161 |  |
41 | 162 | try: |
42 | | - model_class = getattr(importlib.import_module(unreleased_module_path), class_name) |
43 | | - model = model_class.from_pretrained(model_path) # Note: from_pretrained, not fromPretrained |
| 163 | + model_class = getattr( |
| 164 | + importlib.import_module(unreleased_module_path), class_name |
| 165 | + ) |
| 166 | + model = model_class.from_pretrained( |
| 167 | + model_path |
| 168 | + ) # Note: from_pretrained, not fromPretrained |
44 | 169 | except (ImportError, AttributeError) as e: |
45 | 170 | print(f"Failed to import or load model: {e}") |
46 | 171 | exit(1) |
47 | 172 | else: |
48 | | - model = AutoModelForCausalLM.from_pretrained(model_path) |
| 173 | + model = AutoModelForCausalLM.from_pretrained( |
| 174 | + model_path, device_map="auto", offload_folder="offload" |
| 175 | + ) |
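Note that `device_map="auto"` requires the `accelerate` package, and `offload_folder` is only used when weights fit in neither GPU nor CPU memory and have to be spilled to disk.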
| 176 | + |
| 177 | +for name, module in model.named_modules(): |
| 178 | + if len(list(module.children())) == 0: # only leaf modules |
| 179 | + module.register_forward_hook(debug_hook(name)) |
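With every leaf module hooked, any forward pass streams `_in`/`_out` summaries. A minimal smoke test, assuming the checkpoint ships a tokenizer (the prompt is arbitrary):

```python
# Sketch: one forward pass to exercise all registered hooks.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_path)
batch = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
with torch.no_grad():
    model(**batch)  # each hooked module prints its in/out tensors via summarize()
```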
49 | 180 |  |
50 | 181 | model_name = os.path.basename(model_path) |
51 | 182 | # Printing the Model class to allow for easier debugging. This can be useful |