|
| 1 | +import re |
1 | 2 | import warnings |
2 | | -from typing import Literal, Optional |
| 3 | +from typing import Dict, Literal, Optional |
3 | 4 |
|
4 | 5 | from hf_mem.metadata import SafetensorsMetadata |
5 | 6 |
|
@@ -105,8 +106,14 @@ def _format_short_number(n: float) -> str: |
105 | 106 | return f"{n:.2f}P" |
106 | 107 |
|
107 | 108 |
|
108 | | -def _bytes_to_gb(nbytes: int) -> float: |
109 | | - return nbytes / (1024**3) |
| 109 | +def _bytes_to_gb(nbytes: int, use_decimal: bool = False) -> float: |
| 110 | + """Convert bytes to gigabytes. |
| 111 | +
|
| 112 | + Args: |
| 113 | + nbytes: Number of bytes |
| 114 | + use_decimal: If True, use GB (1e9), else use GiB (1024^3) |
| 115 | + """ |
| 116 | + return nbytes / 1e9 if use_decimal else nbytes / (1024**3) |
110 | 117 |
|
111 | 118 |
|
112 | 119 | def print_report( |
@@ -191,3 +198,123 @@ def print_report( |
191 | 198 | ) |
192 | 199 |
|
193 | 200 | _print_divider(current_len + 1, "bottom") |
| 201 | + |
| 202 | + |
def _print_header_gguf(current_len: int, name_len: int) -> None:
    """Print the two border lines that open the GGUF report table.

    The total width is the data column plus the name column plus the
    fixed border/padding overhead shared with the other table helpers.
    """
    width = current_len + name_len + BORDERS_AND_PADDING
    inner = width - 2  # interior span between the two corner characters
    _print_with_color(f"{BOX['tl']}{BOX['tsep'] * inner}{BOX['tr']}")
    _print_with_color(f"{BOX['lm']}{BOX['bsep'] * inner}{BOX['rm']}")
| 210 | + |
| 211 | + |
def _print_centered_gguf(text: str, current_len: int, name_len: int) -> None:
    """Print *text* horizontally centered between the table's side borders."""
    # NOTE(review): the `- BORDERS_AND_PADDING ... + 12` arithmetic is meant to
    # reproduce the inner width used by _print_header_gguf — confirm against the
    # BORDERS_AND_PADDING constant defined elsewhere in this file.
    inner = (current_len + name_len - BORDERS_AND_PADDING) + 12
    left_pad = (inner - len(text)) // 2
    right_pad = inner - len(text) - left_pad
    _print_with_color(f"{BOX['vt']}{' ' * left_pad}{text}{' ' * right_pad}{BOX['vt']}")
| 219 | + |
| 220 | + |
def _print_divider_gguf(
    current_len: int,
    name_len: int,
    side: Optional[Literal["top", "top-continue", "bottom", "bottom-continue"]] = None,
) -> None:
    """Print a horizontal divider spanning both table columns.

    Args:
        current_len: Width of the data column's contents.
        name_len: Width of the file-name column's contents.
        side: Which edge of the table this divider sits on; ``None`` draws an
            interior divider with a column-junction character in the middle.
    """
    # Edge characters per divider position; unknown/None falls through to the
    # interior (left edge, column junction, right edge) triple.
    edge_chars = {
        "top": (BOX["lm"], BOX["tsep"], BOX["rm"]),
        "top-continue": (BOX["lm"], BOX["bsep"], BOX["rm"]),
        "bottom": (BOX["bl"], BOX["bsep"], BOX["br"]),
        "bottom-continue": (BOX["lm"], BOX["bsep"], BOX["rm"]),
    }
    left, mid, right = edge_chars.get(side, (BOX["lm"], BOX["mm"], BOX["rm"]))

    # Name column interior is padded by 2, data column by 1 (matches the
    # spacing used by _print_row_gguf).
    name_span = BOX["ht"] * (name_len + 2)
    data_span = BOX["ht"] * (current_len + 1)
    _print_with_color(f"{left}{name_span}{mid}{data_span}{right}")
| 247 | + |
| 248 | + |
def _print_row_gguf(name: str, text: str, current_len: int, name_len: int) -> None:
    """Print one table row: left-aligned file name, then left-aligned data cell."""
    padded_name = name.ljust(name_len)
    padded_data = str(text).ljust(current_len)
    _print_with_color(f"{BOX['vt']} {padded_name} {BOX['vt']} {padded_data} {BOX['vt']}")
| 253 | + |
| 254 | + |
| 255 | +def _group_gguf_files(gguf_files: Dict[str, int]) -> Dict[str, int]: |
| 256 | + """Group sharded GGUF files by model variant and sum their sizes. |
| 257 | +
|
| 258 | + Files like 'BF16/model-00001-of-00010.gguf' are grouped together. |
| 259 | + Single files like 'model-Q4_K_M.gguf' remain as-is. |
| 260 | + """ |
| 261 | + grouped: Dict[str, int] = {} |
| 262 | + shard_pattern = re.compile(r"-\d{5}-of-\d{5}\.gguf$") |
| 263 | + |
| 264 | + for path, size in gguf_files.items(): |
| 265 | + if shard_pattern.search(path): |
| 266 | + base = shard_pattern.sub(".gguf", path) |
| 267 | + grouped[base] = grouped.get(base, 0) + size |
| 268 | + else: |
| 269 | + grouped[path] = size |
| 270 | + |
| 271 | + return grouped |
| 272 | + |
| 273 | + |
def print_report_for_gguf(
    model_id: str,
    revision: str,
    gguf_files: Dict[str, int],
    ignore_table_width: bool = False,
) -> None:
    """Print VRAM report for GGUF models.

    Args:
        model_id: HuggingFace model ID
        revision: Model revision
        gguf_files: Dict mapping filename to file size in bytes
        ignore_table_width: Whether to ignore max table width

    Raises:
        ValueError: If `gguf_files` is empty, since there is nothing to report.
    """
    grouped_files = _group_gguf_files(gguf_files)

    # Guard the empty case explicitly: otherwise the max() below raises an
    # opaque "max() arg is an empty sequence" ValueError.
    if not grouped_files:
        raise ValueError(
            f"No GGUF files found for `{model_id}` @ `{revision}`; nothing to report."
        )

    max_name_len = max(len(filename) for filename in grouped_files)

    # Header rows; their longest line drives the data-column width.
    header_rows = [
        "INFERENCE MEMORY ESTIMATE FOR",
        f"https://hf.co/{model_id} @ {revision}",
    ]
    max_len = max(len(row) for row in header_rows)

    if max_len > MAX_DATA_LEN and ignore_table_width is False:
        warnings.warn(
            f"Given that the provided `--model-id {model_id}` (with `--revision {revision}`) is longer than {MAX_DATA_LEN} characters, the table width will be expanded to fit the provided values within their row, but it might lead to unexpected table views. If you'd like to ignore the limit, then provide the `--ignore-table-width` flag to ignore the {MAX_DATA_LEN} width limit, to simply accommodate to whatever the longest text length is."
        )

    current_len = min(max_len, MAX_DATA_LEN) if ignore_table_width is False else max_len

    _print_header_gguf(current_len, max_name_len)
    for row in header_rows:
        _print_centered_gguf(row, current_len, max_name_len)
    _print_divider_gguf(current_len + 1, max_name_len, "top")

    last_index = len(grouped_files) - 1
    for i, (filename, size_bytes) in enumerate(grouped_files.items()):
        # GGUF sizes are shown in decimal GB (1e9 bytes), hence use_decimal=True.
        file_gb = _bytes_to_gb(size_bytes, use_decimal=True)
        _print_row_gguf(filename, f"{file_gb:.2f} GB", current_len, max_name_len)

        # Interior divider between rows; the closing border comes after the loop.
        if i < last_index:
            _print_divider_gguf(current_len + 1, max_name_len)

    _print_divider_gguf(current_len + 1, max_name_len, "bottom")
0 commit comments