Skip to content

Commit a5f30ed

Browse files
committed
Add GGUF file support in CLI and printing functions
- Added functionality to check for GGUF files in the CLI and print a report using `print_report_for_gguf`. - Updated error handling to include GGUF files in the search criteria. - Introduced new helper functions in `print.py` for formatting and displaying GGUF file reports, including grouping sharded files and adjusting table widths. - Updated `_bytes_to_gb` with a `use_decimal` argument to match Hugging Face's reported file sizes.
1 parent 6cc92e9 commit a5f30ed

File tree

2 files changed

+150
-7
lines changed

2 files changed

+150
-7
lines changed

src/hf_mem/cli.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import httpx
1111

1212
from hf_mem.metadata import parse_safetensors_metadata
13-
from hf_mem.print import print_report
13+
from hf_mem.print import print_report, print_report_for_gguf
1414

1515
# NOTE: Defines the bytes that will be fetched per safetensors file, but the metadata
1616
# can indeed be larger than that
@@ -185,9 +185,25 @@ async def fetch_with_semaphore(url: str) -> Dict[str, Any]:
185185

186186
metadata = parse_safetensors_metadata(raw_metadata=raw_metadata)
187187
else:
188-
raise RuntimeError(
189-
"NONE OF `model.safetensors`, `model.safetensors.index.json`, `model_index.json` HAS BEEN FOUND"
190-
)
188+
# Check for GGUF files
189+
gguf_files = {
190+
f["path"]: f["size"]
191+
for f in files
192+
if f.get("path", "").endswith(".gguf") and f.get("size") is not None
193+
}
194+
195+
if gguf_files:
196+
print_report_for_gguf(
197+
model_id=model_id,
198+
revision=revision,
199+
gguf_files=gguf_files,
200+
ignore_table_width=ignore_table_width,
201+
)
202+
return
203+
else:
204+
raise RuntimeError(
205+
"NONE OF `model.safetensors`, `model.safetensors.index.json`, `model_index.json`, OR `.gguf` FILES HAVE BEEN FOUND"
206+
)
191207

192208
if json_output:
193209
out = {"model_id": model_id, "revision": revision, **asdict(metadata)}

src/hf_mem/print.py

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
import re
12
import warnings
2-
from typing import Literal, Optional
3+
from typing import Dict, Literal, Optional
34

45
from hf_mem.metadata import SafetensorsMetadata
56

@@ -105,8 +106,14 @@ def _format_short_number(n: float) -> str:
105106
return f"{n:.2f}P"
106107

107108

108-
def _bytes_to_gb(nbytes: int) -> float:
109-
return nbytes / (1024**3)
109+
def _bytes_to_gb(nbytes: int, use_decimal: bool = False) -> float:
110+
"""Convert bytes to gigabytes.
111+
112+
Args:
113+
nbytes: Number of bytes
114+
use_decimal: If True, use GB (1e9), else use GiB (1024^3)
115+
"""
116+
return nbytes / 1e9 if use_decimal else nbytes / (1024**3)
110117

111118

112119
def print_report(
@@ -191,3 +198,123 @@ def print_report(
191198
)
192199

193200
_print_divider(current_len + 1, "bottom")
201+
202+
203+
def _print_header_gguf(current_len: int, name_len: int) -> None:
    """Print the top border and the header separator of the GGUF table.

    The total width spans the data column plus the name column plus the
    module-level BORDERS_AND_PADDING allowance.
    """
    width = current_len + name_len + BORDERS_AND_PADDING
    # Top edge, then the line that closes off the centered title area.
    _print_with_color(f"{BOX['tl']}{BOX['tsep'] * (width - 2)}{BOX['tr']}")
    _print_with_color(f"{BOX['lm']}{BOX['bsep'] * (width - 2)}{BOX['rm']}")
210+
211+
212+
def _print_centered_gguf(text: str, current_len: int, name_len: int) -> None:
    """Print *text* centered between the table's vertical borders.

    NOTE(review): the `+ 12` offset presumably compensates for the border
    and padding characters added elsewhere — confirm against
    `_print_header_gguf`'s width computation.
    """
    inner_width = (current_len + name_len - BORDERS_AND_PADDING) + 12
    left_pad = (inner_width - len(text)) // 2
    right_pad = inner_width - len(text) - left_pad
    _print_with_color(BOX["vt"] + " " * left_pad + text + " " * right_pad + BOX["vt"])
219+
220+
221+
def _print_divider_gguf(
    current_len: int,
    name_len: int,
    side: Optional[Literal["top", "top-continue", "bottom", "bottom-continue"]] = None,
) -> None:
    """Print a horizontal divider across the two-column GGUF table.

    Args:
        current_len: Inner width of the data column (minus the padding space).
        name_len: Inner width of the file-name column.
        side: Which junction characters to use; None draws a plain mid-table
            divider.
    """
    # Select (left edge, fill/junction, right edge) for the requested position.
    if side == "top":
        left, mid, right = BOX["lm"], BOX["tsep"], BOX["rm"]
    elif side == "bottom":
        left, mid, right = BOX["bl"], BOX["bsep"], BOX["br"]
    elif side in ("top-continue", "bottom-continue"):
        left, mid, right = BOX["lm"], BOX["bsep"], BOX["rm"]
    else:
        left, mid, right = BOX["lm"], BOX["mm"], BOX["rm"]

    name_col_inner = name_len + 2  # one padding space on each side of the name
    data_col_inner = current_len + 1

    _print_with_color(
        left
        + BOX["ht"] * name_col_inner
        + mid
        + BOX["ht"] * data_col_inner
        + right
    )
247+
248+
249+
def _print_row_gguf(name: str, text: str, current_len: int, name_len: int) -> None:
    """Print one table row: a left-aligned name column and a data column."""
    border = BOX["vt"]
    row = f"{border} {name:<{name_len}} {border} {str(text):<{current_len}} {border}"
    _print_with_color(row)
253+
254+
255+
def _group_gguf_files(gguf_files: Dict[str, int]) -> Dict[str, int]:
256+
"""Group sharded GGUF files by model variant and sum their sizes.
257+
258+
Files like 'BF16/model-00001-of-00010.gguf' are grouped together.
259+
Single files like 'model-Q4_K_M.gguf' remain as-is.
260+
"""
261+
grouped: Dict[str, int] = {}
262+
shard_pattern = re.compile(r"-\d{5}-of-\d{5}\.gguf$")
263+
264+
for path, size in gguf_files.items():
265+
if shard_pattern.search(path):
266+
base = shard_pattern.sub(".gguf", path)
267+
grouped[base] = grouped.get(base, 0) + size
268+
else:
269+
grouped[path] = size
270+
271+
return grouped
272+
273+
274+
def print_report_for_gguf(
    model_id: str,
    revision: str,
    gguf_files: Dict[str, int],
    ignore_table_width: bool = False,
) -> None:
    """Print VRAM report for GGUF models.

    Args:
        model_id: HuggingFace model ID
        revision: Model revision
        gguf_files: Dict mapping filename to file size in bytes
        ignore_table_width: Whether to ignore max table width
    """
    grouped_files = _group_gguf_files(gguf_files)

    # Width of the file-name column is driven by the longest (grouped) name.
    max_name_len = max(len(name) for name in grouped_files)

    header_rows = [
        "INFERENCE MEMORY ESTIMATE FOR",
        f"https://hf.co/{model_id} @ {revision}",
    ]
    max_len = max(len(str(row)) for row in header_rows)

    if max_len > MAX_DATA_LEN and ignore_table_width is False:
        warnings.warn(
            f"Given that the provided `--model-id {model_id}` (with `--revision {revision}`) is longer than {MAX_DATA_LEN} characters, the table width will be expanded to fit the provided values within their row, but it might lead to unexpected table views. If you'd like to ignore the limit, then provide the `--ignore-table-width` flag to ignore the {MAX_DATA_LEN} width limit, to simply accommodate to whatever the longest text length is."
        )

    # Clamp the data column unless the caller opted out of the width cap.
    if ignore_table_width is False:
        current_len = min(max_len, MAX_DATA_LEN)
    else:
        current_len = max_len

    _print_header_gguf(current_len, max_name_len)
    _print_centered_gguf("INFERENCE MEMORY ESTIMATE FOR", current_len, max_name_len)
    _print_centered_gguf(f"https://hf.co/{model_id} @ {revision}", current_len, max_name_len)
    _print_divider_gguf(current_len + 1, max_name_len, "top")

    last_index = len(grouped_files) - 1
    for index, (filename, size_bytes) in enumerate(grouped_files.items()):
        # Decimal GB so the figures line up with sizes shown on the Hub.
        size_gb = _bytes_to_gb(size_bytes, use_decimal=True)
        _print_row_gguf(filename, f"{size_gb:.2f} GB", current_len, max_name_len)

        if index != last_index:
            _print_divider_gguf(current_len + 1, max_name_len)

    _print_divider_gguf(current_len + 1, max_name_len, "bottom")

0 commit comments

Comments
 (0)