Commit 6b3273e

convert : use reflinks for faster conversion
1 parent 786b32d commit 6b3273e

File tree

6 files changed: +265 −63 lines

convert_hf_to_gguf.py

Lines changed: 139 additions & 48 deletions
Large diffs are not rendered by default.
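The convert_hf_to_gguf.py diff is not shown, but the mechanism it enables is the one used by the helpers below: on copy-on-write filesystems (BTRFS, XFS, and likely ZFS), os.copy_file_range can reflink block-aligned byte ranges from the source safetensors files into the output GGUF, so tensor data is shared instead of rewritten. A minimal standalone sketch of that primitive, assuming Linux with os.copy_file_range (Python 3.8+); the file names, offsets, and sizes are hypothetical:

# Minimal sketch: share a block-aligned byte range between two files with copy_file_range.
# Assumes Linux with os.copy_file_range (Python 3.8+) and a COW filesystem (e.g. BTRFS/XFS);
# the file names, offsets, and sizes are hypothetical.
import os

BLOCK = 4096  # common filesystem block size; reflinks need block-aligned ranges

with open("model.safetensors", "rb") as src, open("model.gguf", "r+b") as dst:
    src_offset = 8 * BLOCK   # block-aligned offset into the source tensor data
    dst_offset = 4 * BLOCK   # block-aligned offset in the output file
    size = 16 * BLOCK        # whole blocks only
    copied = 0
    while copied < size:
        n = os.copy_file_range(src.fileno(), dst.fileno(), size - copied,
                               src_offset + copied, dst_offset + copied)
        if n == 0:
            break
        copied += n

When both offsets are multiples of the filesystem block size, the kernel can satisfy the copy by sharing extents, which is what makes the conversion faster and lets the output initially occupy almost no additional disk space.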

ggml/src/ggml-impl.h

Lines changed: 2 additions & 2 deletions
@@ -42,8 +42,8 @@ void ggml_print_backtrace(void);
 # define MAX(a, b) ((a) > (b) ? (a) : (b))
 #endif
 
-// required for mmap as gguf only guarantees 32-byte alignment
-#define TENSOR_ALIGNMENT 32
+// required for mmap as gguf converted with reflinks from safetensors only guarantees 8-byte alignment
+#define TENSOR_ALIGNMENT 8
 
 // static_assert should be a #define, but if it's not,
 // fall back to the _Static_assert C11 keyword.
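With reflinked conversion the tensor data offsets are inherited from the safetensors layout, so the loader can no longer assume 32-byte alignment and the guarantee drops to 8 bytes. A quick way to inspect what alignment a converted file actually ends up with, assuming gguf-py's GGUFReader exposes a per-tensor data_offset (the path is hypothetical):

# Sketch: report the alignment of tensor data offsets in a converted GGUF file.
# Assumes gguf-py's GGUFReader and its per-tensor data_offset; the path is hypothetical.
from gguf import GGUFReader

reader = GGUFReader("model.gguf")
for t in reader.tensors:
    align = 1
    while t.data_offset % (align * 2) == 0 and align < 4096:
        align *= 2
    print(f"{t.name}: offset {t.data_offset} ({align}-byte aligned)")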

ggml/src/gguf.cpp

Lines changed: 2 additions & 0 deletions
@@ -624,6 +624,8 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     ctx->size = 0;
     for (size_t i = 0; i < ctx->info.size(); ++i) {
         const gguf_tensor_info & ti = ctx->info[i];
+        // HACK: bypass the continuity check
+        ctx->size = ti.offset;
         if (ti.offset != ctx->size) {
             GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
                 __func__, ti.t.name, ti.offset, ctx->size);

gguf-py/gguf/gguf_writer.py

Lines changed: 27 additions & 7 deletions
@@ -30,6 +30,7 @@
 )
 
 from .quants import quant_shape_from_byte_shape
+from .utility import LocalTensorRange, best_alignment_offset, copy_tensor_ranges
 
 logger = logging.getLogger(__name__)
 

@@ -84,14 +85,16 @@ class GGUFWriter:
 
     def __init__(
         self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
-        split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
+        split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False,
+        use_reflinks = False,  # opportunistically attempt to use copy-on-write
     ):
         self.fout = None
         self.path = Path(path) if path else None
         self.arch = arch
         self.endianess = endianess
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
-        self.use_temp_file = use_temp_file
+        self.use_reflinks = use_reflinks and hasattr(os, "copy_file_range")
+        self.use_temp_file = use_temp_file if not self.use_reflinks else False
         self.temp_file = None
         self.tensors = [{}]
         self.kv_data = [{}]

@@ -107,6 +110,10 @@ def __init__(
         if self.small_first_shard:
             self.tensors.append({})
 
+        if self.use_reflinks:
+            # common default block size for COW filesystems
+            self.add_custom_alignment(4096)
+
         self.add_architecture()
 
     def get_total_parameter_count(self) -> tuple[int, int, int, int]:

@@ -257,14 +264,20 @@ def write_ti_data_to_file(self) -> None:
             offset_tensor = 0
 
             for name, ti in tensors.items():
+                align_offset = 0
+                if self.use_reflinks:
+                    ranges: tuple[LocalTensorRange, ...] = getattr(ti.tensor, "_ranges", ())
+                    if len(ranges) > 0:
+                        align_offset = best_alignment_offset(ranges, self.data_alignment)
+
                 ti_data += self._pack_val(name, GGUFValueType.STRING, add_vtype=False)
                 n_dims = len(ti.shape)
                 ti_data += self._pack("I", n_dims)
                 for j in range(n_dims):
                     ti_data += self._pack("Q", ti.shape[n_dims - 1 - j])
                 ti_data += self._pack("I", ti.dtype)
-                ti_data += self._pack("Q", offset_tensor)
-                offset_tensor += GGUFWriter.ggml_pad(ti.nbytes, self.data_alignment)
+                ti_data += self._pack("Q", offset_tensor + align_offset)
+                offset_tensor += GGUFWriter.ggml_pad(ti.nbytes + align_offset, self.data_alignment)
 
             fout.write(ti_data)
             fout.flush()

@@ -398,6 +411,7 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
         if self.state is not WriterState.TI_DATA and self.state is not WriterState.WEIGHTS:
             raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
         assert self.fout is not None
+        assert not self.use_reflinks  # TODO: handle this here too
 
         if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)

@@ -450,15 +464,21 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
                     shard_bar.reset(total=(total if total > 0 else None))
 
                 # relying on the fact that Python dicts preserve insertion order (since 3.7)
-                for ti in tensors.values():
+                for name, ti in tensors.items():
                     assert ti.tensor is not None  # can only iterate once over the tensors
                     assert ti.tensor.nbytes == ti.nbytes
-                    ti.tensor.tofile(fout)
+                    if self.use_reflinks and len(ranges := getattr(ti.tensor, "_ranges", ())) > 0:
+                        logger.debug(f"using reflinks for {name}")
+                        start_offset = fout.tell()
+                        copy_tensor_ranges(fout, ranges, self.data_alignment)
+                        self.write_padding(fout, fout.tell() - start_offset)
+                    else:
+                        ti.tensor.tofile(fout)
+                        self.write_padding(fout, ti.nbytes)
                     if shard_bar is not None:
                         shard_bar.update(ti.nbytes)
                     if bar is not None:
                         bar.update(ti.nbytes)
-                    self.write_padding(fout, ti.nbytes)
                     ti.tensor = None
         else:
             self.temp_file.seek(0)
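The offset arithmetic above is the heart of the writer change: each tensor's recorded offset is the padded running offset plus an align_offset chosen so that the destination offset modulo the 4096-byte alignment matches the source offset in the safetensors file, which is what lets copy_file_range share blocks. A small sketch of that arithmetic with a stand-in for GGUFWriter.ggml_pad (the byte counts are hypothetical):

# Sketch of the offset arithmetic in write_ti_data_to_file above.
# ggml_pad here is a stand-in for GGUFWriter.ggml_pad; the byte counts are hypothetical.
def ggml_pad(x: int, n: int) -> int:
    return ((x + n - 1) // n) * n

data_alignment = 4096  # set via self.add_custom_alignment(4096) when use_reflinks is enabled
offset_tensor = 0

# (nbytes, align_offset) per tensor; align_offset is the source offset modulo 4096
tensors = [(9_437_184, 512), (4_718_592, 512), (1_024, 0)]

for nbytes, align_offset in tensors:
    stored_offset = offset_tensor + align_offset      # written into the tensor info
    offset_tensor += ggml_pad(nbytes + align_offset, data_alignment)
    print(f"data offset {stored_offset}, next tensor starts at {offset_tensor}")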

gguf-py/gguf/lazy.py

Lines changed: 14 additions & 6 deletions
@@ -6,6 +6,7 @@
 
 import numpy as np
 from numpy.typing import DTypeLike
+from .utility import LocalTensorRange
 
 
 logger = logging.getLogger(__name__)

@@ -20,10 +21,11 @@ def __getattr__(self, name: str) -> Any:
                 return type(self)._wrap_fn(
                     (lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
                     use_self=self,
+                    data_noop=name in ("view", "reshape", "squeeze", "unsqueeze"),
                 )
             elif isinstance(meta_attr, self._tensor_type):
                 # e.g. self.T with torch.Tensor should still be wrapped
-                return type(self)._wrap_fn(lambda s: getattr(s, name))(self)
+                return type(self)._wrap_fn(lambda s: getattr(s, name), use_self=self)()
             else:
                 # no need to wrap non-tensor properties,
                 # and they likely don't depend on the actual contents of the tensor

@@ -39,8 +41,9 @@ def mk_wrap(op_name: str, *, meta_noop: bool = False):
             def wrapped_special_op(self, *args, **kwargs):
                 return type(self)._wrap_fn(
                     getattr(type(self)._tensor_type, op_name),
+                    use_self=self,
                     meta_noop=meta_noop,
-                )(self, *args, **kwargs)
+                )(*args, **kwargs)
             return wrapped_special_op
 
         # special methods bypass __getattr__, so they need to be added manually

@@ -76,14 +79,16 @@ class LazyBase(ABC, metaclass=LazyMeta):
     _args: tuple
     _kwargs: dict[str, Any]
     _func: Callable[[Any], Any] | None
+    _ranges: tuple[LocalTensorRange, ...]
 
-    def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
+    def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None, ranges: tuple[LocalTensorRange, ...] = ()):
         super().__init__()
         self._meta = meta
         self._data = data
         self._args = args
         self._kwargs = kwargs if kwargs is not None else {}
         self._func = func
+        self._ranges = ranges
         assert self._func is not None or self._data is not None
 
     def __init_subclass__(cls) -> None:

@@ -107,7 +112,7 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
         return o
 
     @classmethod
-    def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
+    def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False, data_noop: bool = False) -> Callable[[Any], Any]:
         def wrapped_fn(*args, **kwargs):
             if kwargs is None:
                 kwargs = {}

@@ -116,6 +121,8 @@ def wrapped_fn(*args, **kwargs):
             meta_args = LazyBase._recurse_apply(args, lambda t: t._meta)
             # TODO: maybe handle tensors in kwargs too
 
+            ranges = use_self._ranges if use_self is not None and data_noop else ()
+
             if isinstance(meta_noop, bool) and not meta_noop:
                 try:
                     res = fn(*meta_args, **kwargs)

@@ -138,7 +145,7 @@ def wrapped_fn(*args, **kwargs):
                     res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
 
             if isinstance(res, cls._tensor_type):
-                return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
+                return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn, ranges=ranges)
             elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res):
                 # share the evaluation between lazy tuple elements
                 shared_args: list = [args, None]

@@ -214,7 +221,8 @@ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) ->
     def astype(self, dtype, *args, **kwargs):
         meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
         full_args = (self, dtype,) + args
-        return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
+        ranges = self._ranges if self._meta.dtype == dtype else ()
+        return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)), ranges=ranges)
 
     def tofile(self, *args, **kwargs):
         eager = LazyNumpyTensor.to_eager(self)
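The net effect of the lazy.py changes is a propagation rule: source byte ranges survive only through operations that leave the underlying bytes untouched, i.e. the view-like methods flagged as data_noop and astype to the same dtype; any other operation drops them, and the writer then falls back to materializing the tensor. A standalone illustration of that rule (this is not the real LazyBase machinery, only the policy it encodes):

# Standalone illustration of the range-propagation rule above; not the real LazyBase.
from __future__ import annotations
from dataclasses import dataclass

@dataclass(frozen=True)
class Range:  # hypothetical stand-in for LocalTensorRange
    filename: str
    offset: int
    size: int

DATA_NOOP_OPS = {"view", "reshape", "squeeze", "unsqueeze"}

def propagate_ranges(op: str, ranges: tuple[Range, ...],
                     src_dtype: str | None = None, dst_dtype: str | None = None) -> tuple[Range, ...]:
    if op in DATA_NOOP_OPS:
        # view-like ops keep the byte layout, so the source ranges stay valid
        return ranges
    if op == "astype" and src_dtype == dst_dtype:
        # a no-op dtype cast also leaves the bytes untouched
        return ranges
    # anything else may rewrite the data, so reflinking is no longer possible
    return ()

r = (Range("model-00001-of-00002.safetensors", 1 << 20, 4096),)
print(propagate_ranges("reshape", r))                       # ranges kept
print(propagate_ranges("astype", r, "float16", "float16"))  # ranges kept
print(propagate_ranges("astype", r, "float16", "float32"))  # dropped -> ()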

gguf-py/gguf/utility.py

Lines changed: 81 additions & 0 deletions
@@ -1,13 +1,17 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from io import BufferedReader, BufferedWriter
 from pathlib import Path
 from typing import Literal
 
 import os
 import json
+import logging
 import numpy as np
 
+logger = logging.getLogger(__name__)
+
 
 def fill_templated_filename(filename: str, output_type: str | None) -> str:
     # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'

@@ -277,6 +281,83 @@ class LocalTensorRange:
     size: int
 
 
+def best_alignment_offset(ranges: tuple[LocalTensorRange, ...], alignment: int):
+    hist: dict[int, int] = {}
+
+    for r in ranges:
+        align_offset = r.offset % alignment
+        if align_offset not in hist:
+            hist[align_offset] = 0
+        hist[align_offset] += r.size
+
+    best_offset = 0
+    best_size = 0
+    for offset, size in hist.items():
+        if size > best_size:
+            best_size = size
+            best_offset = offset
+    return best_offset
+
+
+# (assuming this is only called where os.copy_file_range is present)
+#
+# Copy tensor ranges using os.copy_file_range with aligned offsets and sizes
+# to make it more likely that copy-on-write is used where possible.
+# Block alignment is necessary for BTRFS and XFS (and likely for ZFS too).
+def copy_tensor_ranges(fout: BufferedWriter, ranges: tuple[LocalTensorRange, ...], alignment: int = 4096):
+    assert len(ranges) > 0
+    dst_offset = fout.tell()
+    assert dst_offset % alignment == 0, dst_offset % alignment
+    align_offset = best_alignment_offset(ranges, alignment)
+    if len(ranges) == 1:
+        r = ranges[0]
+        with open(r.filename, "rb") as src:
+            offset_src = r.offset - align_offset
+            offset_src_end = r.offset + r.size
+            if offset_src_end % alignment != 0:
+                offset_src_end += alignment - (offset_src_end % alignment)
+            size = offset_src_end - offset_src
+            os.copy_file_range(src.fileno(), fout.fileno(), size, offset_src, dst_offset)
+            dst_offset += r.size + align_offset
+    else:
+        # All ranges need to have the same alignment offset
+        # Non-consecutive ranges need a patch block in between when the alignment offset is non-zero
+        src_files: dict[Path, BufferedReader] = {}
+        for r in ranges:
+            if r.filename not in src_files:
+                src_files[r.filename] = open(r.filename, "rb")
+
+        for i, r in enumerate(ranges):
+            this_align_offset = r.offset % alignment
+            src = src_files[r.filename]
+            if this_align_offset != align_offset:
+                logger.debug(f"copy-on-write can't be used ({i}/{len(ranges)})")
+            if i > 0 and dst_offset % alignment != 0:
+                # Write the correct data between blocks even when they are non-consecutive
+                extra_size = alignment - (dst_offset % alignment)
+                src.seek(r.offset)
+                buf = src.read(extra_size)
+                fout.seek(dst_offset)
+                fout.write(buf)
+                dst_offset += extra_size
+                assert dst_offset % alignment == 0, dst_offset % alignment
+                offset_src = r.offset + extra_size
+            else:
+                # TODO: is this always correct?
+                offset_src = r.offset - align_offset
+
+            offset_src_end = r.offset + r.size
+            if offset_src_end % alignment != 0:
+                offset_src_end += alignment - (offset_src_end % alignment)
+            size = offset_src_end - offset_src
+            os.copy_file_range(src.fileno(), fout.fileno(), size, offset_src, dst_offset)
+            dst_offset += r.size
+
+        for f in src_files.values():
+            f.close()
+
+    fout.seek(dst_offset)
+
+
 @dataclass
 class LocalTensor:
     dtype: str
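A small usage sketch of the helpers added above, assuming LocalTensorRange is a dataclass carrying exactly the (filename, offset, size) fields that copy_tensor_ranges reads; the file names, offsets, and sizes are hypothetical:

# Sketch: how GGUFWriter is expected to use the new helpers (hypothetical values).
# Assumes LocalTensorRange(filename, offset, size) and an existing "model.gguf".
from gguf.utility import LocalTensorRange, best_alignment_offset, copy_tensor_ranges

ranges = (
    LocalTensorRange("model-00001-of-00002.safetensors", 4096 * 10 + 512, 9_437_184),
    LocalTensorRange("model-00001-of-00002.safetensors", 4096 * 2314 + 512, 4_718_592),
)

# Offset modulo the block size that covers the most bytes; here both ranges
# sit at offset 512 within their block, so align_offset == 512.
align_offset = best_alignment_offset(ranges, 4096)
print(align_offset)

with open("model.gguf", "r+b") as fout:
    fout.seek(32 * 4096)  # copy_tensor_ranges asserts the destination is block-aligned
    copy_tensor_ranges(fout, ranges, alignment=4096)

best_alignment_offset picks the source offset modulo the block size that covers the most bytes, and copy_tensor_ranges then requires the destination write position to be block-aligned so that the shared blocks line up on both sides of the copy.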
