
Commit 05872ac

convert : fix big-endian conversion (#17431)

* Fix convert_hf_to_gguf.py script on s390x

  Assume converted model data is originally little-endian. Byteswap the data on s390x after reading it so values are in the correct representation for any transformation needed, such as calculating weight tensors. Then byteswap the data to little-endian before passing it to GGUFWriter; GGUFWriter will byteswap the data back to big-endian if big-endian output is requested.

  byteswap(inplace=True) calls don't work with lazy tensor and array wrappers. Use a copying byteswap to work around this behaviour.

* Make GGUFWriter accept tensors in native endianness instead of little-endian

  With this change, if no byteswapping is actually needed, two excessive byteswaps can be omitted on s390x.

* Fix byteswapping in convert_hf_to_gguf.py for remote models
1 parent 55ab25c commit 05872ac
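The commit message's note that byteswap(inplace=True) fails on lazy tensor and array wrappers can be illustrated with an ordinary read-only NumPy buffer; this is a minimal sketch of the same constraint, not the script's own wrapper types:

    import numpy as np

    # np.frombuffer over bytes yields a read-only array, similar in spirit to the
    # memory-mapped / lazily materialized buffers the conversion script handles.
    buf = np.frombuffer(b"\x01\x00\x02\x00", dtype=np.uint16)
    print(buf.flags.writeable)               # False: in-place byteswap is not an option

    swapped = buf.byteswap(inplace=False)    # copying byteswap leaves the source untouched
    print(swapped)                           # [256 512] on a little-endian host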

File tree

2 files changed, +42 -6 lines changed


convert_hf_to_gguf.py

Lines changed: 33 additions & 2 deletions
@@ -10061,6 +10061,25 @@ class LazyTorchTensor(gguf.LazyBase):
         torch.uint8: np.uint8,
     }
 
+    # only used when byteswapping data. Only correct size is needed
+    _dtype_byteswap_map: dict[torch.dtype, type] = {
+        torch.float64: np.float64,
+        torch.float32: np.float32,
+        torch.bfloat16: np.float16,
+        torch.float16: np.float16,
+        torch.int64: np.int64,
+        torch.uint64: np.uint64,
+        torch.int32: np.int32,
+        torch.uint32: np.uint32,
+        torch.int16: np.int16,
+        torch.uint16: np.uint16,
+        torch.int8: np.int8,
+        torch.uint8: np.uint8,
+        torch.bool: np.uint8,
+        torch.float8_e4m3fn: np.uint8,
+        torch.float8_e5m2: np.uint8,
+    }
+
     # used for safetensors slices
     # ref: https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/src/lib.rs#L1046
     # TODO: uncomment U64, U32, and U16, ref: https://github.com/pytorch/pytorch/issues/58734
@@ -10104,19 +10123,31 @@ def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
     @classmethod
     def from_local_tensor(cls, t: gguf.utility.LocalTensor) -> Tensor:
         def load_tensor(tensor: gguf.utility.LocalTensor) -> Tensor:
+            def byteswap_tensor(tensor: np.ndarray, dtype: type) -> np.ndarray:
+                if sys.byteorder == 'big':
+                    # switch data back to big endian
+                    tensor = tensor.view(dtype).byteswap(inplace=False)
+                return tensor
             dtype = cls._dtype_str_map[tensor.dtype]
-            return torch.from_numpy(tensor.mmap_bytes()).view(dtype).reshape(tensor.shape)
+            numpy_dtype = cls._dtype_byteswap_map[dtype]
+            return torch.from_numpy(byteswap_tensor(tensor.mmap_bytes(), numpy_dtype)).view(dtype).reshape(tensor.shape)
         dtype = cls._dtype_str_map[t.dtype]
         shape = t.shape
         lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(t,), func=lambda r: load_tensor(r))
         return cast(torch.Tensor, lazy)
 
     @classmethod
     def from_remote_tensor(cls, remote_tensor: gguf.utility.RemoteTensor):
+        def byteswap_tensor(tensor: np.ndarray, dtype: type) -> np.ndarray:
+            if sys.byteorder == 'big':
+                # switch data back to big endian
+                tensor = tensor.view(dtype).byteswap(inplace=False)
+            return tensor
         dtype = cls._dtype_str_map[remote_tensor.dtype]
+        numpy_dtype = cls._dtype_byteswap_map[dtype]
         shape = remote_tensor.shape
         meta = cls.meta_with_dtype_and_shape(dtype, shape)
-        lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.frombuffer(r.data(), dtype=dtype).reshape(shape))
+        lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.from_numpy(byteswap_tensor(np.frombuffer(r.data(), dtype=numpy_dtype), numpy_dtype)).view(dtype).reshape(shape))
         return cast(torch.Tensor, lazy)
 
     @classmethod
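Both nested byteswap_tensor helpers follow the same pattern: the serialized bytes are assumed little-endian, so on a big-endian host they are viewed at the right element width and copy-byteswapped into native order before torch sees them. A standalone sketch of that pattern, with `raw` standing in for tensor.mmap_bytes() or r.data() (the names below are illustrative, not the script's own):

    import sys
    import numpy as np
    import torch

    def byteswap_to_native(buf: np.ndarray, dtype: type) -> np.ndarray:
        # stored data is assumed little-endian; on a big-endian host, reinterpret
        # the raw bytes at the element width and byteswap into a native-order copy
        if sys.byteorder == 'big':
            buf = buf.view(dtype).byteswap(inplace=False)
        return buf

    # two float32 values serialized little-endian, as they would appear on disk
    raw = np.array([1.0, 2.0], dtype='<f4').tobytes()
    buf = np.frombuffer(bytearray(raw), dtype=np.uint8)

    native = byteswap_to_native(buf, np.float32)
    t = torch.from_numpy(native).view(torch.float32).reshape(2)
    print(t)  # tensor([1., 2.]) regardless of host byte order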

gguf-py/gguf/gguf_writer.py

Lines changed: 9 additions & 4 deletions
@@ -4,6 +4,7 @@
 import os
 import shutil
 import struct
+import sys
 import tempfile
 from dataclasses import dataclass
 from enum import Enum, auto
@@ -372,8 +373,10 @@ def add_tensor(
         self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
         raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
-        if self.endianess == GGUFEndian.BIG:
-            tensor.byteswap(inplace=True)
+        if (self.endianess == GGUFEndian.BIG and sys.byteorder != 'big') or \
+           (self.endianess == GGUFEndian.LITTLE and sys.byteorder != 'little'):
+            # Don't byteswap inplace since lazy copies cannot handle it
+            tensor = tensor.byteswap(inplace=False)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
             fp.seek(0)
@@ -399,8 +402,10 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
             raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
         assert self.fout is not None
 
-        if self.endianess == GGUFEndian.BIG:
-            tensor.byteswap(inplace=True)
+        if (self.endianess == GGUFEndian.BIG and sys.byteorder != 'big') or \
+           (self.endianess == GGUFEndian.LITTLE and sys.byteorder != 'little'):
+            # Don't byteswap inplace since lazy copies cannot handle it
+            tensor = tensor.byteswap(inplace=False)
 
         file_id = -1
         for i, tensors in enumerate(self.tensors):
