Skip to content

Commit 7adc79c

Browse files
gguf-py : add support for endian conversion of BF16 data (ggml-org#16594)
BF16 requires special handling in this script: although it is 2-byte data, the view is 1-byte by default. Switch to the correct view before attempting byteswapping. With this change, correctly byteswapping models such as Meta-Llama-3-8B-Instruct-bf16-GGUF should be possible.
1 parent 466c191 commit 7adc79c

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

gguf-py/gguf/scripts/gguf_convert_endian.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
9191
tensor.tensor_type not in (
9292
gguf.GGMLQuantizationType.F32,
9393
gguf.GGMLQuantizationType.F16,
94+
gguf.GGMLQuantizationType.BF16,
9495
):
9596
raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
9697
logger.info(f"* Preparing to convert from {file_endian} to {order}")
@@ -148,6 +149,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
148149

149150
# restore old shape in case it's ever used
150151
tensor.data.resize(oldshape)
152+
elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16:
153+
# Special case for BF16
154+
# It is 2-byte data, but by default the view loads it as 1-byte data.
155+
# Change to correct view before byteswapping.
156+
tensor.data.view(dtype=np.uint16).byteswap(inplace=True)
151157
else:
152158
# Handle other tensor types
153159
tensor.data.byteswap(inplace=True)

0 commit comments

Comments
 (0)