Skip to content

Commit 3ed07ff

Browse files
Make byteswapping more universal
Little-endian systems can now load big-endian models too. The model's endianness is detected heuristically from the model version field. Also includes test-suite fixes made necessary by removing the ability to write non-native-endian files.
1 parent f1a4b69 commit 3ed07ff

File tree

7 files changed

+132
-90
lines changed

7 files changed

+132
-90
lines changed

examples/gguf-split/gguf-split.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,14 +328,20 @@ struct split_strategy {
328328
const char * t_name = gguf_get_tensor_name(ctx_out, i);
329329
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
330330
auto n_bytes = ggml_nbytes(t);
331+
auto n_elements = ggml_nelements(t) / ggml_blck_size(t->type);
331332
read_buf.resize(n_bytes);
332333

333334
// calculate offset
334335
auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
335336
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
336337

338+
ggml_byteswap_t byteswap_func = nullptr;
339+
if (gguf_needs_byteswap(ctx_gguf)) {
340+
byteswap_func = ggml_get_type_traits(t->type)->byteswap;
341+
}
342+
337343
// copy tensor from input to output file
338-
copy_file_to_file(f_input, fout, offset, n_bytes);
344+
copy_file_to_file(f_input, fout, offset, n_bytes, n_elements, byteswap_func);
339345
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
340346
}
341347

@@ -346,13 +352,18 @@ struct split_strategy {
346352
}
347353
}
348354

349-
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
355+
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len, const size_t elements, ggml_byteswap_t byteswap_func) {
350356
// TODO: detect OS and use copy_file_range() here for better performance
351357
if (read_buf.size() < len) {
352358
read_buf.resize(len);
353359
}
354360
f_in.seekg(in_offset);
355361
f_in.read((char *)read_buf.data(), len);
362+
363+
if (byteswap_func != nullptr) {
364+
byteswap_func(read_buf.data(), elements);
365+
}
366+
356367
f_out.write((const char *)read_buf.data(), len);
357368
}
358369
};
@@ -541,6 +552,13 @@ static void gguf_merge(const split_params & split_params) {
541552
f_input.seekg(offset);
542553
f_input.read((char *)read_data.data(), n_bytes);
543554

555+
if (gguf_needs_byteswap(ctx_gguf)) {
556+
auto byteswap = ggml_get_type_traits(t->type)->byteswap;
557+
if (byteswap != nullptr) {
558+
byteswap(read_data.data(), ggml_nelements(t) / ggml_blck_size(t->type));
559+
}
560+
}
561+
544562
// write tensor data + padding
545563
fout.write((const char *)read_data.data(), n_bytes);
546564
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);

ggml/include/gguf.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,9 @@ extern "C" {
197197
// writes the meta data to pointer "data"
198198
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
199199

200+
// returns true if the gguf file needs byteswapping when it is read; byteswapping on write is not implemented
201+
GGML_API bool gguf_needs_byteswap(const struct gguf_context * ctx);
202+
200203
#ifdef __cplusplus
201204
}
202205
#endif

ggml/src/ggml-impl.h

Lines changed: 17 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@
2929
#endif
3030

3131
#if defined(__gnu_linux__)
32-
#include <endian.h>
32+
#include <byteswap.h>
3333
#else // defined(__gnu_linux__)
34-
#define le64toh(x) (x)
35-
#define le32toh(x) (x)
36-
#define le16toh(x) (x)
34+
#define bswap_16(x) ((uint16_t)((((uint16_t)(x)) >> 8) | (((uint16_t)(x)) << 8))) // portable fallback: must really swap, <byteswap.h> is glibc-only
35+
#define bswap_32(x) ((uint32_t)( /* portable fallback: must really swap, <byteswap.h> is glibc-only */ \
    ((((uint32_t)(x)) & 0x000000FFu) << 24) | \
    ((((uint32_t)(x)) & 0x0000FF00u) <<  8) | \
    ((((uint32_t)(x)) & 0x00FF0000u) >>  8) | \
    ((((uint32_t)(x)) & 0xFF000000u) >> 24)))
36+
#define bswap_64(x) ((uint64_t)( /* portable fallback: must really swap, <byteswap.h> is glibc-only */ \
    ((((uint64_t)(x)) & 0x00000000000000FFull) << 56) | \
    ((((uint64_t)(x)) & 0x000000000000FF00ull) << 40) | \
    ((((uint64_t)(x)) & 0x0000000000FF0000ull) << 24) | \
    ((((uint64_t)(x)) & 0x00000000FF000000ull) <<  8) | \
    ((((uint64_t)(x)) & 0x000000FF00000000ull) >>  8) | \
    ((((uint64_t)(x)) & 0x0000FF0000000000ull) >> 24) | \
    ((((uint64_t)(x)) & 0x00FF000000000000ull) >> 40) | \
    ((((uint64_t)(x)) & 0xFF00000000000000ull) >> 56)))
3737
#endif // defined(__gnu_linux__)
3838

3939
#ifdef __cplusplus
@@ -562,68 +562,46 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
562562
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
563563

564564
// endianness conversion
565-
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
566-
#define ggml_convert_from_le16(x) GGML_UNUSED(x)
567-
#define ggml_convert_from_le32(x) GGML_UNUSED(x)
568-
#define ggml_convert_from_le64(x) GGML_UNUSED(x)
569-
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
570-
static inline void ggml_convert_from_le16(void * value) {
571-
*((uint16_t*)value) = le16toh(*((uint16_t*)value));
565+
static inline void ggml_bswap16(void * value) {
566+
*((uint16_t*)value) = bswap_16(*((uint16_t*)value));
572567
}
573568

574-
static inline void ggml_convert_from_le32(void * value) {
575-
*((uint32_t*)value) = le32toh(*((uint32_t*)value));
569+
static inline void ggml_bswap32(void * value) {
570+
*((uint32_t*)value) = bswap_32(*((uint32_t*)value));
576571
}
577572

578-
static inline void ggml_convert_from_le64(void * value) {
579-
*((uint64_t*)value) = le64toh(*((uint64_t*)value));
573+
static inline void ggml_bswap64(void * value) {
574+
*((uint64_t*)value) = bswap_64(*((uint64_t*)value));
580575
}
581-
#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
582-
#error Unexpected or undefined __BYTE_ORDER__
583-
#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
584-
585-
#define ggml_convert_to_le16(x) ggml_convert_from_le16(x)
586-
#define ggml_convert_to_le32(x) ggml_convert_from_le32(x)
587-
#define ggml_convert_to_le64(x) ggml_convert_from_le64(x)
588576

589577
#ifdef __cplusplus
590578
}
591579
#endif
592580

593581
#ifdef __cplusplus
594582
#include <vector>
595-
596-
// endianness conversion
597-
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
598-
#define ggml_convert_from_le(x) GGML_UNUSED(x)
599-
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
600583
#include <type_traits>
601584

602585
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
603-
static inline void ggml_convert_from_le(T * value)
586+
static inline void ggml_bswap(T * value)
604587
{
605588
GGML_UNUSED(value);
606589
}
607590

608591
template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
609-
static inline void ggml_convert_from_le(T * value) {
610-
ggml_convert_from_le16(value);
592+
static inline void ggml_bswap(T * value) {
593+
ggml_bswap16(value);
611594
}
612595

613596
template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
614-
static inline void ggml_convert_from_le(T * value) {
615-
ggml_convert_from_le32(value);
597+
static inline void ggml_bswap(T * value) {
598+
ggml_bswap32(value);
616599
}
617600

618601
template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
619-
static inline void ggml_convert_from_le(T * value) {
620-
ggml_convert_from_le64(value);
602+
static inline void ggml_bswap(T * value) {
603+
ggml_bswap64(value);
621604
}
622-
#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
623-
#error Unexpected or undefined __BYTE_ORDER__
624-
#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
625-
626-
#define ggml_convert_to_le(x) ggml_convert_from_le(x)
627605

628606
// expose GGUF internals for test code
629607
GGML_API size_t gguf_type_size(enum gguf_type type);

ggml/src/ggml.c

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6564,179 +6564,179 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons
65646564
static void ggml_byteswap_i16(void * restrict buffer, size_t elements) {
65656565
uint16_t *data_ptr = (uint16_t*) buffer;
65666566
for (size_t i = 0; i < elements; ++i) {
6567-
ggml_convert_from_le16(data_ptr + i);
6567+
ggml_bswap16(data_ptr + i);
65686568
}
65696569
}
65706570

65716571
static void ggml_byteswap_i32(void * restrict buffer, size_t elements) {
65726572
uint32_t *data_ptr = (uint32_t*) buffer;
65736573
for (size_t i = 0; i < elements; ++i) {
6574-
ggml_convert_from_le32(data_ptr + i);
6574+
ggml_bswap32(data_ptr + i);
65756575
}
65766576
}
65776577

65786578
static void ggml_byteswap_i64(void * restrict buffer, size_t elements) {
65796579
uint64_t *data_ptr = (uint64_t*) buffer;
65806580
for (size_t i = 0; i < elements; ++i) {
6581-
ggml_convert_from_le64(data_ptr + i);
6581+
ggml_bswap64(data_ptr + i);
65826582
}
65836583
}
65846584

65856585
static void ggml_byteswap_q4_0(void * restrict buffer, size_t elements) {
65866586
block_q4_0 *data_ptr = (block_q4_0*) buffer;
65876587
for (size_t i = 0; i < elements; ++i) {
6588-
ggml_convert_from_le16(&(data_ptr[i].d));
6588+
ggml_bswap16(&(data_ptr[i].d));
65896589
}
65906590
}
65916591

65926592
static void ggml_byteswap_q4_1(void * restrict buffer, size_t elements) {
65936593
block_q4_1 *data_ptr = (block_q4_1*) buffer;
65946594
for (size_t i = 0; i < elements; ++i) {
6595-
ggml_convert_from_le16(&(data_ptr[i].d));
6596-
ggml_convert_from_le16(&(data_ptr[i].m));
6595+
ggml_bswap16(&(data_ptr[i].d));
6596+
ggml_bswap16(&(data_ptr[i].m));
65976597
}
65986598
}
65996599

66006600
static void ggml_byteswap_q5_0(void * restrict buffer, size_t elements) {
66016601
block_q5_0 *data_ptr = (block_q5_0*) buffer;
66026602
for (size_t i = 0; i < elements; ++i) {
6603-
ggml_convert_from_le16(&(data_ptr[i].d));
6603+
ggml_bswap16(&(data_ptr[i].d));
66046604
}
66056605
}
66066606

66076607
static void ggml_byteswap_q5_1(void * restrict buffer, size_t elements) {
66086608
block_q5_1 *data_ptr = (block_q5_1*) buffer;
66096609
for (size_t i = 0; i < elements; ++i) {
6610-
ggml_convert_from_le16(&(data_ptr[i].d));
6611-
ggml_convert_from_le16(&(data_ptr[i].m));
6610+
ggml_bswap16(&(data_ptr[i].d));
6611+
ggml_bswap16(&(data_ptr[i].m));
66126612
}
66136613
}
66146614

66156615
static void ggml_byteswap_q8_0(void * restrict buffer, size_t elements) {
66166616
block_q8_0 *data_ptr = (block_q8_0*) buffer;
66176617
for (size_t i = 0; i < elements; ++i) {
6618-
ggml_convert_from_le16(&(data_ptr[i].d));
6618+
ggml_bswap16(&(data_ptr[i].d));
66196619
}
66206620
}
66216621

66226622
static void ggml_byteswap_q8_1(void * restrict buffer, size_t elements) {
66236623
block_q8_1 *data_ptr = (block_q8_1*) buffer;
66246624
for (size_t i = 0; i < elements; ++i) {
6625-
ggml_convert_from_le16(&(data_ptr[i].d));
6626-
ggml_convert_from_le16(&(data_ptr[i].s));
6625+
ggml_bswap16(&(data_ptr[i].d));
6626+
ggml_bswap16(&(data_ptr[i].s));
66276627
}
66286628
}
66296629

66306630
static void ggml_byteswap_q2_k(void * restrict buffer, size_t elements) {
66316631
block_q2_K *data_ptr = (block_q2_K*) buffer;
66326632
for (size_t i = 0; i < elements; ++i) {
6633-
ggml_convert_from_le16(&(data_ptr[i].d));
6634-
ggml_convert_from_le16(&(data_ptr[i].dmin));
6633+
ggml_bswap16(&(data_ptr[i].d));
6634+
ggml_bswap16(&(data_ptr[i].dmin));
66356635
}
66366636
}
66376637

66386638
static void ggml_byteswap_q3_k(void * restrict buffer, size_t elements) {
66396639
block_q3_K *data_ptr = (block_q3_K*) buffer;
66406640
for (size_t i = 0; i < elements; ++i) {
6641-
ggml_convert_from_le16(&(data_ptr[i].d));
6641+
ggml_bswap16(&(data_ptr[i].d));
66426642
}
66436643
}
66446644

66456645
static void ggml_byteswap_q4_k(void * restrict buffer, size_t elements) {
66466646
block_q4_K *data_ptr = (block_q4_K*) buffer;
66476647
for (size_t i = 0; i < elements; ++i) {
6648-
ggml_convert_from_le16(&(data_ptr[i].d));
6649-
ggml_convert_from_le16(&(data_ptr[i].dmin));
6648+
ggml_bswap16(&(data_ptr[i].d));
6649+
ggml_bswap16(&(data_ptr[i].dmin));
66506650
}
66516651
}
66526652

66536653
static void ggml_byteswap_q5_k(void * restrict buffer, size_t elements) {
66546654
block_q5_K *data_ptr = (block_q5_K*) buffer;
66556655
for (size_t i = 0; i < elements; ++i) {
6656-
ggml_convert_from_le16(&(data_ptr[i].d));
6657-
ggml_convert_from_le16(&(data_ptr[i].dmin));
6656+
ggml_bswap16(&(data_ptr[i].d));
6657+
ggml_bswap16(&(data_ptr[i].dmin));
66586658
}
66596659
}
66606660

66616661
static void ggml_byteswap_q6_k(void * restrict buffer, size_t elements) {
66626662
block_q6_K *data_ptr = (block_q6_K*) buffer;
66636663
for (size_t i = 0; i < elements; ++i) {
6664-
ggml_convert_from_le16(&(data_ptr[i].d));
6664+
ggml_bswap16(&(data_ptr[i].d));
66656665
}
66666666
}
66676667

66686668
static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) {
66696669
block_iq2_xxs *data_ptr = (block_iq2_xxs*) buffer;
66706670
for (size_t i = 0; i < elements; ++i) {
6671-
ggml_convert_from_le16(&(data_ptr[i].d));
6671+
ggml_bswap16(&(data_ptr[i].d));
66726672
for (size_t j = 0; j < QK_K/8; ++j) {
6673-
ggml_convert_from_le16(&(data_ptr[i].qs[j]));
6673+
ggml_bswap16(&(data_ptr[i].qs[j]));
66746674
}
66756675
}
66766676
}
66776677

66786678
static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) {
66796679
block_iq2_xs *data_ptr = (block_iq2_xs*) buffer;
66806680
for (size_t i = 0; i < elements; ++i) {
6681-
ggml_convert_from_le16(&(data_ptr[i].d));
6681+
ggml_bswap16(&(data_ptr[i].d));
66826682
for (size_t j = 0; j < QK_K/8; ++j) {
6683-
ggml_convert_from_le16(&(data_ptr[i].qs[j]));
6683+
ggml_bswap16(&(data_ptr[i].qs[j]));
66846684
}
66856685
}
66866686
}
66876687

66886688
static void ggml_byteswap_iq3_xxs(void * restrict buffer, size_t elements) {
66896689
block_iq3_xxs *data_ptr = (block_iq3_xxs*) buffer;
66906690
for (size_t i = 0; i < elements; ++i) {
6691-
ggml_convert_from_le16(&(data_ptr[i].d));
6691+
ggml_bswap16(&(data_ptr[i].d));
66926692
}
66936693
}
66946694

66956695
static void ggml_byteswap_iq3_s(void * restrict buffer, size_t elements) {
66966696
block_iq3_s *data_ptr = (block_iq3_s*) buffer;
66976697
for (size_t i = 0; i < elements; ++i) {
6698-
ggml_convert_from_le16(&(data_ptr[i].d));
6698+
ggml_bswap16(&(data_ptr[i].d));
66996699
}
67006700
}
67016701

67026702
static void ggml_byteswap_iq2_s(void * restrict buffer, size_t elements) {
67036703
block_iq2_s *data_ptr = (block_iq2_s*) buffer;
67046704
for (size_t i = 0; i < elements; ++i) {
6705-
ggml_convert_from_le16(&(data_ptr[i].d));
6705+
ggml_bswap16(&(data_ptr[i].d));
67066706
}
67076707
}
67086708

67096709
static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) {
67106710
block_iq1_s *data_ptr = (block_iq1_s*) buffer;
67116711
for (size_t i = 0; i < elements; ++i) {
6712-
ggml_convert_from_le16(&(data_ptr[i].d));
6712+
ggml_bswap16(&(data_ptr[i].d));
67136713
for (size_t j = 0; j < QK_K/32; ++j) {
6714-
ggml_convert_from_le16(&(data_ptr[i].qh[j]));
6714+
ggml_bswap16(&(data_ptr[i].qh[j]));
67156715
}
67166716
}
67176717
}
67186718

67196719
static void ggml_byteswap_iq4_nl(void * restrict buffer, size_t elements) {
67206720
block_iq4_nl *data_ptr = (block_iq4_nl*) buffer;
67216721
for (size_t i = 0; i < elements; ++i) {
6722-
ggml_convert_from_le16(&(data_ptr[i].d));
6722+
ggml_bswap16(&(data_ptr[i].d));
67236723
}
67246724
}
67256725

67266726
static void ggml_byteswap_iq4_xs(void * restrict buffer, size_t elements) {
67276727
block_iq4_xs *data_ptr = (block_iq4_xs*) buffer;
67286728
for (size_t i = 0; i < elements; ++i) {
6729-
ggml_convert_from_le16(&(data_ptr[i].d));
6730-
ggml_convert_from_le16(&(data_ptr[i].scales_h));
6729+
ggml_bswap16(&(data_ptr[i].d));
6730+
ggml_bswap16(&(data_ptr[i].scales_h));
67316731
}
67326732
}
67336733

67346734
static void ggml_byteswap_q8_k(void * restrict buffer, size_t elements) {
67356735
block_q8_K *data_ptr = (block_q8_K*) buffer;
67366736
for (size_t i = 0; i < elements; ++i) {
6737-
ggml_convert_from_le32(&(data_ptr[i].d));
6737+
ggml_bswap32(&(data_ptr[i].d));
67386738
for (size_t j = 0; j < QK_K/16; ++j) {
6739-
ggml_convert_from_le16(&(data_ptr[i].bsums[j]));
6739+
ggml_bswap16(&(data_ptr[i].bsums[j]));
67406740
}
67416741
}
67426742
}
@@ -6762,13 +6762,13 @@ static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements) {
67626762
static void ggml_byteswap_tq1_0(void * restrict buffer, size_t elements) {
67636763
block_tq1_0 *data_ptr = (block_tq1_0*) buffer;
67646764
for (size_t i = 0; i < elements; ++i) {
6765-
ggml_convert_from_le16(&(data_ptr[i].d));
6765+
ggml_bswap16(&(data_ptr[i].d));
67666766
}
67676767
}
67686768

67696769
static void ggml_byteswap_tq2_0(void * restrict buffer, size_t elements) {
67706770
block_tq2_0 *data_ptr = (block_tq2_0*) buffer;
67716771
for (size_t i = 0; i < elements; ++i) {
6772-
ggml_convert_from_le16(&(data_ptr[i].d));
6772+
ggml_bswap16(&(data_ptr[i].d));
67736773
}
67746774
}

0 commit comments

Comments
 (0)