# Logger used throughout this GGUF endianness-conversion script.
logger = logging.getLogger("gguf-convert-endian")
2020
2121
def byteswap_q4_0(tensor, block_offs):
    """Byte-swap one Q4_0 block of ``tensor`` in place.

    Per the original layout comment, a block_q4_0 is an f16 delta (scaling
    factor) followed by 16 bytes of quantized values.  Only the two-byte f16
    field is multi-byte and needs swapping; the remaining single-byte values
    are endian-neutral.
    """
    # View the two delta bytes as one uint16 and swap them in place.
    tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16).byteswap(inplace=True)
28+
29+
def byteswap_q8_0(tensor, block_offs):
    """Byte-swap one Q8_0 block of ``tensor`` in place.

    Per the original layout comment, a block_q8_0 is an f16 delta (scaling
    factor) followed by 32 int8 quantizations.  Only the two-byte f16 field
    needs swapping; int8 values are endian-neutral.
    """
    # View the two delta bytes as one uint16 and swap them in place.
    tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16).byteswap(inplace=True)
36+
37+
def byteswap_q4_k(tensor, block_offs):
    """Byte-swap one Q4_K block of ``tensor`` in place.

    Per the original layout comment, a block_q4_k starts with two adjacent
    f16 values followed by 140 int8 values.  The two f16 fields occupy the
    first four bytes, so a single uint16 view over them swaps both at once;
    the int8 tail is endian-neutral.
    """
    # Two consecutive uint16 elements: byteswap flips each element's byte pair.
    tensor.data[block_offs:block_offs + 4].view(dtype=np.uint16).byteswap(inplace=True)
47+
48+
def byteswap_q6_k(tensor, block_offs):
    """Byte-swap one Q6_K block of ``tensor`` in place.

    Per the original layout comment, a block_q6_k is 208 int8 values followed
    by a single f16 value.  Only that trailing two-byte f16 field needs
    swapping; the int8 values before it are endian-neutral.
    """
    # The f16 sits at bytes [208, 210) of the block; swap it via a uint16 view.
    tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16).byteswap(inplace=True)
55+
56+
# Dispatch table for quantized tensor types that need per-block byte-swapping:
# maps each GGML quantization type to its on-disk block size in bytes and the
# function that swaps the multi-byte fields of one block in place.
byteswap_tensors = {
    quant_type: {"block_size": size, "byteswap_func": func}
    for quant_type, size, func in (
        # 18 bytes = <f16 delta scaling factor> + 16 quant bytes
        (gguf.GGMLQuantizationType.Q4_0, 18, byteswap_q4_0),
        # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
        (gguf.GGMLQuantizationType.Q8_0, 34, byteswap_q8_0),
        # 144 bytes = 2 * <f16 delta scaling factor> + 140 * <int8 quant>
        (gguf.GGMLQuantizationType.Q4_K, 144, byteswap_q4_k),
        # 210 bytes = 208 * <int8 quant> + <f16 delta scaling factor>
        (gguf.GGMLQuantizationType.Q6_K, 210, byteswap_q6_k),
    )
}
75+
76+
2277def convert_byteorder (reader : gguf .GGUFReader , args : argparse .Namespace ) -> None :
2378 file_endian = reader .endianess .name
2479 if reader .byte_order == 'S' :
@@ -32,13 +87,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
3287 sys .exit (0 )
3388 logger .info ("* Checking tensors for conversion compatibility" )
3489 for tensor in reader .tensors :
35- if tensor .tensor_type not in (
36- gguf .GGMLQuantizationType .F32 ,
37- gguf .GGMLQuantizationType .F16 ,
38- gguf .GGMLQuantizationType .Q8_0 ,
39- gguf .GGMLQuantizationType .Q4_K ,
40- gguf .GGMLQuantizationType .Q6_K ,
41- ):
90+ if tensor .tensor_type not in byteswap_tensors and \
91+ tensor .tensor_type not in (
92+ gguf .GGMLQuantizationType .F32 ,
93+ gguf .GGMLQuantizationType .F16 ,
94+ ):
4295 raise ValueError (f"Cannot handle type { tensor .tensor_type .name } for tensor { repr (tensor .name )} " )
4396 logger .info (f"* Preparing to convert from { file_endian } to { order } " )
4497 if args .dry_run :
@@ -72,78 +125,29 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
72125 part .byteswap (inplace = True )
73126
74127 # Byte-swap tensor data if necessary
75- if tensor .tensor_type == gguf .GGMLQuantizationType .Q8_0 :
76- # Handle Q8_0 tensor blocks (block_q8_0)
77- # Specific handling of block_q8_0 is required.
78- # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
79-
80- block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
81-
82- n_blocks = len (tensor .data ) // block_size
83- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
84- block_offs = block_num * block_size
85-
86- # Byte-Swap f16 sized delta field
87- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
88- delta .byteswap (inplace = True )
89-
90- # Byte-Swap Q8 weights
91- if block_num % 100000 == 0 :
92- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
93-
94- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q4_K :
95- # Handle Q4_K tensor blocks (block_q4_k)
96- # Specific handling of block_q4_k is required.
97- # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
98-
128+ if tensor .tensor_type in byteswap_tensors :
99129 # first flatten structure
130+ oldshape = tensor .data .shape
100131 newshape = 1
101132 for i in tensor .data .shape :
102133 newshape *= i
103134
104135 tensor .data .resize (newshape )
105136
106- block_size = 144
107- n_blocks = len (tensor .data ) // block_size
108- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
109- block_offs = block_num * block_size
110-
111- # Byte-Swap f16 sized fields
112- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
113- delta .byteswap (inplace = True )
114-
115- delta = tensor .data [block_offs + 2 :block_offs + 4 ].view (dtype = np .uint16 )
116- delta .byteswap (inplace = True )
117-
118- # Byte-Swap
119- if block_num % 100000 == 0 :
120- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
121-
122- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q6_K :
123- # Handle Q6_K tensor blocks (block_q6_k)
124- # Specific handling of block_q6_k is required.
125- # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
126-
127- # first flatten structure
128- newshape = 1
129- for i in tensor .data .shape :
130- newshape *= i
131-
132- tensor .data .resize (newshape )
137+ block_size = byteswap_tensors [tensor .tensor_type ]["block_size" ]
138+ byteswap_func = byteswap_tensors [tensor .tensor_type ]["byteswap_func" ]
133139
134- block_size = 210
135140 n_blocks = len (tensor .data ) // block_size
136141 for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
137142 block_offs = block_num * block_size
138143
139- # Byte-Swap f16 sized field
140- delta = tensor .data [block_offs + 208 :block_offs + 210 ].view (dtype = np .uint16 )
141- delta .byteswap (inplace = True )
144+ byteswap_func (tensor , block_offs )
142145
143- # Byte-Swap
144146 if block_num % 100000 == 0 :
145147 inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
146148
149+ # restore old shape in case it's ever used
150+ tensor .data .resize (oldshape )
147151 else :
148152 # Handle other tensor types
149153 tensor .data .byteswap (inplace = True )
0 commit comments