logger = logging.getLogger("gguf-convert-endian")


+def byteswap_q4_0(tensor, block_offs):
+    # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
+
+    # Byte-Swap f16 sized delta field
+    delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+    delta.byteswap(inplace=True)
+
+
+def byteswap_q8_0(tensor, block_offs):
+    # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
+
+    # Byte-Swap f16 sized delta field
+    delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+    delta.byteswap(inplace=True)
+
+
+def byteswap_q4_k(tensor, block_offs):
+    # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
+
+    # Byte-Swap f16 sized fields
+    delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+    delta.byteswap(inplace=True)
+
+    delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
+    delta.byteswap(inplace=True)
+
+
+def byteswap_q6_k(tensor, block_offs):
+    # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
+
+    # Byte-Swap f16 sized field
+    delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
+    delta.byteswap(inplace=True)
+
+
+byteswap_tensors = {
+    gguf.GGMLQuantizationType.Q4_0: {
+        "block_size": 18,  # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
+        "byteswap_func": byteswap_q4_0,
+    },
+    gguf.GGMLQuantizationType.Q8_0: {
+        "block_size": 34,  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
+        "byteswap_func": byteswap_q8_0,
+    },
+    gguf.GGMLQuantizationType.Q4_K: {
+        "block_size": 144,  # 144 bytes = 2 * <f16 delta scaling factor> + 140 * <int8 quant>
+        "byteswap_func": byteswap_q4_k,
+    },
+    gguf.GGMLQuantizationType.Q6_K: {
+        "block_size": 210,  # 210 bytes = <f16 delta scaling factor> + 208 * <int8 quant>
+        "byteswap_func": byteswap_q6_k,
+    },
+}
+
+
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
    file_endian = reader.endianess.name
    if reader.byte_order == 'S':
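For reference (not part of the patch): the helpers above all rely on the same numpy trick of reinterpreting a 2-byte slice of the raw uint8 tensor data as uint16 and swapping it in place, which flips the bytes of the f16 field inside the original buffer. A minimal standalone sketch of that technique on a fake Q8_0-sized block; the `block` name and the 0.5 value are purely illustrative:

```python
import numpy as np

# A fake Q8_0 block: 2-byte f16 delta followed by 32 int8 quants, stored as raw uint8 bytes.
block = np.zeros(34, dtype=np.uint8)
block[0:2] = np.array([0.5], dtype='<f2').view(np.uint8)  # little-endian delta

# The helpers' core trick: reinterpret the 2-byte field as uint16 and byteswap it
# in place; the view shares memory with `block`, so the underlying bytes are swapped.
delta = block[0:2].view(dtype=np.uint16)
delta.byteswap(inplace=True)

print(block[0:2].view('>f2')[0])  # prints 0.5: the delta is now a valid big-endian f16
```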
@@ -32,13 +87,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
        sys.exit(0)
    logger.info("* Checking tensors for conversion compatibility")
    for tensor in reader.tensors:
-        if tensor.tensor_type not in (
-            gguf.GGMLQuantizationType.F32,
-            gguf.GGMLQuantizationType.F16,
-            gguf.GGMLQuantizationType.Q8_0,
-            gguf.GGMLQuantizationType.Q4_K,
-            gguf.GGMLQuantizationType.Q6_K,
-        ):
+        if tensor.tensor_type not in byteswap_tensors and \
+           tensor.tensor_type not in (
+               gguf.GGMLQuantizationType.F32,
+               gguf.GGMLQuantizationType.F16,
+           ):
            raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
    logger.info(f"* Preparing to convert from {file_endian} to {order}")
    if args.dry_run:
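In other words, a tensor type now passes the compatibility check if it is plain F32/F16 or has an entry in `byteswap_tensors`. A hypothetical helper expressing the same condition (`is_convertible` is an invented name, not in the patch; it assumes the names above are in scope):

```python
def is_convertible(tensor_type: gguf.GGMLQuantizationType) -> bool:
    # True for types the script can byte-swap: plain floats, or any quantized
    # type with a registered block layout in byteswap_tensors.
    return tensor_type in (
        gguf.GGMLQuantizationType.F32,
        gguf.GGMLQuantizationType.F16,
    ) or tensor_type in byteswap_tensors
```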
@@ -72,78 +125,29 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
            part.byteswap(inplace=True)

        # Byte-swap tensor data if necessary
-        if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
-            # Handle Q8_0 tensor blocks (block_q8_0)
-            # Specific handling of block_q8_0 is required.
-            # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
-
-            block_size = 34  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
-
-            n_blocks = len(tensor.data) // block_size
-            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
-                block_offs = block_num * block_size
-
-                # Byte-Swap f16 sized delta field
-                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
-                delta.byteswap(inplace=True)
-
-                # Byte-Swap Q8 weights
-                if block_num % 100000 == 0:
-                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
-
-        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
-            # Handle Q4_K tensor blocks (block_q4_k)
-            # Specific handling of block_q4_k is required.
-            # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
-
+        if tensor.tensor_type in byteswap_tensors:
            # first flatten structure
+            oldshape = tensor.data.shape
            newshape = 1
            for i in tensor.data.shape:
                newshape *= i

            tensor.data.resize(newshape)

-            block_size = 144
-            n_blocks = len(tensor.data) // block_size
-            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
-                block_offs = block_num * block_size
-
-                # Byte-Swap f16 sized fields
-                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
-                delta.byteswap(inplace=True)
-
-                delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
-                delta.byteswap(inplace=True)
-
-                # Byte-Swap
-                if block_num % 100000 == 0:
-                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
-
-        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
-            # Handle Q6_K tensor blocks (block_q6_k)
-            # Specific handling of block_q6_k is required.
-            # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
-
-            # first flatten structure
-            newshape = 1
-            for i in tensor.data.shape:
-                newshape *= i
-
-            tensor.data.resize(newshape)
+            block_size = byteswap_tensors[tensor.tensor_type]["block_size"]
+            byteswap_func = byteswap_tensors[tensor.tensor_type]["byteswap_func"]

-            block_size = 210
            n_blocks = len(tensor.data) // block_size
            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
                block_offs = block_num * block_size

-                # Byte-Swap f16 sized field
-                delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
-                delta.byteswap(inplace=True)
+                byteswap_func(tensor, block_offs)

-                # Byte-Swap
                if block_num % 100000 == 0:
                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")

+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
        else:
            # Handle other tensor types
            tensor.data.byteswap(inplace=True)
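One consequence of the table-driven rewrite is that supporting another quantization type only needs a per-block swap function plus a table entry. A hypothetical sketch (not part of this PR) for Q5_0, under the assumption that block_q5_0 is a 22-byte block with its f16 delta at offset 0:

```python
def byteswap_q5_0(tensor, block_offs):
    # Byte-swap the leading f16 delta; the remaining quant bytes need no swapping.
    delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
    delta.byteswap(inplace=True)


byteswap_tensors[gguf.GGMLQuantizationType.Q5_0] = {
    "block_size": 22,  # assumed: <f16 delta> + 4 * <uint8 high bits> + 16 * <uint8 nibbles>
    "byteswap_func": byteswap_q5_0,
}
```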
0 commit comments