1919logger  =  logging .getLogger ("gguf-convert-endian" )
2020
2121
def byteswap_q4_0(tensor, block_offs):
    """Byte-swap one block_q4_0 of *tensor* in place, starting at *block_offs*.

    A block_q4_0 is 18 bytes: an f16 delta (scaling factor) followed by
    16 bytes of quantized values. Single bytes are endian-neutral, so only
    the f16 scale field needs swapping.
    """
    scale = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
    scale.byteswap(inplace=True)
28+ 
29+ 
def byteswap_q8_0(tensor, block_offs):
    """Byte-swap one block_q8_0 of *tensor* in place, starting at *block_offs*.

    A block_q8_0 is 34 bytes: an f16 delta (scaling factor) followed by
    32 int8 quantized values. The int8 payload is endian-neutral, so only
    the f16 scale field needs swapping.
    """
    scale = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
    scale.byteswap(inplace=True)
36+ 
37+ 
def byteswap_q4_k(tensor, block_offs):
    """Byte-swap one block_q4_k of *tensor* in place, starting at *block_offs*.

    A block_q4_k is 144 bytes: two consecutive f16 values at the head of the
    block, followed by 140 bytes of quantized data. Swap each f16 field;
    the remaining bytes are endian-neutral.
    """
    for field_offs in (block_offs, block_offs + 2):
        f16 = tensor.data[field_offs:field_offs + 2].view(dtype=np.uint16)
        f16.byteswap(inplace=True)
47+ 
48+ 
def byteswap_q6_k(tensor, block_offs):
    """Byte-swap one block_q6_k of *tensor* in place, starting at *block_offs*.

    A block_q6_k is 210 bytes: 208 bytes of quantized data followed by a
    single f16 value at the tail of the block. Only that trailing f16
    needs swapping.
    """
    f16 = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
    f16.byteswap(inplace=True)
55+ 
56+ 
# Quantized tensor types that require per-block byte-swapping, mapped to the
# size of one block in bytes and the function that swaps a single block
# in place. Types absent from this table are either swapped whole (F32/F16)
# or rejected by convert_byteorder.
byteswap_tensors = {
    # 18 bytes = <f16 delta scaling factor> + 16 bytes of quants
    gguf.GGMLQuantizationType.Q4_0: {"block_size": 18, "byteswap_func": byteswap_q4_0},
    # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
    gguf.GGMLQuantizationType.Q8_0: {"block_size": 34, "byteswap_func": byteswap_q8_0},
    # 144 bytes = 2 * <f16 scaling factor> + 140 bytes of quants
    gguf.GGMLQuantizationType.Q4_K: {"block_size": 144, "byteswap_func": byteswap_q4_k},
    # 210 bytes = 208 bytes of quants + trailing <f16 scaling factor>
    gguf.GGMLQuantizationType.Q6_K: {"block_size": 210, "byteswap_func": byteswap_q6_k},
}
75+ 
76+ 
2277def  convert_byteorder (reader : gguf .GGUFReader , args : argparse .Namespace ) ->  None :
2378    file_endian  =  reader .endianess .name 
2479    if  reader .byte_order  ==  'S' :
@@ -32,13 +87,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
3287        sys .exit (0 )
3388    logger .info ("* Checking tensors for conversion compatibility" )
3489    for  tensor  in  reader .tensors :
35-         if  tensor .tensor_type  not  in 
36-             gguf .GGMLQuantizationType .F32 ,
37-             gguf .GGMLQuantizationType .F16 ,
38-             gguf .GGMLQuantizationType .Q8_0 ,
39-             gguf .GGMLQuantizationType .Q4_K ,
40-             gguf .GGMLQuantizationType .Q6_K ,
41-         ):
90+         if  tensor .tensor_type  not  in byteswap_tensors  and  \
91+            tensor .tensor_type  not  in 
92+                 gguf .GGMLQuantizationType .F32 ,
93+                 gguf .GGMLQuantizationType .F16 ,
94+            ):
4295            raise  ValueError (f"Cannot handle type { tensor .tensor_type .name } { repr (tensor .name )}  )
4396    logger .info (f"* Preparing to convert from { file_endian } { order }  )
4497    if  args .dry_run :
@@ -72,78 +125,29 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
72125            part .byteswap (inplace = True )
73126
74127        # Byte-swap tensor data if necessary 
75-         if  tensor .tensor_type  ==  gguf .GGMLQuantizationType .Q8_0 :
76-             # Handle Q8_0 tensor blocks (block_q8_0) 
77-             # Specific handling of block_q8_0 is required. 
78-             # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations. 
79- 
80-             block_size  =  34  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant> 
81- 
82-             n_blocks  =  len (tensor .data ) //  block_size 
83-             for  block_num  in  (inner_pbar  :=  tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
84-                 block_offs  =  block_num  *  block_size 
85- 
86-                 # Byte-Swap f16 sized delta field 
87-                 delta  =  tensor .data [block_offs :block_offs  +  2 ].view (dtype = np .uint16 )
88-                 delta .byteswap (inplace = True )
89- 
90-                 # Byte-Swap Q8 weights 
91-                 if  block_num  %  100000  ==  0 :
92-                     inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks  -  block_num ) //  n_blocks }  )
93- 
94-         elif  tensor .tensor_type  ==  gguf .GGMLQuantizationType .Q4_K :
95-             # Handle Q4_K tensor blocks (block_q4_k) 
96-             # Specific handling of block_q4_k is required. 
97-             # Each block_q4_k consists of 2 f16 values followed by 140 int8 values. 
98- 
128+         if  tensor .tensor_type  in  byteswap_tensors :
99129            # first flatten structure 
130+             oldshape  =  tensor .data .shape 
100131            newshape  =  1 
101132            for  i  in  tensor .data .shape :
102133                newshape  *=  i 
103134
104135            tensor .data .resize (newshape )
105136
106-             block_size  =  144 
107-             n_blocks  =  len (tensor .data ) //  block_size 
108-             for  block_num  in  (inner_pbar  :=  tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
109-                 block_offs  =  block_num  *  block_size 
110- 
111-                 # Byte-Swap f16 sized fields 
112-                 delta  =  tensor .data [block_offs :block_offs  +  2 ].view (dtype = np .uint16 )
113-                 delta .byteswap (inplace = True )
114- 
115-                 delta  =  tensor .data [block_offs  +  2 :block_offs  +  4 ].view (dtype = np .uint16 )
116-                 delta .byteswap (inplace = True )
117- 
118-                 # Byte-Swap 
119-                 if  block_num  %  100000  ==  0 :
120-                     inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks  -  block_num ) //  n_blocks }  )
121- 
122-         elif  tensor .tensor_type  ==  gguf .GGMLQuantizationType .Q6_K :
123-             # Handle Q6_K tensor blocks (block_q6_k) 
124-             # Specific handling of block_q6_k is required. 
125-             # Each block_q6_k consists of 208 int8 values followed by 1 f16 value. 
126- 
127-             # first flatten structure 
128-             newshape  =  1 
129-             for  i  in  tensor .data .shape :
130-                 newshape  *=  i 
131- 
132-             tensor .data .resize (newshape )
137+             block_size     =  byteswap_tensors [tensor .tensor_type ]["block_size" ]
138+             byteswap_func  =  byteswap_tensors [tensor .tensor_type ]["byteswap_func" ]
133139
134-             block_size  =  210 
135140            n_blocks  =  len (tensor .data ) //  block_size 
136141            for  block_num  in  (inner_pbar  :=  tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
137142                block_offs  =  block_num  *  block_size 
138143
139-                 # Byte-Swap f16 sized field 
140-                 delta  =  tensor .data [block_offs  +  208 :block_offs  +  210 ].view (dtype = np .uint16 )
141-                 delta .byteswap (inplace = True )
144+                 byteswap_func (tensor , block_offs )
142145
143-                 # Byte-Swap 
144146                if  block_num  %  100000  ==  0 :
145147                    inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks  -  block_num ) //  n_blocks }  )
146148
149+             # restore old shape in case it's ever used 
150+             tensor .data .resize (oldshape )
147151        else :
148152            # Handle other tensor types 
149153            tensor .data .byteswap (inplace = True )
0 commit comments