@@ -35,6 +35,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
         if tensor.tensor_type not in (
             gguf.GGMLQuantizationType.F32,
             gguf.GGMLQuantizationType.F16,
+            gguf.GGMLQuantizationType.Q4_0,
             gguf.GGMLQuantizationType.Q8_0,
             gguf.GGMLQuantizationType.Q4_K,
             gguf.GGMLQuantizationType.Q6_K,
@@ -72,11 +73,48 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
             part.byteswap(inplace=True)

         # Byte-swap tensor data if necessary
-        if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
+        if tensor.tensor_type == gguf.GGMLQuantizationType.Q4_0:
+            # Handle Q4_0 tensor blocks (block_q4_0)
+            # Specific handling of block_q4_0 is required.
+            # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 bytes holding 32 packed 4-bit quantizations.
+
+            # first flatten structure
+            oldshape = tensor.data.shape
+            newshape = 1
+            for i in tensor.data.shape:
+                newshape *= i
+
+            tensor.data.resize(newshape)
+
+            block_size = 18  # 18 bytes = <f16 delta scaling factor> + 16 bytes of packed 4-bit quants
+
+            n_blocks = len(tensor.data) // block_size
+            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
+                block_offs = block_num * block_size
+
+                # Byte-Swap f16 sized delta field
+                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+                delta.byteswap(inplace=True)
+
+                # The packed 4-bit quants are single bytes, so they need no byte-swapping
+                if block_num % 100000 == 0:
+                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
+
+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
+        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
             # Handle Q8_0 tensor blocks (block_q8_0)
             # Specific handling of block_q8_0 is required.
             # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.

+            # first flatten structure
+            oldshape = tensor.data.shape
+            newshape = 1
+            for i in tensor.data.shape:
+                newshape *= i
+
+            tensor.data.resize(newshape)
+
             block_size = 34  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>

             n_blocks = len(tensor.data) // block_size
@@ -91,12 +129,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
                 if block_num % 100000 == 0:
                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")

+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
         elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
             # Handle Q4_K tensor blocks (block_q4_k)
             # Specific handling of block_q4_k is required.
             # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.

             # first flatten structure
+            oldshape = tensor.data.shape
             newshape = 1
             for i in tensor.data.shape:
                 newshape *= i
@@ -119,12 +160,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
                 if block_num % 100000 == 0:
                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")

+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
         elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
             # Handle Q6_K tensor blocks (block_q6_k)
             # Specific handling of block_q6_k is required.
             # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.

             # first flatten structure
+            oldshape = tensor.data.shape
             newshape = 1
             for i in tensor.data.shape:
                 newshape *= i
@@ -144,6 +188,8 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
                 if block_num % 100000 == 0:
                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")

+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
         else:
             # Handle other tensor types
             tensor.data.byteswap(inplace=True)
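
Not part of the patch, but as a reading aid: the block layouts described in the comments above can also be expressed as NumPy structured dtypes, which makes explicit why only the f16 delta fields are ever byte-swapped (the quantized weights are single bytes and therefore endian-neutral). This is a minimal sketch under the assumption of a flat, contiguous uint8 buffer made up of whole blocks; the dtype names and the swap_block_deltas helper are illustrative, not part of gguf-py.

import numpy as np

# Structured dtypes matching the block sizes used in the script
# (assumption: these mirror ggml's block_q4_0 / block_q8_0 layouts).
BLOCK_Q4_0 = np.dtype([("delta", "<f2"), ("quants", "u1", 16)])  # 2 + 16 = 18 bytes
BLOCK_Q8_0 = np.dtype([("delta", "<f2"), ("quants", "i1", 32)])  # 2 + 32 = 34 bytes

def swap_block_deltas(buf: np.ndarray, block_dtype: np.dtype) -> None:
    # Reinterpret the flat uint8 buffer as an array of blocks (no copy) and
    # byte-swap only the f16 delta of every block; the quant bytes are
    # single bytes, so they stay untouched.
    blocks = buf.view(block_dtype)
    blocks["delta"].byteswap(inplace=True)

# Usage example on two all-zero Q8_0 blocks:
data = np.zeros(2 * BLOCK_Q8_0.itemsize, dtype=np.uint8)
swap_block_deltas(data, BLOCK_Q8_0)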