@@ -43,6 +43,8 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
4343 gguf .GGMLQuantizationType .F32 ,
4444 gguf .GGMLQuantizationType .F16 ,
4545 gguf .GGMLQuantizationType .Q8_0 ,
46+ gguf .GGMLQuantizationType .Q4_K ,
47+ gguf .GGMLQuantizationType .Q6_K ,
4648 ):
4749 raise ValueError (f"Cannot handle type { tensor .tensor_type .name } for tensor { repr (tensor .name )} " )
4850 logger .info (f"* Preparing to convert from { file_endian .upper ()} to { order .upper ()} " )
@@ -96,6 +98,59 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
9698 if block_num % 100000 == 0 :
9799 inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
98100
101+ elif tensor .tensor_type == gguf .GGMLQuantizationType .Q4_K :
102+ # Handle Q4_K tensor blocks (block_q4_k)
103+ # Specific handling of block_q4_k is required.
104+ # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
105+
106+ # first flatten structure
107+ newshape = 1
108+ for i in tensor .data .shape :
109+ newshape *= i
110+
111+ tensor .data .resize (newshape )
112+
113+ block_size = 144
114+ n_blocks = len (tensor .data ) // block_size
115+ for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
116+ block_offs = block_num * block_size
117+
118+ # Byte-Swap f16 sized fields
119+ delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
120+ delta .byteswap (inplace = True )
121+
122+ delta = tensor .data [block_offs + 2 :block_offs + 4 ].view (dtype = np .uint16 )
123+ delta .byteswap (inplace = True )
124+
125+ # Refresh the progress-bar description every 100000 blocks
126+ if block_num % 100000 == 0 :
127+ inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
128+
129+ elif tensor .tensor_type == gguf .GGMLQuantizationType .Q6_K :
130+ # Handle Q6_K tensor blocks (block_q6_k)
131+ # Specific handling of block_q6_k is required.
132+ # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
133+
134+ # first flatten structure
135+ newshape = 1
136+ for i in tensor .data .shape :
137+ newshape *= i
138+
139+ tensor .data .resize (newshape )
140+
141+ block_size = 210
142+ n_blocks = len (tensor .data ) // block_size
143+ for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
144+ block_offs = block_num * block_size
145+
146+ # Byte-Swap f16 sized field
147+ delta = tensor .data [block_offs + 208 :block_offs + 210 ].view (dtype = np .uint16 )
148+ delta .byteswap (inplace = True )
149+
150+ # Refresh the progress-bar description every 100000 blocks
151+ if block_num % 100000 == 0 :
152+ inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
153+
99154 else :
100155 # Handle other tensor types
101156 tensor .data .byteswap (inplace = True )
0 commit comments