@@ -23,7 +23,7 @@ def dequantize_tensor(tensor, dtype=None, dequant_dtype=None):
2323 return dequantize (tensor .data , qtype , oshape , dtype = dequant_dtype ).to (dtype )
2424 else :
2525 # this is incredibly slow
26- tqdm .write (f"Falling back to numpy dequant for qtype: { qtype } " )
26+ tqdm .write (f"Falling back to numpy dequant for qtype: { getattr ( qtype , 'name' , repr ( qtype )) } " )
2727 new = gguf .quants .dequantize (tensor .cpu ().numpy (), qtype )
2828 return torch .from_numpy (new ).to (tensor .device , dtype = dtype )
2929
@@ -48,6 +48,10 @@ def to_uint32(x):
4848 x = x .view (torch .uint8 ).to (torch .int32 )
4949 return (x [:, 0 ] | x [:, 1 ] << 8 | x [:, 2 ] << 16 | x [:, 3 ] << 24 ).unsqueeze (1 )
5050
def to_uint16(x):
    """Reassemble little-endian byte pairs into uint16 values.

    Mirrors to_uint32: views the input as raw bytes, widens to int32
    (so the shift cannot overflow), and combines low/high bytes.
    Returns shape (n, 1), dtype int32.
    """
    raw = x.view(torch.uint8).to(torch.int32)
    lo = raw[:, 0]
    hi = raw[:, 1]
    return torch.bitwise_or(lo, torch.bitwise_left_shift(hi, 8)).unsqueeze(1)
54+
5155def split_block_dims (blocks , * args ):
5256 n_max = blocks .shape [1 ]
5357 dims = list (args ) + [n_max - sum (args )]
@@ -233,6 +237,53 @@ def dequantize_blocks_Q2_K(blocks, block_size, type_size, dtype=None):
233237
234238 return qs .reshape ((n_blocks , - 1 ))
235239
# IQ quants
# Shared 16-entry nonlinear 4-bit codebook used by the IQ4 dequantizers
# below (IQ4_NL / IQ4_XS): each packed 4-bit index selects one of these
# signed int8 levels via torch.gather.
# NOTE(review): values appear to mirror llama.cpp's kvalues_iq4nl table — confirm.
KVALUES = torch.tensor([-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113], dtype=torch.int8)
242+
def dequantize_blocks_IQ4_NL(blocks, block_size, type_size, dtype=None):
    """Dequantize IQ4_NL blocks.

    Per-block layout: an fp16 scale `d` followed by packed 4-bit indices
    into the nonlinear KVALUES codebook (two indices per byte, low nibble
    first). Returns (n_blocks, -1) values of `d * codebook[index]`.
    """
    n_blocks = blocks.shape[0]

    d, qs = split_block_dims(blocks, 2)
    d = d.view(torch.float16).to(dtype)

    # Unpack both nibbles of every byte: shift by 0 and by 4, then mask.
    qs = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor(
        [0, 4], device=d.device, dtype=torch.uint8
    ).reshape((1, 1, 2, 1))
    # Use int64 for the gather index: torch.gather documents a LongTensor
    # index; int32 acceptance varies across PyTorch versions.
    qs = (qs & 0x0F).reshape((n_blocks, -1, 1)).to(torch.int64)

    # expand() is a zero-copy view of the codebook broadcast over all rows.
    kvalues = KVALUES.to(qs.device).expand(*qs.shape[:-1], 16)
    qs = torch.gather(kvalues, dim=-1, index=qs).reshape((n_blocks, -1))

    return d * qs
257+
def dequantize_blocks_IQ4_XS(blocks, block_size, type_size, dtype=None):
    """Dequantize IQ4_XS super-blocks.

    Per-block layout: fp16 `d`, a 16-bit `scales_h` word, QK_K//64 bytes of
    `scales_l`, then packed 4-bit indices into the KVALUES codebook. Each of
    the QK_K//32 sub-blocks gets a 6-bit scale assembled from a low nibble
    (scales_l) and two high bits (scales_h), biased by -32; the sub-block
    multiplier is `d * scale`.
    """
    n_blocks = blocks.shape[0]
    d, scales_h, scales_l, qs = split_block_dims(blocks, 2, 2, QK_K // 64)
    d = d.view(torch.float16).to(dtype)
    scales_h = to_uint16(scales_h)

    shift_a = torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape((1, 1, 2))
    shift_b = torch.tensor([2 * i for i in range(QK_K // 32)], device=d.device, dtype=torch.uint8).reshape((1, -1, 1))

    # Low nibbles of the sub-block scales (two per byte) ...
    scales_l = scales_l.reshape((n_blocks, -1, 1)) >> shift_a.reshape((1, 1, 2))
    # ... and the matching 2-bit high parts packed into the 16-bit word.
    scales_h = scales_h.reshape((n_blocks, -1, 1)) >> shift_b.reshape((1, -1, 1))

    scales_l = scales_l.reshape((n_blocks, -1)) & 0x0F
    scales_h = scales_h.reshape((n_blocks, -1)).to(torch.uint8) & 0x03

    # 6-bit scale in [-32, 31]; dl is the per-sub-block multiplier.
    scales = (scales_l | (scales_h << 4)).to(torch.int8) - 32
    dl = (d * scales.to(dtype)).reshape((n_blocks, -1, 1))

    # Unpack the 4-bit codebook indices (low nibble first: 16 bytes -> 32 values).
    qs = qs.reshape((n_blocks, -1, 1, 16)) >> shift_a.reshape((1, 1, 2, 1))
    qs = qs.reshape((n_blocks, -1, 32, 1)) & 0x0F

    # expand() is a zero-copy broadcast view of the codebook. Use int64 for
    # the gather index: torch.gather documents a LongTensor index; int32
    # acceptance varies across PyTorch versions.
    kvalues = KVALUES.to(qs.device).expand(*qs.shape[:-1], 16)
    qs = torch.gather(kvalues, dim=-1, index=qs.to(torch.int64)).reshape((n_blocks, -1, 32))

    return (dl * qs).reshape((n_blocks, -1))
286+
236287dequantize_functions = {
237288 gguf .GGMLQuantizationType .BF16 : dequantize_blocks_BF16 ,
238289 gguf .GGMLQuantizationType .Q8_0 : dequantize_blocks_Q8_0 ,
@@ -245,4 +296,6 @@ def dequantize_blocks_Q2_K(blocks, block_size, type_size, dtype=None):
245296 gguf .GGMLQuantizationType .Q4_K : dequantize_blocks_Q4_K ,
246297 gguf .GGMLQuantizationType .Q3_K : dequantize_blocks_Q3_K ,
247298 gguf .GGMLQuantizationType .Q2_K : dequantize_blocks_Q2_K ,
299+ gguf .GGMLQuantizationType .IQ4_NL : dequantize_blocks_IQ4_NL ,
300+ gguf .GGMLQuantizationType .IQ4_XS : dequantize_blocks_IQ4_XS ,
248301}
0 commit comments