@@ -428,13 +428,16 @@ def dequantize_blocks_Q2_K(blocks, block_size, type_size, dtype=None):
def dequantize_blocks_BF16(blocks, block_size, type_size, dtype=None):
    """Decode raw BF16 data to float32 by reinterpreting the bit pattern.

    bfloat16 is exactly the top 16 bits of an IEEE-754 float32, so widening
    each 16-bit pattern to 32 bits and shifting it into the high half yields
    the corresponding float32 value with the low mantissa bits zero-filled.

    NOTE(review): `block_size`, `type_size` and `dtype` are accepted only to
    match the shared dequantizer signature — this function always returns
    float32; presumably the caller casts afterwards if needed.
    """
    widened = blocks.view(torch.int16).to(torch.int32)
    return (widened << 16).view(torch.float32)
430430
431+ 
431432# this section is adapted from calcuis (gguf.org) 
432433# more info: https://github.com/calcuis/gguf-connector/blob/main/src/gguf_connector/quant2c.py 
433434
435+ 
434436def  dequantize_blocks_IQ4_NL (blocks , block_size , type_size , dtype = None ):
435437    kvalues  =  torch .tensor (
436438        [- 127 , - 104 , - 83 , - 65 , - 49 , - 35 , - 22 , - 10 , 1 , 13 , 25 , 38 , 53 , 69 , 89 , 113 ],
437-         dtype = torch .float32 , device = blocks .device 
439+         dtype = torch .float32 ,
440+         device = blocks .device ,
438441    )
439442    n_blocks  =  blocks .shape [0 ]
440443    d , qs  =  split_block_dims (blocks , 2 )
@@ -449,19 +452,23 @@ def dequantize_blocks_IQ4_NL(blocks, block_size, type_size, dtype=None):
449452    qs  =  qs .squeeze (- 1 ).to (dtype )
450453    return  d  *  qs 
451454
455+ 
452456def  dequantize_blocks_IQ4_XS (blocks , block_size , type_size , dtype = None ):
453457    kvalues  =  torch .tensor (
454458        [- 127 , - 104 , - 83 , - 65 , - 49 , - 35 , - 22 , - 10 , 1 , 13 , 25 , 38 , 53 , 69 , 89 , 113 ],
455-         dtype = torch .float32 , device = blocks .device 
459+         dtype = torch .float32 ,
460+         device = blocks .device ,
456461    )
457462    n_blocks  =  blocks .shape [0 ]
458463    d , scales_h , scales_l , qs  =  split_block_dims (blocks , 2 , 2 , QK_K  //  64 )
459464    d  =  d .view (torch .float16 ).to (dtype )
460465    scales_h  =  scales_h .view (torch .int16 )
461466    scales_l  =  scales_l .reshape ((n_blocks , - 1 , 1 )) >>  torch .tensor (
462-         [0 , 4 ], device = blocks .device , dtype = torch .uint8 ).reshape ((1 , 1 , 2 ))
467+         [0 , 4 ], device = blocks .device , dtype = torch .uint8 
468+     ).reshape ((1 , 1 , 2 ))
463469    scales_h  =  scales_h .reshape ((n_blocks , 1 , - 1 )) >>  torch .tensor (
464-         [2  *  i  for  i  in  range (QK_K  //  32 )], device = blocks .device , dtype = torch .uint8 ).reshape ((1 , - 1 , 1 ))
470+         [2  *  i  for  i  in  range (QK_K  //  32 )], device = blocks .device , dtype = torch .uint8 
471+     ).reshape ((1 , - 1 , 1 ))
465472    scales_l  =  scales_l .reshape ((n_blocks , - 1 )) &  0x0F 
466473    scales_h  =  scales_h .reshape ((n_blocks , - 1 )) &  0x03 
467474    scales  =  (scales_l  |  (scales_h  <<  4 )) -  32 
@@ -475,6 +482,7 @@ def dequantize_blocks_IQ4_XS(blocks, block_size, type_size, dtype=None):
475482    qs  =  qs .squeeze (- 1 ).to (dtype )
476483    return  (dl  *  qs ).reshape (n_blocks , - 1 )
477484
485+ 
478486GGML_QUANT_SIZES  =  gguf .GGML_QUANT_SIZES 
479487dequantize_functions  =  {
480488    gguf .GGMLQuantizationType .IQ4_NL : dequantize_blocks_IQ4_NL ,
0 commit comments