@@ -242,7 +242,6 @@ def create_fp8_map(signed=True, exponent_bits=5, precision_bits=2, total_bits=8)
242242 assert e + p == total_bits - has_sign
243243 # the exponent is biased to 2^(e-1) -1 == 0
244244 evalues = []
245- pvalues = []
246245 for i , val in enumerate (range (- (2 ** (exponent_bits - has_sign )), 2 ** (exponent_bits - has_sign ), 1 )):
247246 evalues .append (2 ** val )
248247
@@ -1388,8 +1387,6 @@ def optimizer_update_8bit_blockwise(
13881387 gnorm_scale : float = 1.0 ,
13891388 skip_zeros = False ,
13901389) -> None :
1391- optim_func = None
1392-
13931390 is_on_gpu ([p , g , state1 , state2 , qmap1 , qmap2 , absmax1 , absmax2 ])
13941391
13951392 torch .ops .bitsandbytes .optimizer_update_8bit_blockwise (
@@ -2139,7 +2136,7 @@ def spmm_coo(
21392136 assert cooA .values .numel () == nnz
21402137 assert cooA .cols == B .shape [0 ]
21412138
2142- transposed_B = False if B .is_contiguous () else True
2139+ transposed_B = not B .is_contiguous ()
21432140
21442141 ldb = B .stride ()[(1 if transposed_B else 0 )]
21452142 ldc = B .shape [1 ]
@@ -2188,12 +2185,7 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
21882185 assert cooA .values .numel () == nnz
21892186 assert cooA .cols == B .shape [0 ], f"{ cooA .cols } vs { B .shape } "
21902187
2191- transposed_B = False if B .is_contiguous () else True
2192-
2193- ldb = B .stride ()[(1 if transposed_B else 0 )]
2194- ldc = B .shape [1 ]
2195-
2196- values , counts = torch .unique (cooA .rowidx , return_counts = True )
2188+ _ , counts = torch .unique (cooA .rowidx , return_counts = True )
21972189 offset = counts .cumsum (0 ).int ()
21982190 max_count , max_idx = torch .sort (counts , descending = True )
21992191 max_idx = max_idx .int ()
@@ -2213,11 +2205,8 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
22132205 cnnz_rows = ct .c_int32 (counts .numel ())
22142206 cnnz = ct .c_int32 (cooA .nnz )
22152207 crowsA = ct .c_int32 (cooA .rows )
2216- ccolsA = ct .c_int32 (cooA .cols )
22172208 crowsB = ct .c_int32 (B .shape [1 ])
22182209 ccolsB = ct .c_int32 (B .shape [1 ])
2219- cldb = ct .c_int32 (ldb )
2220- cldc = ct .c_int32 (ldc )
22212210
22222211 with _cuda_device_of (B ):
22232212 is_on_gpu ([cooA .rowidx , cooA .colidx , cooA .values , B , out , dequant_stats ])
0 commit comments