@@ -242,7 +242,6 @@ def create_fp8_map(signed=True, exponent_bits=5, precision_bits=2, total_bits=8)
242242 assert e + p == total_bits - has_sign
243243 # the exponent is biased to 2^(e-1) -1 == 0
244244 evalues = []
245- pvalues = []
246245 for i , val in enumerate (range (- (2 ** (exponent_bits - has_sign )), 2 ** (exponent_bits - has_sign ), 1 )):
247246 evalues .append (2 ** val )
248247
@@ -1365,8 +1364,6 @@ def optimizer_update_8bit_blockwise(
13651364 gnorm_scale : float = 1.0 ,
13661365 skip_zeros = False ,
13671366) -> None :
1368- optim_func = None
1369-
13701367 is_on_gpu ([p , g , state1 , state2 , qmap1 , qmap2 , absmax1 , absmax2 ])
13711368
13721369 torch .ops .bitsandbytes .optimizer_update_8bit_blockwise (
@@ -2116,7 +2113,7 @@ def spmm_coo(
21162113 assert cooA .values .numel () == nnz
21172114 assert cooA .cols == B .shape [0 ]
21182115
2119- transposed_B = False if B .is_contiguous () else True
2116+ transposed_B = not B .is_contiguous ()
21202117
21212118 ldb = B .stride ()[(1 if transposed_B else 0 )]
21222119 ldc = B .shape [1 ]
@@ -2165,12 +2162,7 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
21652162 assert cooA .values .numel () == nnz
21662163 assert cooA .cols == B .shape [0 ], f"{ cooA .cols } vs { B .shape } "
21672164
2168- transposed_B = False if B .is_contiguous () else True
2169-
2170- ldb = B .stride ()[(1 if transposed_B else 0 )]
2171- ldc = B .shape [1 ]
2172-
2173- values , counts = torch .unique (cooA .rowidx , return_counts = True )
2165+ _ , counts = torch .unique (cooA .rowidx , return_counts = True )
21742166 offset = counts .cumsum (0 ).int ()
21752167 max_count , max_idx = torch .sort (counts , descending = True )
21762168 max_idx = max_idx .int ()
@@ -2190,11 +2182,8 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
21902182 cnnz_rows = ct .c_int32 (counts .numel ())
21912183 cnnz = ct .c_int32 (cooA .nnz )
21922184 crowsA = ct .c_int32 (cooA .rows )
2193- ccolsA = ct .c_int32 (cooA .cols )
21942185 crowsB = ct .c_int32 (B .shape [1 ])
21952186 ccolsB = ct .c_int32 (B .shape [1 ])
2196- cldb = ct .c_int32 (ldb )
2197- cldc = ct .c_int32 (ldc )
21982187
21992188 with _cuda_device_of (B ):
22002189 is_on_gpu ([cooA .rowidx , cooA .colidx , cooA .values , B , out , dequant_stats ])
0 commit comments