Skip to content

Commit 2813571

Browse files
Cleanup some deprecated functions.
1 parent: 04482ff · commit: 2813571

File tree

1 file changed: 0 additions and 133 deletions.

bitsandbytes/functional.py

Lines changed: 0 additions & 133 deletions
Original file line number | Diff line number | Diff line change
@@ -431,11 +431,6 @@ def create_quantile_map(A, total_bits=8):
431 431
return q
432 432

433 433

434-
@deprecated("This function is deprecated and will be removed in a future version.", category=FutureWarning)
435-
def get_special_format_str():
436-
return "row"
437-
438-
439 434
def is_on_gpu(tensors: Iterable[Optional[torch.Tensor]]):
440 435
"""Verifies that the input tensors are all on the same device.
441 436
@@ -472,11 +467,6 @@ def is_on_gpu(tensors: Iterable[Optional[torch.Tensor]]):
472 467
return on_gpu
473 468

474 469

475-
@deprecated("This function is deprecated and will be removed in a future release.", category=FutureWarning)
476-
def get_tensor_stream(tensor: Tensor) -> torch.cuda.Stream:
477-
return torch.cuda.current_stream(tensor.device)
478-
479-
480 470
def _get_tensor_stream(tensor: Tensor) -> ct.c_void_p:
481 471
# We use the raw stream for performance reasons.
482 472
return ct.c_void_p(torch._C._cuda_getCurrentRawStream(tensor.device.index))
@@ -2251,27 +2241,6 @@ def batched_igemm(
2251 2241
return out
2252 2242

2253 2243

2254-
@deprecated(
2255-
"igemmlt is deprecated and will be removed in a future release. Please use int8_linear_matmul instead.",
2256-
category=FutureWarning,
2257-
)
2258-
def igemmlt(
2259-
A: torch.Tensor,
2260-
B: torch.Tensor,
2261-
SA: Tuple[torch.Size, str],
2262-
SB: Tuple[torch.Size, str],
2263-
out: Optional[torch.Tensor] = None,
2264-
Sout: Optional[Tuple[torch.Size, str]] = None,
2265-
dtype=torch.int32,
2266-
):
2267-
if SA is not None and SA[1] != "row":
2268-
raise NotImplementedError(f"Only row-major format inputs are supported, but got format `{SA[1]}`")
2269-
if SB is not None and SB[1] != "row":
2270-
raise NotImplementedError(f"Only row-major format is supported for matrix B, but got format `{SB[1]}`")
2271-
result = int8_linear_matmul(A, B, out=out, dtype=dtype)
2272-
return result, (result.shape, "row")
2273-
2274-
2275 2244
def int8_linear_matmul(A: torch.Tensor, B: torch.Tensor, out: Optional[torch.Tensor] = None, dtype=torch.int32):
2276 2245
"""Performs an 8-bit integer matrix multiplication.
2277 2246
@@ -2316,20 +2285,6 @@ def int8_mm_dequant(
2316 2285
return torch.ops.bitsandbytes.int8_mm_dequant(A, row_stats, col_stats, out, bias)
2317 2286

2318 2287

2319-
@deprecated("mm_dequant is deprecated. Please use int8_mm_dequant() instead.", category=FutureWarning)
2320-
def mm_dequant(
2321-
A: torch.Tensor,
2322-
quant_state: Optional[Tuple[torch.Size, str]], # Not used
2323-
row_stats: torch.Tensor,
2324-
col_stats: torch.Tensor,
2325-
out: Optional[torch.Tensor] = None,
2326-
new_row_stats=None, # Not used
2327-
new_col_stats=None, # Not used
2328-
bias: Optional[torch.Tensor] = None,
2329-
):
2330-
return int8_mm_dequant(A, row_stats, col_stats, out, bias)
2331-
2332-
2333 2288
def get_colrow_absmax(
2334 2289
A: torch.Tensor,
2335 2290
row_stats: Optional[torch.Tensor] = None,
@@ -2505,72 +2460,6 @@ def coo_zeros(rows, cols, nnz, device, dtype=torch.half):
2505 2460
return COOSparseTensor(rows, cols, nnz, rowidx, colidx, values)
2506 2461

2507 2462

2508-
@deprecated("This function is deprecated. Please use `int8_double_quant` instead.", category=FutureWarning)
2509-
def double_quant(
2510-
A: torch.Tensor,
2511-
col_stats: Optional[torch.Tensor] = None,
2512-
row_stats: Optional[torch.Tensor] = None,
2513-
out_col: Optional[torch.Tensor] = None,
2514-
out_row: Optional[torch.Tensor] = None,
2515-
threshold=0.0,
2516-
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[COOSparseTensor]]:
2517-
"""Determine the quantization statistics for input matrix `A` in accordance to the `LLM.int8()` algorithm.
2518-
2519-
The statistics are determined both row-wise and column-wise (transposed).
2520-
2521-
For more information, see the [LLM.int8() paper](https://arxiv.org/abs/2208.07339).
2522-
2523-
<Tip warning={true}>
2524-
This function exists for backwards compatibility only. It is advised to use [`int8_double_quant`] instead.
2525-
The difference is that this function will return a [`COOSparseTensor`] for outliers instead of a column index.
2526-
</Tip>
2527-
2528-
Args:
2529-
A (`torch.Tensor` with dtype `torch.float16`): The input matrix.
2530-
col_stats (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the column-wise quantization scales.
2531-
row_stats (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the row-wise quantization scales.
2532-
out_col (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the column-wise quantized data.
2533-
out_row (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the row-wise quantized data.
2534-
threshold (`float`, *optional*):
2535-
An optional threshold for sparse decomposition of outlier features.
2536-
2537-
No outliers are held back when 0.0. Defaults to 0.0.
2538-
2539-
Returns:
2540-
`Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]`: A tuple containing the quantized tensor and relevant statistics.
2541-
- `torch.Tensor` with dtype `torch.int8`: The row-wise quantized data.
2542-
- `torch.Tensor` with dtype `torch.int8`: The column-wise quantized data.
2543-
- `torch.Tensor` with dtype `torch.float32`: The row-wise quantization scales.
2544-
- `torch.Tensor` with dtype `torch.float32`: The column-wise quantization scales.
2545-
- `COOSparseTensor`, *optional*: A structure representing the outlier values from the input tensor.
2546-
"""
2547-
2548-
coo_tensor = None
2549-
quant_row, quant_col, row_stats, col_stats, outlier_cols = int8_double_quant(
2550-
A,
2551-
col_stats,
2552-
row_stats,
2553-
out_col,
2554-
out_row,
2555-
threshold=threshold,
2556-
)
2557-
2558-
if threshold > 0.0 and outlier_cols is not None:
2559-
# Build a COO tensor including all of the outlier columns.
2560-
outlier_rows = torch.arange(0, A.shape[0], device=A.device, dtype=torch.int32)
2561-
outliers = A[:, outlier_cols]
2562-
coo_tensor = COOSparseTensor(
2563-
A.shape[0],
2564-
A.shape[1],
2565-
outliers.numel(),
2566-
outlier_rows.repeat_interleave(outliers.size(1)),
2567-
outlier_cols.repeat(outliers.size(0)).int(),
2568-
outliers,
2569-
)
2570-
2571-
return quant_row, quant_col, row_stats, col_stats.flatten().float(), coo_tensor
2572-
2573-
2574 2463
def int8_double_quant(
2575 2464
A: torch.Tensor,
2576 2465
col_stats: Optional[torch.Tensor] = None,
@@ -2992,21 +2881,6 @@ def vectorwise_mm_dequant(xq, S1, S2, dtype=torch.half, quant_type="vector"):
2992 2881
return None
2993 2882

2994 2883

2995-
@deprecated("This function is deprecated and will be removed in a future release.", category=FutureWarning)
2996-
def dequant_min_max(xq, A, B, SA, SB, dtype=torch.half):
2997-
offset = B.float().t().sum(0) * (SA[0] + SA[1])
2998-
x = xq.float()
2999-
if len(xq.shape) == 2 and len(SB.shape) == 3:
3000-
SB = SB.squeeze(0)
3001-
if len(SB.shape) == 2:
3002-
x *= SB.t() / 127
3003-
else:
3004-
x *= SB / 127
3005-
x *= SA[1] / 127
3006-
x += offset
3007-
return x.to(dtype)
3008-
3009-
3010 2884
@deprecated("This function is deprecated and will be removed in a future release.", category=FutureWarning)
3011 2885
def extract_outliers(A, SA, idx):
3012 2886
shapeA = SA[0]
@@ -3031,10 +2905,3 @@ def extract_outliers(A, SA, idx):
3031 2905
post_call(prev_device)
3032 2906

3033 2907
return out
3034-
3035-
3036-
@deprecated("This function is deprecated and will be removed in a future release.", category=FutureWarning)
3037-
def pipeline_test(A, batch_size):
3038-
out = torch.zeros_like(A)
3039-
lib.cpipeline_test(get_ptr(A), get_ptr(out), ct.c_size_t(A.numel()), ct.c_size_t(batch_size))
3040-
return out

0 commit comments

Comments
 (0)