1 parent d3658c5 commit d180d8e
bitsandbytes/backends/cpu_xpu_common.py
@@ -369,8 +369,9 @@ def quantize_4bit_impl(
             out_uint8[abs_scaled_A > key] = val
         out_uint8 += sign.to(torch.uint8) * 8
     elif quant_type == "int8":
-        for i in range(len(INT8_QUANT_TABLE)):
-            out_uint8[scaled_A > INT8_QUANT_TABLE[i]] = i
+        map = torch.tensor(INT8_QUANT_TABLE, device=scaled_A.device)
+        diff = torch.abs(scaled_A.unsqueeze(-1) - map)
+        out_uint8 = torch.argmin(diff, dim=-1).to(torch.uint8).to(scaled_A.device)

     if quant_type == "int8":
         out = out_uint8
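The change replaces the per-entry thresholding loop with a single broadcasted nearest-neighbour lookup: every element of `scaled_A` is compared against the whole quantization table at once and mapped to the index of its closest entry. The sketch below illustrates that pattern in isolation; the table values are placeholders, not the real `INT8_QUANT_TABLE` from bitsandbytes, and `scaled_A` is assumed to already be scaled into the table's range.

```python
import torch

# Hypothetical codebook standing in for INT8_QUANT_TABLE.
quant_table = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])

# Example input, already normalized into the codebook's range.
scaled_A = torch.tensor([[-0.9, -0.1], [0.3, 0.8]])

# Broadcast |x - q| over a new trailing axis: shape (*scaled_A.shape, len(quant_table)).
diff = torch.abs(scaled_A.unsqueeze(-1) - quant_table)

# Index of the closest codebook entry for every element of scaled_A.
codes = torch.argmin(diff, dim=-1).to(torch.uint8)

print(codes)  # tensor([[0, 2], [3, 4]], dtype=torch.uint8)
```

The trade-off is that the broadcast materializes a temporary of shape `(*scaled_A.shape, len(table))`, spending memory to eliminate the Python-level loop over table entries.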