Skip to content

Commit 70a3f30

Browse files
metascroyfacebook-github-bot
authored and committed
Temp fix to unblock diff train
Summary: Temp fix to unblock diff train Reviewed By: lucylq Differential Revision: D75966594
1 parent 7d0e954 commit 70a3f30

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

backends/vulkan/_passes/int4_weight_only_quantizer.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,28 @@
77
import torch
88
import torch.nn.functional as F
99

10-
from torchao.quantization.GPTQ.GPTQ import _check_linear_int4_k
1110
from torchao.quantization.unified import Quantizer
1211
from torchao.quantization.utils import groupwise_affine_quantize_tensor
1312

13+
# TODO: import `_check_linear_int4_k` from torchao.quantization.GPTQ.GPTQ
14+
# Once diff train catches up
15+
def _check_linear_int4_k(k, group_size=1, inner_k_tiles=None):
16+
"""
17+
Check if the dimensions are compatible with int4 quantization.
18+
19+
Args:
20+
k: The dimension size to check
21+
group_size: The group size for quantization
22+
inner_k_tiles: The inner k tiles size
23+
24+
Returns:
25+
bool: Whether the dimensions are compatible
26+
"""
27+
k_divisible_by_group_size = k % group_size == 0
28+
if inner_k_tiles is not None:
29+
k_divisible_by_16_times_inner_k_tiles = k % (inner_k_tiles * 16) == 0
30+
return k_divisible_by_group_size and k_divisible_by_16_times_inner_k_tiles
31+
return k_divisible_by_group_size
1432

1533
# This module is copied from torchao.quantization.GPTQ.WeightOnlyInt4Linear with
1634
# changes at the annotated lines.

0 commit comments

Comments
 (0)