@@ -40,15 +40,19 @@ class IntxUnpackedTensor(TorchAOBaseTensor):
40
40
This format is intended for torch.export use cases.
41
41
42
42
Tensor Attributes:
43
- _data: int data for
44
- scale: (K/group_size, N) for 2D Tensor, (B, N, K/group_size) for 3D Tensor, where B is batch size,
45
- dtype is the same as the original Tensor dtype
46
- zero_point: (K/group_size, N) for 2D Tensor, (B, N, K/group_size) for 3D Tensor, where B is batch size,
47
- dtype is the same as the original Tensor dtype
43
+ int_data: int data for quantization.
44
+ dtype is int8
45
+ Shape is the same as original Tensor: (n, k) for 2D tensor
46
+ scale: block scales for quantization
47
+ dtype is the same as the original Tensor dtype.
48
+ Shape is (n // block_size[0], k // block_size[1]) for 2D tensor
49
+ zero_point: block zero points for quantization
50
+ dtype is the same as the original Tensor dtype or int8
51
+ Shape is (n // block_size[0], k // block_size[1]) for 2D tensor
48
52
49
53
Non-Tensor Attributes:
54
+ bit_width: the bit width for quantization (can be 1 to 8, inclusive)
50
55
block_size: the block size for quantization, representing the granularity; for example, groupwise quantization will have block_size (1, group_size)
51
- shape: the shape of the original Tensor
52
56
"""
53
57
54
58
tensor_data_attrs = ["int_data" , "scale" , "zero_point" ]
0 commit comments