File tree Expand file tree Collapse file tree 4 files changed +42
-9
lines changed Expand file tree Collapse file tree 4 files changed +42
-9
lines changed Original file line number Diff line number Diff line change @@ -152,11 +152,14 @@ def kernel(self):
152
152
and self .quantization_mode == "gptq"
153
153
):
154
154
return self .quantized_kernel
155
+ kernel = self ._kernel
156
+ if self .quantization_mode == "int4" :
157
+ kernel = quantizers .unpack_int4 (kernel , self ._orig_input_dim )
155
158
if self .lora_enabled :
156
- return self ._kernel + (
157
- self .lora_alpha / self .lora_rank
158
- ) * ops . matmul ( self . lora_kernel_a , self . lora_kernel_b )
159
- return self . _kernel
159
+ return kernel + ( self .lora_alpha / self . lora_rank ) * ops . matmul (
160
+ self .lora_kernel_a , self .lora_kernel_b
161
+ )
162
+ return kernel
160
163
161
164
def call (self , inputs , training = None ):
162
165
x = ops .matmul (inputs , self .kernel )
Original file line number Diff line number Diff line change 11
11
from keras .src import models
12
12
from keras .src import ops
13
13
from keras .src import optimizers
14
+ from keras .src import quantizers
14
15
from keras .src import random
15
16
from keras .src import saving
16
17
from keras .src import testing
@@ -976,3 +977,13 @@ def test_gptq_serialization(self):
976
977
new_layer = layers .Dense .from_config (config )
977
978
new_layer .build ((None , 8 ))
978
979
self .assertEqual (new_layer .quantization_mode , "gptq" )
980
+
981
+ def test_int4_kernel_returns_unpacked_form (self ):
982
+ """Test that the `kernel` property returns the unpacked int4 kernel."""
983
+ layer = layers .Dense (units = 2 )
984
+ layer .build ((None , 2 ))
985
+ layer .quantize ("int4" )
986
+ packed_kernel = layer ._kernel
987
+ self .assertAllClose (
988
+ layer .kernel , quantizers .unpack_int4 (packed_kernel , 2 )
989
+ )
Original file line number Diff line number Diff line change @@ -214,11 +214,16 @@ def kernel(self):
214
214
and self .quantization_mode == "gptq"
215
215
):
216
216
return self .quantized_kernel
217
+ kernel = self ._kernel
218
+ if self .quantization_mode == "int4" :
219
+ kernel = quantizers .unpack_int4 (
220
+ kernel , self ._orig_length_along_pack_axis , self ._int4_pack_axis
221
+ )
217
222
if self .lora_enabled :
218
- return self ._kernel + (
219
- self .lora_alpha / self .lora_rank
220
- ) * ops . matmul ( self . lora_kernel_a , self . lora_kernel_b )
221
- return self . _kernel
223
+ return kernel + ( self .lora_alpha / self . lora_rank ) * ops . matmul (
224
+ self .lora_kernel_a , self .lora_kernel_b
225
+ )
226
+ return kernel
222
227
223
228
def compute_output_shape (self , _ ):
224
229
return self .full_output_shape
Original file line number Diff line number Diff line change 11
11
from keras .src import models
12
12
from keras .src import ops
13
13
from keras .src import optimizers
14
+ from keras .src import quantizers
14
15
from keras .src import random
15
16
from keras .src import saving
16
17
from keras .src import testing
@@ -552,7 +553,7 @@ def test_quantize(self, quantization_mode):
552
553
"btd,df->btf" ,
553
554
(None , 4 ),
554
555
(1 , 2 , 4 ),
555
- 2e-3 ,
556
+ 3e-3 ,
556
557
),
557
558
)
558
559
def test_quantize_with_specific_equations (
@@ -1036,3 +1037,16 @@ def test_gptq_serialization(self):
1036
1037
new_layer = layers .EinsumDense .from_config (config )
1037
1038
new_layer .build ((None , 3 ))
1038
1039
self .assertEqual (new_layer .quantization_mode , "gptq" )
1040
+
1041
+ def test_int4_kernel_returns_unpacked_form (self ):
1042
+ """Test that the `kernel` property returns the unpacked int4 kernel."""
1043
+ layer = layers .EinsumDense (
1044
+ equation = "ab,bc->ac" ,
1045
+ output_shape = (2 ,),
1046
+ )
1047
+ layer .build ((None , 2 ))
1048
+ layer .quantize ("int4" )
1049
+ packed_kernel = layer ._kernel
1050
+ self .assertAllClose (
1051
+ layer .kernel , quantizers .unpack_int4 (packed_kernel , 2 )
1052
+ )
You can’t perform that action at this time.
0 commit comments