File tree Expand file tree Collapse file tree 1 file changed +5
-13
lines changed
examples/apple/coreml/llama Expand file tree Collapse file tree 1 file changed +5
-13
lines changed Original file line number Diff line number Diff line change 1919 replace_linear_with_split_linear ,
2020)
2121from executorch .examples .models .llama .source_transformation .quantize import (
22- EmbeddingQuantHandler ,
22+ get_quant_embedding_transform ,
2323)
2424
2525from executorch .exir .backend .utils import format_delegated_graph
@@ -116,18 +116,10 @@ def main() -> None:
116116 ] # dtype for model/inputs
117117
118118 if export_args .embedding_quantize :
119- bitwidth , group_size = export_args .embedding_quantize .split ("," )
120- if group_size == "none" or group_size == "None" or group_size == "0" :
121- group_size = None
122- else :
123- group_size = int (group_size )
124- bitwidth = int (bitwidth )
125- model = EmbeddingQuantHandler (
126- model ,
127- bitwidth = bitwidth ,
128- group_size = group_size ,
129- packed = (bitwidth in [2 , 4 ]),
130- ).quantized_model ()
119+ quantize_embedding = get_quant_embedding_transform (
120+ export_args .embedding_quantize
121+ )
122+ quantize_embedding (model )
131123
132124 if export_args .target_split_size is not None :
133125 replace_linear_with_split_linear (
You can’t perform that action at this time.
0 commit comments