Skip to content

Commit ff85fce

Browse files
tarun292 authored and facebook-github-bot committed
Move get_quantizer_and_quant_params to quantizer_lib (#11056)
Summary: Move get_quantizer_and_quant_params from examples/models/llama/export_llama_lib.py to quantizer_lib under extension/llm/export/ so that it is easier to create recipes. Reviewed By: larryliu0820 Differential Revision: D75179679
1 parent 14c3b31 commit ff85fce

File tree

4 files changed

+30
-40
lines changed

4 files changed

+30
-40
lines changed

examples/models/llama/eval_llama_lib.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,9 @@
1212
import torch
1313

1414
from datasets import load_dataset
15-
from executorch.examples.models.llama.export_llama_lib import (
16-
get_quantizer_and_quant_params,
17-
)
1815

1916
from executorch.extension.llm.export.builder import LLMEdgeManager
17+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
2018
from lm_eval.evaluator import simple_evaluate
2119
from pytorch_tokenizers import get_tokenizer
2220
from pytorch_tokenizers.llama2c import Llama2cTokenizer as SentencePieceTokenizer

examples/models/llama/export_llama_lib.py

Lines changed: 1 addition & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,7 @@
4343
get_xnnpack_partitioner,
4444
)
4545

46-
from executorch.extension.llm.export.quantizer_lib import (
47-
get_coreml_quantizer,
48-
get_pt2e_quantization_params,
49-
get_pt2e_quantizers,
50-
get_qnn_quantizer,
51-
get_vulkan_quantizer,
52-
)
46+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
5347
from executorch.util.activation_memory_profiler import generate_memory_trace
5448

5549
from ..model_factory import EagerModelFactory
@@ -726,32 +720,6 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
726720
return edge_manager
727721

728722

729-
def get_quantizer_and_quant_params(args):
730-
pt2e_quant_params = get_pt2e_quantization_params(
731-
args.pt2e_quantize, args.quantization_mode
732-
)
733-
quantizers = get_pt2e_quantizers(pt2e_quant_params, args.so_library)
734-
quant_dtype = None
735-
if args.qnn and args.pt2e_quantize:
736-
assert len(quantizers) == 0, "Should not enable both xnnpack and qnn"
737-
qnn_quantizer, quant_dtype = get_qnn_quantizer(
738-
args.pt2e_quantize, args.quantization_mode
739-
)
740-
quantizers.append(qnn_quantizer)
741-
if args.coreml and args.pt2e_quantize:
742-
assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
743-
coreml_quantizer = get_coreml_quantizer(args.pt2e_quantize)
744-
quantizers.append(coreml_quantizer)
745-
if args.vulkan and args.pt2e_quantize:
746-
assert (
747-
len(quantizers) == 0
748-
), "Should not enable both vulkan and other quantizers"
749-
vulkan_quantizer = get_vulkan_quantizer(args.pt2e_quantize)
750-
quantizers.append(vulkan_quantizer)
751-
logging.info(f"Applying quantizers: {quantizers}")
752-
return pt2e_quant_params, quantizers, quant_dtype
753-
754-
755723
def _qmode_type(value):
756724
choices = ["int8", "8da4w", "8da4w-gptq", "vulkan_4w"]
757725
patterns = [r"torchao:8da(\d+)w", r"torchao:fpa(\d+)w"]

examples/models/llava/export_llava.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,7 @@
1616
get_symmetric_quantization_config,
1717
XNNPACKQuantizer,
1818
)
19-
from executorch.examples.models.llama.export_llama_lib import (
20-
build_args_parser,
21-
get_quantizer_and_quant_params,
22-
)
19+
from executorch.examples.models.llama.export_llama_lib import build_args_parser
2320
from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
2421
replace_kv_cache_with_custom_kv_cache,
2522
)
@@ -44,6 +41,7 @@
4441
HintBasedSymShapeEvalPass,
4542
)
4643
from executorch.extension.llm.export.builder import DType, LLMEdgeManager
44+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
4745
from executorch.util.activation_memory_profiler import generate_memory_trace
4846
from pytorch_tokenizers.llama2c import Llama2cTokenizer as Tokenizer
4947
from torch.export import Dim

extension/llm/export/quantizer_lib.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -277,3 +277,29 @@ def get_vulkan_quantizer(pt2e_quantize: str):
277277

278278
quantizer = VulkanQuantizer().set_global(config)
279279
return quantizer
280+
281+
282+
def get_quantizer_and_quant_params(args):
283+
pt2e_quant_params = get_pt2e_quantization_params(
284+
args.pt2e_quantize, args.quantization_mode
285+
)
286+
quantizers = get_pt2e_quantizers(pt2e_quant_params, args.so_library)
287+
quant_dtype = None
288+
if args.qnn and args.pt2e_quantize:
289+
assert len(quantizers) == 0, "Should not enable both xnnpack and qnn"
290+
qnn_quantizer, quant_dtype = get_qnn_quantizer(
291+
args.pt2e_quantize, args.quantization_mode
292+
)
293+
quantizers.append(qnn_quantizer)
294+
if args.coreml and args.pt2e_quantize:
295+
assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
296+
coreml_quantizer = get_coreml_quantizer(args.pt2e_quantize)
297+
quantizers.append(coreml_quantizer)
298+
if args.vulkan and args.pt2e_quantize:
299+
assert (
300+
len(quantizers) == 0
301+
), "Should not enable both vulkan and other quantizers"
302+
vulkan_quantizer = get_vulkan_quantizer(args.pt2e_quantize)
303+
quantizers.append(vulkan_quantizer)
304+
logging.info(f"Applying quantizers: {quantizers}")
305+
return pt2e_quant_params, quantizers, quant_dtype

0 commit comments

Comments (0)