diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS index 487bb2da4fa..4916a3fa8a9 100644 --- a/backends/apple/coreml/TARGETS +++ b/backends/apple/coreml/TARGETS @@ -58,6 +58,20 @@ runtime.python_library( ], ) +runtime.python_library( + name = "recipes", + srcs = glob([ + "recipes/*.py", + ]), + deps = [ + ":partitioner", + ":quantizer", + ], + visibility = [ + "@EXECUTORCH_CLIENTS", + ], +) + runtime.cxx_python_extension( name = "executorchcoreml", srcs = [ diff --git a/backends/apple/coreml/recipes/__init__.py b/backends/apple/coreml/recipes/__init__.py new file mode 100644 index 00000000000..c2ef6802e7d --- /dev/null +++ b/backends/apple/coreml/recipes/__init__.py @@ -0,0 +1,5 @@ +from .recipes import iphone_coreml_et_recipe + +__all__ = [ + "iphone_coreml_et_recipe", +] diff --git a/backends/apple/coreml/recipes/recipes.py b/backends/apple/coreml/recipes/recipes.py new file mode 100644 index 00000000000..1365a6859df --- /dev/null +++ b/backends/apple/coreml/recipes/recipes.py @@ -0,0 +1,69 @@ +# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
# pyre-strict
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Needed only for the return annotation; the runtime import is deferred
    # into the function together with the other backend dependencies.
    from executorch.exir import ExportRecipe

# Accepted `ios` values mapped to the attribute name looked up on
# `coremltools.target`.
_IOS_TARGET_NAMES = {15: "iOS15", 16: "iOS16", 17: "iOS17", 18: "iOS18"}

# Accepted `compute_unit` values; each one is used as a member name of
# `coremltools.ComputeUnit`.
_COMPUTE_UNIT_TYPES = ["CPU_ONLY", "CPU_AND_NE", "CPU_AND_GPU", "ALL"]


def iphone_coreml_et_recipe(
    ios: int = 17, compute_unit: str = "CPU_ONLY"
) -> "ExportRecipe":
    """Build the "iphone_coreml" export recipe.

    The recipe combines a `CoreMLQuantizer` (affine scheme, `torch.quint8`
    activations, per-channel `torch.qint8` weights) with a single
    `CoreMLPartitioner` whose compile specs use FLOAT16 compute precision and
    the `MODEL` model type.

    Args:
        ios: Minimum iOS deployment target. One of 15, 16, 17 or 18.
        compute_unit: Name of the `coremltools.ComputeUnit` member to compile
            for. One of "CPU_ONLY", "CPU_AND_NE", "CPU_AND_GPU" or "ALL".

    Returns:
        An `ExportRecipe` named "iphone_coreml".

    Raises:
        KeyError: If `ios` is not a supported version.
        ValueError: If `compute_unit` is not a supported name.
    """
    # Validate with explicit raises before the heavy imports below. An
    # `assert` would be stripped under `python -O`, which would let an invalid
    # compute unit slip through unnoticed.
    target_name = _IOS_TARGET_NAMES.get(ios)
    if target_name is None:
        raise KeyError(
            f"Unsupported iOS version: {ios}, "
            f"should be one of {sorted(_IOS_TARGET_NAMES)}"
        )
    if compute_unit not in _COMPUTE_UNIT_TYPES:
        raise ValueError(
            f"Invalid compute unit: {compute_unit}, "
            f"should be one of {_COMPUTE_UNIT_TYPES}"
        )

    import coremltools as ct
    import torch
    from coremltools.optimize.torch.quantization.quantization_config import (
        LinearQuantizerConfig,
        QuantizationScheme,
    )
    from executorch.backends.apple.coreml.compiler import CoreMLBackend
    from executorch.backends.apple.coreml.partition import CoreMLPartitioner
    from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
    from executorch.exir import (
        EdgeCompileConfig,
        ExecutorchBackendConfig,
        ExportRecipe,
    )

    # TODO: Add compute precision and model type (both are hard-coded below).
    quantization_config = LinearQuantizerConfig.from_dict(
        {
            "global_config": {
                "quantization_scheme": QuantizationScheme.affine,
                "activation_dtype": torch.quint8,
                "weight_dtype": torch.qint8,
                "weight_per_channel": True,
            }
        }
    )

    minimum_deployment_target = getattr(ct.target, target_name)
    compile_specs = CoreMLBackend.generate_compile_specs(
        minimum_deployment_target=minimum_deployment_target,
        compute_precision=ct.precision.FLOAT16,
        # `compute_unit` was validated above, so the name lookup cannot fail.
        compute_unit=ct.ComputeUnit[compute_unit],
        model_type=CoreMLBackend.MODEL_TYPE.MODEL,
    )
    partitioner = CoreMLPartitioner(
        compile_specs=compile_specs,
        # Enabled only when the deployment target is iOS18 or newer.
        take_over_mutable_buffer=minimum_deployment_target >= ct.target.iOS18,
    )
    return ExportRecipe(
        "iphone_coreml",
        quantizer=CoreMLQuantizer(quantization_config),
        partitioners=[partitioner],
        edge_compile_config=EdgeCompileConfig(
            _check_ir_validity=False,
            _skip_dim_order=True,
        ),
        edge_transform_passes=[],
        executorch_backend_config=ExecutorchBackendConfig(),
    )


# NOTE(review): the patch this module was extracted from also adds
# "//executorch/backends/apple/coreml:recipes" to the deps of a
# `runtime.python_library` rule in extension/llm/export/TARGETS. That hunk is
# build configuration and is not part of this module.