diff --git a/optimum/exporters/executorch/integrations.py b/optimum/exporters/executorch/integrations.py index 23e6819a..c4e0ad93 100644 --- a/optimum/exporters/executorch/integrations.py +++ b/optimum/exporters/executorch/integrations.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import Dict +from typing import Dict, Optional import torch from packaging.version import parse @@ -173,13 +173,15 @@ class VisionEncoderExportableModule(torch.nn.Module): This module ensures that the exported model is compatible with ExecuTorch. """ - def __init__(self, model): + def __init__(self, model, model_id: Optional[str] = None): super().__init__() self.model = model self.config = model.config # Metadata to be recorded in the pte model file self.metadata = save_config_to_constant_methods(model.config, model.generation_config) + self.model_id = model_id + def forward(self, pixel_values): print(f"DEBUG: pixel_values: {pixel_values.shape}") print(f"DEBUG: forward: {self.model.method_meta('forward')}") @@ -187,11 +189,19 @@ def forward(self, pixel_values): def export(self, pixel_values=None) -> Dict[str, ExportedProgram]: if pixel_values is None: - batch_size = 1 - num_channels = self.config.num_channels - height = self.config.image_size - width = self.config.image_size - pixel_values = torch.rand(batch_size, num_channels, height, width) + model_to_pixel_values_size = { + "microsoft/resnet-50": [1, 3, 224, 224], + } + if self.model_id in model_to_pixel_values_size: + # If an explicit shape is provided for this model, use it + pixel_values = torch.rand(*model_to_pixel_values_size[self.model_id]) + else: + # If no explicit shape is provided for this model, infer a shape from config + batch_size = 1 + num_channels = self.config.num_channels + height = self.config.image_size + width = self.config.image_size + pixel_values = torch.rand(batch_size, num_channels, height, width) with torch.no_grad(): return { diff --git 
a/optimum/exporters/executorch/recipes/coreml.py b/optimum/exporters/executorch/recipes/coreml.py index 439ed8b9..0508f9c6 100644 --- a/optimum/exporters/executorch/recipes/coreml.py +++ b/optimum/exporters/executorch/recipes/coreml.py @@ -96,7 +96,7 @@ def _lower_to_executorch( ], compile_config=EdgeCompileConfig( _check_ir_validity=False, - _skip_dim_order=False, + _skip_dim_order=True, ), constant_methods=metadata, ).to_executorch( diff --git a/optimum/exporters/executorch/tasks/image_classification.py b/optimum/exporters/executorch/tasks/image_classification.py index 97c8162a..b62905c1 100644 --- a/optimum/exporters/executorch/tasks/image_classification.py +++ b/optimum/exporters/executorch/tasks/image_classification.py @@ -39,4 +39,4 @@ def load_image_classification_model(model_name_or_path: str, **kwargs) -> Vision """ eager_model = AutoModelForImageClassification.from_pretrained(model_name_or_path, **kwargs).to("cpu").eval() - return VisionEncoderExportableModule(eager_model) + return VisionEncoderExportableModule(eager_model, model_name_or_path) diff --git a/tests/models/test_modeling_resnet50.py b/tests/models/test_modeling_resnet50.py new file mode 100644 index 00000000..f72c4eed --- /dev/null +++ b/tests/models/test_modeling_resnet50.py @@ -0,0 +1,82 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import subprocess
+import sys
+import tempfile
+import unittest
+
+import pytest
+import torch
+from transformers.testing_utils import slow
+
+from optimum.executorch import ExecuTorchModelForImageClassification
+
+from ..utils import check_close_recursively
+
+
+is_not_macos = sys.platform != "darwin"
+
+
+class ExecuTorchModelIntegrationTest(unittest.TestCase):
+    """Export and numerical-parity checks for `microsoft/resnet-50` on ExecuTorch."""
+
+    @slow
+    @pytest.mark.run_slow
+    def test_resnet_export_to_executorch(self):
+        """`optimum-cli export executorch` with the XNNPACK recipe writes a `model.pte` file."""
+        model_id = "microsoft/resnet-50"
+        task = "image-classification"
+        recipe = "xnnpack"
+        with tempfile.TemporaryDirectory() as tempdir:
+            output_dir = os.path.join(tempdir, "executorch")
+            # Argument list, no shell: a temp path containing spaces cannot split the command.
+            command = ["optimum-cli", "export", "executorch", "--model", model_id, "--task", task]
+            command += ["--recipe", recipe, "--output_dir", output_dir]
+            subprocess.run(command, check=True)
+            self.assertTrue(os.path.exists(os.path.join(output_dir, "model.pte")))
+
+    @slow
+    @pytest.mark.run_slow
+    @pytest.mark.skipif(is_not_macos, reason="Only runs on MacOS")
+    def test_resnet_image_classification_coreml_fp32_cpu(self):
+        """CoreML (fp32, CPU-only) output is close to the XNNPACK output for the same input."""
+        model_id = "microsoft/resnet-50"
+        batch_size = 1
+        num_channels = 3
+        height = 224
+        width = 224
+        pixel_values = torch.rand(batch_size, num_channels, height, width)
+
+        # Test fetching and lowering the model to ExecuTorch
+        import coremltools as ct
+
+        et_model = ExecuTorchModelForImageClassification.from_pretrained(
+            model_id=model_id,
+            recipe="coreml",
+            recipe_kwargs={"compute_precision": ct.precision.FLOAT32, "compute_units": ct.ComputeUnit.CPU_ONLY},
+        )
+        et_output = et_model.forward(pixel_values)
+
+        # Reference (using XNNPACK as reference because eager model currently segfaults in a PyTorch kernel)
+        et_xnnpack = ExecuTorchModelForImageClassification.from_pretrained(
+            model_id=model_id,
+            recipe="xnnpack",
+        )
+        et_xnnpack_output = et_xnnpack.forward(pixel_values)
+
+        # Compare with reference
+        self.assertTrue(check_close_recursively(et_output, et_xnnpack_output))