diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index bc9bbb8bae0..1333f481866 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -232,21 +232,24 @@ test_model_with_qnn() { # @param should_test If true, build and test the model using the coreml_executor_runner. test_model_with_coreml() { local should_test="$1" + local test_with_pybindings="$2" + local dtype="$3" if [[ "${BUILD_TOOL}" != "cmake" ]]; then echo "coreml only supports cmake." exit 1 fi - DTYPE=float16 + RUN_WITH_PYBINDINGS="" + if [[ "${test_with_pybindings}" == true ]]; then + echo \"Running with pybindings\" + export RUN_WITH_PYBINDINGS="--run_with_pybindings" + fi - "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}" --use_partitioner + "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision ${dtype} --use_partitioner ${RUN_WITH_PYBINDINGS} EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit) if [ -n "$EXPORTED_MODEL" ]; then - EXPORTED_MODEL_WITH_DTYPE="${EXPORTED_MODEL%.pte}_${DTYPE}.pte" - mv "$EXPORTED_MODEL" "$EXPORTED_MODEL_WITH_DTYPE" - EXPORTED_MODEL="$EXPORTED_MODEL_WITH_DTYPE" echo "OK exported model: $EXPORTED_MODEL" else echo "[error] failed to export model: no .pte file found" @@ -303,7 +306,15 @@ elif [[ "${BACKEND}" == *"coreml"* ]]; then if [[ "${BACKEND}" == *"test"* ]]; then should_test_coreml=true fi - test_model_with_coreml "${should_test_coreml}" + test_with_pybindings=false + if [[ "${BACKEND}" == *"pybind"* ]]; then + test_with_pybindings=true + fi + dtype=float16 + if [[ "${BACKEND}" == *"float32"* ]]; then + dtype=float32 + fi + test_model_with_coreml "${should_test_coreml}" "${test_with_pybindings}" "${dtype}" if [[ $? -eq 0 ]]; then prepare_artifacts_upload fi diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index d7205514a68..8857029d96b 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -18,8 +18,8 @@ concurrency: cancel-in-progress: true jobs: - test-models-macos: - name: test-models-macos + test-models-macos-cpu: + name: test-models-macos-cpu uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: matrix: @@ -568,10 +568,12 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - test-apple-model: - name: test-apple-model + test-models-macos-coreml: + name: test-models-macos-coreml uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: + matrix: + model: [dl3, edsr, efficient_sam, emformer_join, emformer_transcribe, ic3, ic4, mobilebert, mv2, mv3, resnet50, vit, w2l] fail-fast: false with: runner: macos-m1-stable @@ -580,7 +582,23 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 90 script: | + MODEL_NAME=${{ matrix.model }} BUILD_TOOL=cmake + BACKEND="coreml-pybind" + + + # Set model specific overrides + if [[ "${MODEL_NAME}" == "mobilebert" ]]; then + # See https://github.com/pytorch/executorch/issues/12907 + # mobilebert has nan output on FP16, and high MSE on fp32, so we disable runtime test now + BACKEND="coreml" + fi + + if [[ "${MODEL_NAME}" == "efficient_sam" ]]; then + # See https://github.com/pytorch/executorch/issues/12906 + # efficient_sam fails to run on CoreML + BACKEND="coreml" + fi bash .ci/scripts/setup-conda.sh @@ -589,13 +607,28 @@ jobs: PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh echo "Finishing installing coreml." - # Build and test coreml model - MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) - for MODEL_NAME in "${MODELS[@]}"; do - echo "::group::Exporting coreml model: $MODEL_NAME" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" - echo "::endgroup::" + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" + + test-models-macos-mps: + name: test-models-macos-mps + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + strategy: + fail-fast: false + with: + runner: macos-m1-stable + python-version: '3.11' + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + BUILD_TOOL=cmake + bash .ci/scripts/setup-conda.sh + + # Setup MacOS dependencies as there is no Docker support on MacOS atm + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # Build and test mps model + for MODEL_NAME in mv3 ic4 resnet50 edsr mobilebert w2l; do echo "::group::Exporting mps model: $MODEL_NAME" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" echo "::endgroup::" diff --git a/backends/apple/coreml/scripts/generate_test_models.sh b/backends/apple/coreml/scripts/generate_test_models.sh index 001ba362393..6a73d697379 100755 --- a/backends/apple/coreml/scripts/generate_test_models.sh +++ b/backends/apple/coreml/scripts/generate_test_models.sh @@ -22,7 +22,7 @@ cd "$EXECUTORCH_ROOT_PATH" MODELS=("add" "add_mul" "mul" "mv3") for MODEL in "${MODELS[@]}" do - echo "Executorch: Generating $MODEL model" + echo "Executorch: Generating $MODEL model" # TODO: Don't use the script in examples directory. python3 -m examples.apple.coreml.scripts.export --model_name "$MODEL" --save_processed_bytes mv -f "$MODEL""_coreml_all.pte" "$COREML_DIR_PATH/runtime/test/models" @@ -36,7 +36,7 @@ COMPILE_MODELS=("add_mul") echo "Executorch: Generating compiled model" for MODEL in "${COMPILE_MODELS[@]}" do - echo "Executorch: Generating compiled $MODEL model" + echo "Executorch: Generating compiled $MODEL model" python3 -m examples.apple.coreml.scripts.export --model_name "$MODEL" --compile mv -f "$MODEL""_compiled_coreml_all.pte" "$COREML_DIR_PATH/runtime/test/models" done diff --git a/examples/apple/coreml/scripts/export.py b/examples/apple/coreml/scripts/export.py index b9acc3b8fb9..0b5f64d13c2 100644 --- a/examples/apple/coreml/scripts/export.py +++ b/examples/apple/coreml/scripts/export.py @@ -3,6 +3,7 @@ # Please refer to the license found in the LICENSE file in the root directory of the source tree. import argparse +import collections import copy import pathlib @@ -23,8 +24,7 @@ from executorch.exir import to_edge from executorch.exir.backend.backend_api import to_backend - -from torch.export import export +from executorch.extension.export_util.utils import save_pte_program REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent.parent.parent EXAMPLES_DIR = REPO_ROOT / "examples" @@ -41,7 +41,16 @@ ) -def parse_args() -> argparse.ArgumentParser: +def is_fbcode(): + return not hasattr(torch.version, "git_version") + + +_CAN_RUN_WITH_PYBINDINGS = (sys.platform == "darwin") and not is_fbcode() +if _CAN_RUN_WITH_PYBINDINGS: + from executorch.runtime import Runtime + + +def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument( @@ -82,9 +91,12 @@ def parse_args() -> argparse.ArgumentParser: required=False, default=False, ) + parser.add_argument( + "--run_with_pybindings", + action=argparse.BooleanOptionalAction, + ) args = parser.parse_args() - # pyre-fixme[7]: Expected `ArgumentParser` but got `Namespace`. return args @@ -95,7 +107,8 @@ def partition_module_to_coreml(module): def lower_module_to_coreml(module, compile_specs, example_inputs): module = module.eval() edge = to_edge( - export(module, example_inputs, strict=True), compile_config=_EDGE_COMPILE_CONFIG + torch.export.export(module, example_inputs, strict=True), + compile_config=_EDGE_COMPILE_CONFIG, ) # All of the subsequent calls on the edge_dialect_graph generated above (such as delegation or # to_executorch()) are done in place and the graph is also modified in place. For debugging purposes @@ -115,24 +128,23 @@ def lower_module_to_coreml(module, compile_specs, example_inputs): def export_lowered_module_to_executorch_program(lowered_module, example_inputs): lowered_module(*example_inputs) exec_prog = to_edge( - export(lowered_module, example_inputs, strict=True), + torch.export.export(lowered_module, example_inputs, strict=True), compile_config=_EDGE_COMPILE_CONFIG, ).to_executorch(config=exir.ExecutorchBackendConfig(extract_delegate_segments=True)) return exec_prog -def save_executorch_program(exec_prog, model_name, compute_unit): - buffer = exec_prog.buffer - filename = f"{model_name}_coreml_{compute_unit}.pte" - print(f"Saving exported program to {filename}") - with open(filename, "wb") as file: - file.write(buffer) - return +def get_pte_base_name(args: argparse.Namespace) -> str: + pte_name = args.model_name + if args.compile: + pte_name += "_compiled" + pte_name = f"{pte_name}_coreml_{args.compute_unit}" + return pte_name -def save_processed_bytes(processed_bytes, model_name, compute_unit): - filename = f"{model_name}_coreml_{compute_unit}.bin" +def save_processed_bytes(processed_bytes, base_name: str): + filename = f"{base_name}.bin" print(f"Saving processed bytes to {filename}") with open(filename, "wb") as file: file.write(processed_bytes) @@ -154,6 +166,37 @@ def generate_compile_specs_from_args(args): ) +def run_with_pybindings(executorch_program, eager_reference, example_inputs, precision): + if not _CAN_RUN_WITH_PYBINDINGS: + raise RuntimeError("Cannot run with pybindings on this platform.") + + dtype = { + "float32": torch.float32, + "float16": torch.float16, + }[precision] + + runtime = Runtime.get() + program = runtime.load_program(executorch_program.buffer) + method = program.load_method("forward") + et_outputs = method.execute(*example_inputs)[0] + eager_outputs = eager_reference(*example_inputs) + if isinstance(eager_outputs, collections.OrderedDict): + eager_outputs = eager_outputs["out"] + if isinstance(eager_outputs, list | tuple): + eager_outputs = eager_outputs[0] + + mse = ((et_outputs - eager_outputs) ** 2).mean().sqrt() + print(f"Mean square error: {mse}") + assert mse < 0.1, "Mean square error is too high." + + if dtype == torch.float32: + assert torch.allclose( + et_outputs, eager_outputs, atol=1e-02, rtol=1e-02 + ), f"""Outputs do not match eager reference: + \tet_outputs (first 5)={et_outputs.reshape(-1)[0:5]} + \teager_outputs (first 5)={eager_outputs.reshape(-1)[0:5]}""" + + def main(): args = parse_args() @@ -170,49 +213,65 @@ def main(): f"Valid compute units are {valid_compute_units}." ) - model, example_inputs, _, dynamic_shapes = EagerModelFactory.create_model( - *MODEL_NAME_TO_MODEL[args.model_name] + model, example_args, example_kwargs, dynamic_shapes = ( + EagerModelFactory.create_model(*MODEL_NAME_TO_MODEL[args.model_name]) ) if not args.dynamic_shapes: dynamic_shapes = None compile_specs = generate_compile_specs_from_args(args) - lowered_module = None - + pte_base_name = get_pte_base_name(args) if args.use_partitioner: - model.eval() - exir_program_aten = torch.export.export( - model, example_inputs, dynamic_shapes=dynamic_shapes, strict=True - ) - - edge_program_manager = exir.to_edge(exir_program_aten) - edge_copy = copy.deepcopy(edge_program_manager) - partitioner = CoreMLPartitioner( - skip_ops_for_coreml_delegation=None, compile_specs=compile_specs + model = model.eval() + assert not args.generate_etrecord, "ETRecord is not supported with partitioner" + ep = torch.export.export( + model, + args=example_args, + kwargs=example_kwargs, + dynamic_shapes=dynamic_shapes, ) - delegated_program_manager = edge_program_manager.to_backend(partitioner) - exec_program = delegated_program_manager.to_executorch( - config=exir.ExecutorchBackendConfig(extract_delegate_segments=True) + print(ep) + delegated_program = exir.to_edge_transform_and_lower( + ep, + partitioner=[CoreMLPartitioner(compile_specs=compile_specs)], ) + exec_program = delegated_program.to_executorch() + save_pte_program(exec_program, pte_base_name) + if args.run_with_pybindings: + run_with_pybindings( + executorch_program=exec_program, + eager_reference=model, + example_inputs=example_args, + precision=args.compute_precision, + ) else: lowered_module, edge_copy = lower_module_to_coreml( module=model, - example_inputs=example_inputs, + example_inputs=example_args, compile_specs=compile_specs, ) exec_program = export_lowered_module_to_executorch_program( lowered_module, - example_inputs, - ) - - model_name = f"{args.model_name}_compiled" if args.compile else args.model_name - save_executorch_program(exec_program, model_name, args.compute_unit) - generate_etrecord(f"{args.model_name}_coreml_etrecord.bin", edge_copy, exec_program) - - if args.save_processed_bytes and lowered_module is not None: - save_processed_bytes( - lowered_module.processed_bytes, args.model_name, args.compute_unit + example_args, ) + save_pte_program(exec_program, pte_base_name) + if args.generate_etrecord: + generate_etrecord( + f"{args.model_name}_coreml_etrecord.bin", edge_copy, exec_program + ) + + if args.save_processed_bytes: + save_processed_bytes( + lowered_module.processed_bytes, + pte_base_name, + ) + if args.run_with_pybindings: + run_with_pybindings( + executorch_program=exec_program, + eager_reference=model, + example_inputs=example_args, + precision=args.compute_precision, + ) if __name__ == "__main__":