# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# This file registers torch ops that are not yet available in coremltools, or
# that ship in a newer coremltools release than the one used by ExecuTorch.
# Each op registered here should link to the coremltools PR that adds it upstream.

import torch as _torch
from coremltools import _logger as logger
from coremltools.converters.mil.frontend import _utils
from coremltools.converters.mil.frontend.torch.ops import (
    _get_inputs,
    NUM_TO_NUMPY_DTYPE,
    NUM_TO_TORCH_DTYPE,
    transpose,
    unbind,
)

from coremltools.converters.mil.frontend.torch.torch_op_registry import (
    register_torch_op,
)
from coremltools.converters.mil.mil import types


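# The *_copy ops below are the functional (out-of-place) variants produced by
# torch.export's functionalization; they lower exactly like their non-copy
# counterparts, so we delegate to the existing coremltools translators.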
# https://github.com/apple/coremltools/pull/2556
@register_torch_op(override=False)
def transpose_copy(context, node):
    transpose(context, node)


# https://github.com/apple/coremltools/pull/2557
@register_torch_op(override=False)
def unbind_copy(context, node):
    unbind(context, node)


# https://github.com/apple/coremltools/pull/2558
@register_torch_op(
    torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
    override=False,
)
def dequantize_affine(context, node):
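    """Lower torchao's dequantize_affine to a Core ML constexpr dequantize op.

    Folds the quantized weight, scale, and optional zero point into a single
    compile-time constant op so Core ML can keep the weight compressed.
    """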
    inputs = _get_inputs(context, node, expected=[7, 8])
    int_data = inputs[0].val
    block_size = inputs[1].val
    scale = inputs[2].val
    zero_point = (
        inputs[3].val if inputs[3] is not None and inputs[3].val is not None else None
    )
    # input_dtype does not affect the lowering because the data is re-cast to
    # int4/int8 below; we only validate that it is int8 or int32.
    input_dtype = inputs[4].val
    assert NUM_TO_TORCH_DTYPE[input_dtype] in [
        _torch.int8,
        _torch.int32,
    ], "input_dtype should be int8 or int32"

    quant_min = inputs[5].val
    quant_max = inputs[6].val

    assert len(int_data.shape) == 2, "dequantize_affine only supports rank 2 inputs"

    assert len(int_data.shape) == len(
        block_size
    ), "block_size must have the same length as int_data.shape"
    assert block_size[0] == 1, "block_size[0] must be 1"
    group_size = block_size[1]
    k = int_data.shape[1]
    assert k % group_size == 0, "k must be divisible by group_size"
    scales_per_row = k // group_size
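    # Normalize scale (and zero_point) to shape (n, k // group_size): weight
    # column j then takes its parameters from group column j // group_size.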
    scale = scale.reshape(-1, scales_per_row)
    if zero_point is not None:
        zero_point = zero_point.reshape(-1, scales_per_row)

    # TODO: it is unclear whether Core ML can make use of the output dtype.
    # We could add a cast op on the output, but Core ML would likely remove it
    # during a later optimization pass. For now, we just log a warning.
    out_np_dtype = None
    if len(inputs) > 7:
        out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
        logger.warning(
            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
        )

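    # Map the (quant_min, quant_max) range onto a Core ML storage dtype:
    # [-8, 7] is the signed 4-bit range and [-128, 127] the signed 8-bit range.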
    if quant_min == -8 and quant_max == 7:
        quantized_np_dtype = types.nptype_from_builtin(types.string_to_builtin("int4"))
    elif quant_min == -128 and quant_max == 127:
        quantized_np_dtype = types.nptype_from_builtin(types.string_to_builtin("int8"))
    else:
        raise ValueError(
            f"Unsupported quantization range: {quant_min} to {quant_max}. CoreML only supports 4-bit and 8-bit quantization."
        )

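    # Emit the dequantize op as a compile-time constant; for group g = j // group_size
    # it represents output[i, j] = (int_data[i, j] - zero_point[i, g]) * scale[i, g].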
    output = _utils._construct_constexpr_dequant_op(
        int_data.astype(quantized_np_dtype),
        zero_point,
        scale,
        axis=-1,
        name=node.name,
    )
    context.add(output, node.name)
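

# Importing this module is enough to make the ops above available: each
# @register_torch_op call adds its translator to the coremltools torch op
# registry consulted during conversion.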