pytorch
diff --git a/‎.ci/scripts/setup-qnn-deps.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/setup-qnn-deps.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/setup-samsung-linux-deps.sh‎
Lines changed: 77 additions & 0 deletions b/‎.ci/scripts/setup-samsung-linux-deps.sh‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎.ci/scripts/test_backend_linux.sh‎
Lines changed: 1 addition & 3 deletions b/‎.ci/scripts/test_backend_linux.sh‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎.ci/scripts/test_qnn_static_llama.sh‎
Lines changed: 0 additions & 2 deletions b/‎.ci/scripts/test_qnn_static_llama.sh‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 38 additions & 0 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 9 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 1 addition & 0 deletions b/‎LICENSE‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/cadence/hifi/operators/op_quantized_add_asym8sxasym8s_asym8s_per_tensor_out.cpp‎
Lines changed: 11 additions & 18 deletions b/‎backends/cadence/hifi/operators/op_quantized_add_asym8sxasym8s_asym8s_per_tensor_out.cpp‎
Lines changed: 11 additions & 18 deletions
diff --git a/‎backends/cadence/hifi/operators/op_quantized_add_asym8uxasym8u_asym8u_per_tensor_out.cpp‎
Lines changed: 11 additions & 18 deletions b/‎backends/cadence/hifi/operators/op_quantized_add_asym8uxasym8u_asym8u_per_tensor_out.cpp‎
Lines changed: 11 additions & 18 deletions
@@ -11,4 +11,4 @@ source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_q
 
 setup_libcpp 12
 setup_android_ndk
-install_qnn
+install_qnn
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright (c) Samsung Electronics Co. LTD
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+
+download_ai_lite_core() {
+  API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
+  API_KEY="kn10SoSY3hkC-9Qny5TqD2mnqVrlupv3krnjLeBt5cY"
+
+  VERSION="0.5"
+  OS_NAME="Ubuntu 22.04"
+  OUT_FILE="/tmp/exynos-ai-litecore-v${VERSION}.tar.gz"
+  TARGET_PATH="/tmp/exynos_ai_lite_core"
+
+  mkdir -p ${TARGET_PATH}
+  # Presigned issue URL
+  JSON_RESP=$(curl -sS -G \
+    --location --fail --retry 3 \
+    -H "apikey: ${API_KEY}" \
+    --data-urlencode "version=${VERSION}" \
+    --data-urlencode "os=${OS_NAME}" \
+    "${API_BASE}")
+
+  DOWNLOAD_URL=$(echo "$JSON_RESP" | sed -n 's/.*"data":[[:space:]]*"\([^"]*\)".*/\1/p')
+
+  if [[ -z "$DOWNLOAD_URL" ]]; then
+    echo "Failed to extract download URL"
+    echo "$JSON_RESP"
+    exit 1
+  fi
+
+  # Download LiteCore
+  curl -sS -L --fail --retry 3 \
+    --output "$OUT_FILE" \
+    "$DOWNLOAD_URL"
+
+  echo "Download done: $OUT_FILE"
+
+
+  tar -C "${TARGET_PATH}" --strip-components=1 -xzvf "${OUT_FILE}"
+
+  export EXYNOS_AI_LITECORE_ROOT=${TARGET_PATH}
+  export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux
+}
+
+install_enn_backend() {
+  NDK_INSTALLATION_DIR=/opt/ndk
+  rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
+  ANDROID_NDK_VERSION=r27b
+
+  pushd .
+  cd /tmp
+  curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+  unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+
+  # Print the content for manual verification
+  ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
+  sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
+  popd
+  # build Exynos backend
+  export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
+  bash backends/samsung/build.sh --build all
+  # set env variable
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
+  export PYTHONPATH=${PYTHONPATH:-}:${EXECUTORCH_ROOT}/..
+}
+
+AI_LITE_CORE_VERSION=0.5.0
+
+download_ai_lite_core ${AI_LITE_CORE_VERSION}
+install_enn_backend
@@ -30,10 +30,8 @@ if [[ "$FLOW" == *qnn* ]]; then
     # Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
     # cleaned up.
     PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
-    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-    PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+    PYTHON_EXECUTABLE=python source .ci/scripts/build-qnn-sdk.sh
     QNN_X86_LIB_DIR=`realpath build-x86/lib/`
-    QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
     export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
 
     # TODO Get SDK root from install scripts
 
@@ -9,8 +9,6 @@ set -euxo pipefail
 
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
-# Source QNN configuration
-source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/qnn_config.sh"
 # Download QNN_SDK. If already downloaded, export environment path
 source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
 install_qnn
 
@@ -292,7 +292,7 @@ jobs:
                       export.output_name="${OUT_ET_MODEL_NAME}.pte"
                     ls -lh "${OUT_ET_MODEL_NAME}.pte"
                 elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
-                    export QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072
+                    export QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724
                     export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
                     export PYTHONPATH=$(pwd)/..
 
@@ -432,7 +432,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
         mkdir -p aar-out
-        PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
+        PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
         mkdir -p extension/benchmark/android/benchmark/app/libs
         cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
         pushd extension/benchmark/android/benchmark
 
@@ -866,6 +866,41 @@ jobs:
         PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2
 
 
+  # Samsung backend CI job: sets up the Samsung SDK (AI Lite Core) and the enn
+  # backend via setup-samsung-linux-deps.sh, runs the Samsung example AOT
+  # compiler for a fixed list of models, then runs the backend's op unit tests.
+  test-samsung-models-linux:
+    name: test-samsung-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-gcc9
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -ex
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Setup python
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Setup Samsung SDK (AI Lite Core) and install enn backend
+        source .ci/scripts/setup-samsung-linux-deps.sh
+
+        # Test models serially
+        models="mv2 ic3 resnet18 resnet50"
+        for model in $models; do
+          python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
+        done
+
+        # Test ops
+        python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
+
+
   test-vulkan-models-linux:
     name: test-vulkan-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -933,10 +968,13 @@ jobs:
         PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
         ./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
         ./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
+        ./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
+        ./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
 
         # Run e2e testing for selected operators. More operators will be tested via this
         # route in the future.
         python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
+        python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
 
   nxp-build-test:
     name: nxp-build-test
 
@@ -571,6 +571,11 @@ if(EXECUTORCH_BUILD_QNN)
   list(APPEND _executorch_backends qnn_executorch_backend)
 endif()
 
+# Samsung backend: when EXECUTORCH_BUILD_ENN is enabled, build backends/samsung
+# and add enn_backend to the list of ExecuTorch backends.
+if(EXECUTORCH_BUILD_ENN)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/samsung)
+  list(APPEND _executorch_backends enn_backend)
+endif()
+
 if(EXECUTORCH_BUILD_XNNPACK)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
   list(APPEND _executorch_backends xnnpack_backend)
@@ -817,6 +822,10 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs qnn_executorch_backend)
   endif()
 
+  if(EXECUTORCH_BUILD_ENN)
+    list(APPEND _dep_libs enn_backend)
+  endif()
+
   if(EXECUTORCH_BUILD_XNNPACK)
     # need to explicitly specify XNNPACK and xnnpack-microkernels-prod here
     # otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
 
@@ -8,6 +8,7 @@ Copyright (c) Qualcomm Innovation Center, Inc.
 Copyright (c) 2023 Apple Inc.
 Copyright (c) 2024 MediaTek Inc.
 Copyright 2023 NXP
+Copyright (c) 2025 Samsung Electronics Co. LTD
 
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:
 
@@ -16,6 +16,8 @@ namespace native {
 
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::KernelRuntimeContext;
+using ::impl::reference::kernels::dequantize;
+using ::impl::reference::kernels::quantize;
 
 void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
     KernelRuntimeContext& ctx,
@@ -61,25 +63,19 @@ void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
     }
   } /* if Y is a scalar Tensor */
   else if (Y_numel == 1) {
-    float y =
-        kernels::dequantize<int8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
+    float y = dequantize<int8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
     for (size_t i = 0; i < X_numel; ++i) {
-      float x =
-          kernels::dequantize<int8_t>(X_data[i], X_scale_f, X_zero_point_i32);
+      float x = dequantize<int8_t>(X_data[i], X_scale_f, X_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* if X is a scalar Tensor */
   else if (X_numel == 1) {
-    float x =
-        kernels::dequantize<int8_t>(X_data[0], X_scale_f, X_zero_point_i32);
+    float x = dequantize<int8_t>(X_data[0], X_scale_f, X_zero_point_i32);
     for (size_t i = 0; i < Y_numel; ++i) {
-      float y =
-          kernels::dequantize<int8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float y = dequantize<int8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* other broadcasting cases */
   else {
@@ -162,13 +158,10 @@ void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
       }
 
       /* Apply the operation */
-      float x = kernels::dequantize<int8_t>(
-          X_data[X_idx], X_scale_f, X_zero_point_i32);
-      float y = kernels::dequantize<int8_t>(
-          Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
+      float x = dequantize<int8_t>(X_data[X_idx], X_scale_f, X_zero_point_i32);
+      float y = dequantize<int8_t>(Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   }
 }
 
@@ -16,6 +16,8 @@ namespace native {
 
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::KernelRuntimeContext;
+using ::impl::reference::kernels::dequantize;
+using ::impl::reference::kernels::quantize;
 
 void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
     KernelRuntimeContext& ctx,
@@ -61,25 +63,19 @@ void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
     }
   } /* if Y is a scalar Tensor */
   else if (Y_numel == 1) {
-    float y =
-        kernels::dequantize<uint8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
+    float y = dequantize<uint8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
     for (size_t i = 0; i < X_numel; ++i) {
-      float x =
-          kernels::dequantize<uint8_t>(X_data[i], X_scale_f, X_zero_point_i32);
+      float x = dequantize<uint8_t>(X_data[i], X_scale_f, X_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* if X is a scalar Tensor */
   else if (X_numel == 1) {
-    float x =
-        kernels::dequantize<uint8_t>(X_data[0], X_scale_f, X_zero_point_i32);
+    float x = dequantize<uint8_t>(X_data[0], X_scale_f, X_zero_point_i32);
     for (size_t i = 0; i < Y_numel; ++i) {
-      float y =
-          kernels::dequantize<uint8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float y = dequantize<uint8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* other broadcasting cases */
   else {
@@ -162,13 +158,10 @@ void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
       }
 
       /* Apply the operation */
-      float x = kernels::dequantize<uint8_t>(
-          X_data[X_idx], X_scale_f, X_zero_point_i32);
-      float y = kernels::dequantize<uint8_t>(
-          Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
+      float x = dequantize<uint8_t>(X_data[X_idx], X_scale_f, X_zero_point_i32);
+      float y = dequantize<uint8_t>(Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   }
 }