Skip to content

Commit db6c37c

Browse files
committed
Merge remote-tracking branch 'origin/main' into start-pos-api-llava-2
2 parents f8971cc + de0ff26 commit db6c37c

File tree

146 files changed

+6730
-1704
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

146 files changed

+6730
-1704
lines changed

.ci/scripts/setup-qnn-deps.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_q
1111

1212
setup_libcpp 12
1313
setup_android_ndk
14-
install_qnn
14+
install_qnn
.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# Copyright (c) Samsung Electronics Co. LTD
4+
# All rights reserved.
5+
#
6+
# This source code is licensed under the BSD-style license found in the
7+
# LICENSE file in the root directory of this source tree.
8+
9+
set -ex
10+
11+
12+
# Download the Exynos AI LiteCore SDK and unpack it under /tmp.
#
# Requests a presigned download URL from the Samsung SoC developer API, fetches
# the tarball it points to, extracts it into /tmp/exynos_ai_lite_core, and
# exports EXYNOS_AI_LITECORE_ROOT plus an LD_LIBRARY_PATH entry for the
# bundled x86_64 Linux libraries.
#
# Environment:
#   EXYNOS_AI_LITECORE_API_KEY  optional override for the value sent in the
#                               "apikey" header; defaults to the built-in key.
# Returns: non-zero when no download URL can be extracted from the response.
#
# NOTE(review): the caller passes a version as $1, but the requested version is
# fixed at "0.5" here and $1 is never read - confirm which value the API
# expects before wiring the argument through.
download_ai_lite_core() {
  # Locals keep these working names out of the shell that sources this script.
  local api_base="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
  local api_key="${EXYNOS_AI_LITECORE_API_KEY:-kn10SoSY3hkC-9Qny5TqD2mnqVrlupv3krnjLeBt5cY}"

  local version="0.5"
  local os_name="Ubuntu 22.04"
  local out_file="/tmp/exynos-ai-litecore-v${version}.tar.gz"
  local target_path="/tmp/exynos_ai_lite_core"

  mkdir -p "${target_path}"

  # Presigned issue URL.
  # Declared separately from the assignment so a curl failure is not masked by
  # the exit status of `local`.
  local json_resp
  json_resp=$(curl -sS -G \
    --location --fail --retry 3 \
    -H "apikey: ${api_key}" \
    --data-urlencode "version=${version}" \
    --data-urlencode "os=${os_name}" \
    "${api_base}")

  # The URL is the string value of the "data" field in the JSON response.
  local download_url
  download_url=$(printf '%s\n' "${json_resp}" | sed -n 's/.*"data":[[:space:]]*"\([^"]*\)".*/\1/p')

  if [[ -z "${download_url}" ]]; then
    echo "Failed to extract download URL" >&2
    echo "${json_resp}" >&2
    # return, not exit: this file is sourced, and exit would end the caller's shell.
    return 1
  fi

  # Download LiteCore
  curl -sS -L --fail --retry 3 \
    --output "${out_file}" \
    "${download_url}"

  echo "Download done: ${out_file}"

  tar -C "${target_path}" --strip-components=1 -xzvf "${out_file}"

  export EXYNOS_AI_LITECORE_ROOT="${target_path}"
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux"
}
51+
52+
# Install the Android NDK into /opt/ndk, build the Samsung (Exynos) backend,
# and export the environment variables set up for later steps.
#
# Exports: ANDROID_NDK_ROOT (an existing value is kept), EXECUTORCH_ROOT,
#          PYTHONPATH (the parent of EXECUTORCH_ROOT is appended).
# The build script is invoked by a path relative to the current directory, so
# the caller's working directory must contain backends/samsung/build.sh.
install_enn_backend() {
  local ndk_installation_dir=/opt/ndk
  local android_ndk_version=r27b
  local ndk_archive="android-ndk-${android_ndk_version}-linux.zip"

  # The directory is created with sudo, so removing an existing one needs the
  # same privileges. Running the two steps separately also means a failed
  # removal stops the script instead of silently skipping the mkdir.
  sudo rm -rf "${ndk_installation_dir}"
  sudo mkdir -p "${ndk_installation_dir}"

  pushd /tmp
  # --fail: stop on an HTTP error rather than saving the error page as the zip.
  curl -Os --fail --retry 3 "https://ossci-android.s3.amazonaws.com/${ndk_archive}"
  unzip -qo "${ndk_archive}"

  # Print the content for manual verification
  ls -lah "android-ndk-${android_ndk_version}"
  sudo mv "android-ndk-${android_ndk_version}"/* "${ndk_installation_dir}"
  popd

  # build Exynos backend
  export ANDROID_NDK_ROOT="${ANDROID_NDK_ROOT:-${ndk_installation_dir}}"
  bash backends/samsung/build.sh --build all

  # set env variable
  # Resolved in its own statement so a failing cd/pwd is not hidden by export.
  local executorch_root
  executorch_root="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
  export EXECUTORCH_ROOT="${executorch_root}"
  export PYTHONPATH="${PYTHONPATH:-}:${EXECUTORCH_ROOT}/.."
}
73+
74+
# Script entry point: fetch the AI LiteCore SDK, then install the NDK and
# build the backend.
AI_LITE_CORE_VERSION=0.5.0

download_ai_lite_core "${AI_LITE_CORE_VERSION}"
install_enn_backend

.ci/scripts/test_backend_linux.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,8 @@ if [[ "$FLOW" == *qnn* ]]; then
3030
# Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
3131
# cleaned up.
3232
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
33-
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
34-
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
33+
PYTHON_EXECUTABLE=python source .ci/scripts/build-qnn-sdk.sh
3534
QNN_X86_LIB_DIR=`realpath build-x86/lib/`
36-
QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
3735
export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
3836

3937
# TODO Get SDK root from install scripts

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ set -euxo pipefail
99

1010
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1111

12-
# Source QNN configuration
13-
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/qnn_config.sh"
1412
# Download QNN_SDK. If already downloaded, export environment path
1513
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
1614
install_qnn

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ jobs:
292292
export.output_name="${OUT_ET_MODEL_NAME}.pte"
293293
ls -lh "${OUT_ET_MODEL_NAME}.pte"
294294
elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
295-
export QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072
295+
export QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724
296296
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
297297
export PYTHONPATH=$(pwd)/..
298298
@@ -432,7 +432,7 @@ jobs:
432432
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
433433
434434
mkdir -p aar-out
435-
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
435+
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
436436
mkdir -p extension/benchmark/android/benchmark/app/libs
437437
cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
438438
pushd extension/benchmark/android/benchmark

.github/workflows/pull.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,41 @@ jobs:
866866
PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2
867867
868868
869+
test-samsung-models-linux:
870+
name: test-samsung-models-linux
871+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
872+
permissions:
873+
id-token: write
874+
contents: read
875+
with:
876+
runner: linux.2xlarge
877+
docker-image: ci-image:executorch-ubuntu-22.04-gcc9
878+
submodules: 'recursive'
879+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
880+
timeout: 90
881+
script: |
882+
set -ex
883+
884+
# The generic Linux job chooses to use base env, not the one setup by the image
885+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
886+
conda activate "${CONDA_ENV}"
887+
888+
# Setup python
889+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
890+
891+
# Setup Samsung SDK (AI Lite Core) and install enn backend
892+
source .ci/scripts/setup-samsung-linux-deps.sh
893+
894+
# Test models serially
895+
models="mv2 ic3 resnet18 resnet50"
896+
for model in $models; do
897+
python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
898+
done
899+
900+
# Test ops
901+
python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
902+
903+
869904
test-vulkan-models-linux:
870905
name: test-vulkan-models-linux
871906
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -933,10 +968,13 @@ jobs:
933968
PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
934969
./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
935970
./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
971+
./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
972+
./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
936973
937974
# Run e2e testing for selected operators. More operators will be tested via this
938975
# route in the future.
939976
python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
977+
python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
940978
941979
nxp-build-test:
942980
name: nxp-build-test

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,11 @@ if(EXECUTORCH_BUILD_QNN)
571571
list(APPEND _executorch_backends qnn_executorch_backend)
572572
endif()
573573

574+
if(EXECUTORCH_BUILD_ENN)
575+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/samsung)
576+
list(APPEND _executorch_backends enn_backend)
577+
endif()
578+
574579
if(EXECUTORCH_BUILD_XNNPACK)
575580
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
576581
list(APPEND _executorch_backends xnnpack_backend)
@@ -817,6 +822,10 @@ if(EXECUTORCH_BUILD_PYBIND)
817822
list(APPEND _dep_libs qnn_executorch_backend)
818823
endif()
819824

825+
if(EXECUTORCH_BUILD_ENN)
826+
list(APPEND _dep_libs enn_backend)
827+
endif()
828+
820829
if(EXECUTORCH_BUILD_XNNPACK)
821830
# need to explicitly specify XNNPACK and xnnpack-microkernels-prod here
822831
# otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Copyright (c) Qualcomm Innovation Center, Inc.
88
Copyright (c) 2023 Apple Inc.
99
Copyright (c) 2024 MediaTek Inc.
1010
Copyright 2023 NXP
11+
Copyright (c) 2025 Samsung Electronics Co. LTD
1112

1213
Redistribution and use in source and binary forms, with or without modification,
1314
are permitted provided that the following conditions are met:

backends/cadence/hifi/operators/op_quantized_add_asym8sxasym8s_asym8s_per_tensor_out.cpp

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ namespace native {
1616

1717
using ::executorch::aten::Tensor;
1818
using ::executorch::runtime::KernelRuntimeContext;
19+
using ::impl::reference::kernels::dequantize;
20+
using ::impl::reference::kernels::quantize;
1921

2022
void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
2123
KernelRuntimeContext& ctx,
@@ -61,25 +63,19 @@ void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
6163
}
6264
} /* if Y is a scalar Tensor */
6365
else if (Y_numel == 1) {
64-
float y =
65-
kernels::dequantize<int8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
66+
float y = dequantize<int8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
6667
for (size_t i = 0; i < X_numel; ++i) {
67-
float x =
68-
kernels::dequantize<int8_t>(X_data[i], X_scale_f, X_zero_point_i32);
68+
float x = dequantize<int8_t>(X_data[i], X_scale_f, X_zero_point_i32);
6969
float z = x + y;
70-
out_data[i] =
71-
kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
70+
out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
7271
}
7372
} /* if X is a scalar Tensor */
7473
else if (X_numel == 1) {
75-
float x =
76-
kernels::dequantize<int8_t>(X_data[0], X_scale_f, X_zero_point_i32);
74+
float x = dequantize<int8_t>(X_data[0], X_scale_f, X_zero_point_i32);
7775
for (size_t i = 0; i < Y_numel; ++i) {
78-
float y =
79-
kernels::dequantize<int8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
76+
float y = dequantize<int8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
8077
float z = x + y;
81-
out_data[i] =
82-
kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
78+
out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
8379
}
8480
} /* other broadcasting cases */
8581
else {
@@ -162,13 +158,10 @@ void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
162158
}
163159

164160
/* Apply the operation */
165-
float x = kernels::dequantize<int8_t>(
166-
X_data[X_idx], X_scale_f, X_zero_point_i32);
167-
float y = kernels::dequantize<int8_t>(
168-
Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
161+
float x = dequantize<int8_t>(X_data[X_idx], X_scale_f, X_zero_point_i32);
162+
float y = dequantize<int8_t>(Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
169163
float z = x + y;
170-
out_data[i] =
171-
kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
164+
out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
172165
}
173166
}
174167
}

backends/cadence/hifi/operators/op_quantized_add_asym8uxasym8u_asym8u_per_tensor_out.cpp

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ namespace native {
1616

1717
using ::executorch::aten::Tensor;
1818
using ::executorch::runtime::KernelRuntimeContext;
19+
using ::impl::reference::kernels::dequantize;
20+
using ::impl::reference::kernels::quantize;
1921

2022
void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
2123
KernelRuntimeContext& ctx,
@@ -61,25 +63,19 @@ void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
6163
}
6264
} /* if Y is a scalar Tensor */
6365
else if (Y_numel == 1) {
64-
float y =
65-
kernels::dequantize<uint8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
66+
float y = dequantize<uint8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
6667
for (size_t i = 0; i < X_numel; ++i) {
67-
float x =
68-
kernels::dequantize<uint8_t>(X_data[i], X_scale_f, X_zero_point_i32);
68+
float x = dequantize<uint8_t>(X_data[i], X_scale_f, X_zero_point_i32);
6969
float z = x + y;
70-
out_data[i] =
71-
kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
70+
out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
7271
}
7372
} /* if X is a scalar Tensor */
7473
else if (X_numel == 1) {
75-
float x =
76-
kernels::dequantize<uint8_t>(X_data[0], X_scale_f, X_zero_point_i32);
74+
float x = dequantize<uint8_t>(X_data[0], X_scale_f, X_zero_point_i32);
7775
for (size_t i = 0; i < Y_numel; ++i) {
78-
float y =
79-
kernels::dequantize<uint8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
76+
float y = dequantize<uint8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
8077
float z = x + y;
81-
out_data[i] =
82-
kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
78+
out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
8379
}
8480
} /* other broadcasting cases */
8581
else {
@@ -162,13 +158,10 @@ void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
162158
}
163159

164160
/* Apply the operation */
165-
float x = kernels::dequantize<uint8_t>(
166-
X_data[X_idx], X_scale_f, X_zero_point_i32);
167-
float y = kernels::dequantize<uint8_t>(
168-
Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
161+
float x = dequantize<uint8_t>(X_data[X_idx], X_scale_f, X_zero_point_i32);
162+
float y = dequantize<uint8_t>(Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
169163
float z = x + y;
170-
out_data[i] =
171-
kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
164+
out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
172165
}
173166
}
174167
}

0 commit comments

Comments
 (0)