
Commit 4f121fd

Merge branch 'main' into Lokesh9106-patch-1
2 parents f54dc1a + 12d17ef commit 4f121fd


47 files changed (+1043, -112 lines)

.github/workflows/add-unanswered-to-project.yml

Lines changed: 21 additions & 20 deletions

@@ -20,31 +20,32 @@ jobs:
             // List of authors to exclude
             const excludedAuthors = new Set([
-              "nil-is-all", "cbilgin", "kimishpatel", "psiddh", "digantdesai", "SS-JIA", "ahmtox", "mcr229", "shoumikhin",
-              "manuelcandales", "metascroy", "cccclai", "rohansjoshi", "kirklandsign", "abhinaykukkadapu", "JacobSzwejbka",
-              "Conarnar", "lucylq", "larryliu0820", "BujSet", "Gasoonjia", "Juntian777", "guangy10", "jackzhxng",
-              "GregoryComer", "leafs1", "swolchok", "mergennachin", "tarun292", "byjlw", "jathu", "Jack-Khuu", "georgehong",
+              "nil-is-all", "tanvirislam-meta", "cbilgin", "kimishpatel", "psiddh", "digantdesai", "SS-JIA", "ahmtox", "mcr229",
+              "shoumikhin", "manuelcandales", "metascroy", "cccclai", "rohansjoshi", "kirklandsign", "abhinaykukkadapu",
+              "JacobSzwejbka", "Conarnar", "lucylq", "larryliu0820", "BujSet", "Gasoonjia", "Juntian777", "guangy10", "jackzhxng",
+              "GregoryComer", "leafs1", "swolchok", "mergennachin", "tarun292", "byjlw", "jathu", "Jack-Khuu", "georgehong",
               "zhenyan-zhang-meta", "silverguo", "harishs88ss", "AlannaBurke", "dbort", "huydhn", "mcremon-meta", "trivedivivek",
-              "angelayi", "helunwencser", "hsharma35", "zhxchen17", "iseeyuan", "svekars", "nathanaelsee", "dulinriley", "jerryzh168",
-              "cmodi-meta", "bigfootjon", "sxu", "ydwu4", "Riandy", "tugsbayasgalan", "bsoyluoglu", "yangw-dev", "YIWENX14",
-              "namanahuja", "yushangdi", "limintang", "pianpwk", "viveknayakatmeta", "andreanicastro", "JakeStevens",
+              "angelayi", "helunwencser", "hsharma35", "zhxchen17", "iseeyuan", "svekars", "nathanaelsee", "dulinriley",
+              "jerryzh168", "cmodi-meta", "bigfootjon", "sxu", "ydwu4", "Riandy", "tugsbayasgalan", "bsoyluoglu", "yangw-dev",
+              "YIWENX14", "namanahuja", "yushangdi", "limintang", "pianpwk", "viveknayakatmeta", "andreanicastro", "JakeStevens",
               "gmagogsfm", "zonglinpeng", "eigen-k", "derekxu", "salilsdesai", "skrtskrtfb", "pssrawat", "r-barnes",
               "kalpit-meta-1", "Will-MingLun-Li", "KapJI", "piyengar", "j-bahr", "BoyuanFeng", "fgasperij", "DariusHolmgren",
               "sammarden-meta", "kushrast", "meta-emilian", "Rittzz", "jeanschmidt", "copyrightly", "mikekgfb", "vmpuri",
-              "zonglinpengmeta", "maggiemoss", "aorenste", "hoangminhle98", "Solumin", "meyering", "rchen152",
-              "AishwaryaSivaraman", "migeed-z", "ebgraham", "Esteb37", "nausicaasnow", "Camyll", "ezyang", "huiyujie",
-              "dltn", "cjhopman", "blackm00n", "agunapal", "SamGondelman", "Ninja91", "ivayloen", "DrJessop", "rodrigos01meta",
-              "akrieger", "cmt0", "yiming0416", "ethansfng", "ThomasJannaud", "nirvanagth", "marcinkwiatkowski", "3l1",
-              "omerjerk", "nitish2112", "yipjustin", "ejnguyen", "andrewor14", "phaiting", "mgiordy", "LeeOHzzZ", "adicatana",
-              "Polyomino", "ezrilow", "navsud", "YifanShenSZ", "RdoubleA", "Olivia-liu", "Abhi-hpp", "Vysarat", "azad-meta",
-              "pytorchbot", "pytorchmergebot", "pytorchupdatebot", "facebook-github-bot", "app/dependabot", "Erik-Lundell",
-              "zingo", "AdrianLundell", "oscarandersson8218", "per", "Sebastian-Larsson", "SaoirseARM", "robell", "mansnils",
-              "martinlsm", "freddan80", "YufengShi-dudu", "tom-arm", "perheld", "Jerry-Ge", "gggekov", "fumchin", "wwwind",
+              "zonglinpengmeta", "maggiemoss", "aorenste", "hoangminhle98", "Solumin", "meyering", "rchen152", "AishwaryaSivaraman",
+              "migeed-z", "ebgraham", "Esteb37", "nausicaasnow", "Camyll", "ezyang", "huiyujie", "dltn", "cjhopman", "blackm00n",
+              "agunapal", "SamGondelman", "Ninja91", "ivayloen", "DrJessop", "rodrigos01meta", "akrieger", "cmt0", "yiming0416",
+              "ethansfng", "ThomasJannaud", "nirvanagth", "marcinkwiatkowski", "3l1", "omerjerk", "nitish2112", "yipjustin",
+              "ejnguyen", "andrewor14", "phaiting", "mgiordy", "LeeOHzzZ", "adicatana", "Polyomino", "ezrilow", "navsud",
+              "michaelmaitland", "RahulC7", "seyeong-han", "YifanShenSZ", "RdoubleA", "Olivia-liu", "Abhi-hpp", "Vysarat",
+              "azad-meta", "junpi", "pytorchbot", "pytorchmergebot", "pytorchupdatebot", "facebook-github-bot", "app/dependabot",
+              "Erik-Lundell", "zingo", "AdrianLundell", "oscarandersson8218", "per", "Sebastian-Larsson", "SaoirseARM", "robell",
+              "mansnils", "martinlsm", "freddan80", "YufengShi-dudu", "tom-arm", "perheld", "Jerry-Ge", "gggekov", "fumchin", "wwwind",
               "benkli01", "Tessil", "maddun01", "Michiel-Olieslagers", "armwaheed", "agrima1304", "emmakujala", "annietllnd",
-              "haowhsu-quic", "shewu-quic", "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "thchenqti",
-              "jethroqti", "cymbalrush", "DenisVieriu97", "billmguo", "StrycekSimon", "jirioc", "robert-kalmar", "skywall",
-              "MartinPavella", "roman-janik-nxp", "novak-vaclav ", "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio",
-              "ynimmaga", "daniil-lyakhov", "emmanuel-ferdman", "cavusmustafa", "Jiseong-oh", "alexdean08"
+              "MatthiasHertel80", "AlexTawseArm", "jmahbs", "haowhsu-quic", "shewu-quic", "winskuo-quic", "chunit-quic",
+              "DannyYuyang-quic", "chuntl", "thchenqti", "jethroqti", "chenweng-quic", "cymbalrush", "DenisVieriu97", "billmguo",
+              "StrycekSimon", "jirioc", "robert-kalmar", "skywall", "MartinPavella", "roman-janik-nxp", "novak-vaclav ",
+              "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio", "ynimmaga", "daniil-lyakhov", "emmanuel-ferdman",
+              "cavusmustafa", "Jiseong-oh", "alexdean08"
             ]);

            async function addItem(contentId, type, number) {
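Judging by the workflow's name (add-unanswered-to-project), this Set is presumably consulted so that issues or discussions authored by known maintainers and bots are skipped and only genuinely unanswered community posts get added to the project board; the commit extends the exclusion list with new entrants and reflows it to a consistent line width.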

.github/workflows/cuda.yml

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        cuda-version: ["12.6", "12.8", "13.0"]
+        cuda-version: ["12.6", "12.8", "12.9", "13.0"]

     name: test-executorch-cuda-build-${{ matrix.cuda-version }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
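A GitHub Actions matrix expands into one job per listed value, so this one-line change adds a test-executorch-cuda-build-12.9 job alongside the existing 12.6, 12.8, and 13.0 builds.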

backends/cadence/fusion_g3/operators/op_add.cpp

Lines changed: 1 addition & 1 deletion

@@ -162,7 +162,7 @@ Tensor& add_out(
   float alpha_val;
   torch::executor::native::utils::extract_scalar(alpha, &alpha_val);

-  if ((a.numel() == 1) && (alpha_val == 1.0)) {
+  if ((a.numel() == 1) && (alpha_val == 1.0f)) {
    XT_KERNEL_CHECK(
        ctx,
        out,
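For context on the one-character fix: in C++, 1.0 is a double literal, so alpha_val == 1.0 promotes the float operand to double before comparing, while 1.0f keeps the comparison in single precision. A minimal standalone illustration (not the kernel code):

    #include <cassert>

    int main() {
      float alpha_val = 1.0f;
      // `alpha_val == 1.0` converts alpha_val to double, then compares;
      // `alpha_val == 1.0f` compares directly as float. Both hold for an
      // exactly representable value like 1.0, so the fix is about avoiding
      // the implicit float->double promotion (flagged by warnings such as
      // -Wdouble-promotion), not about changing the result here.
      assert(alpha_val == 1.0);
      assert(alpha_val == 1.0f);
      return 0;
    }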

backends/cadence/hifi/kernels/kernels.cpp

Lines changed: 4 additions & 4 deletions

@@ -39,8 +39,8 @@ void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
 template <typename T>
 __attribute__((always_inline)) T
 quantize(const float x, float scale, int32_t zero_point) {
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
+  constexpr float min_val = static_cast<float>(std::numeric_limits<T>::min());
+  constexpr float max_val = static_cast<float>(std::numeric_limits<T>::max());
   float tmp = roundf(x * scale + zero_point);
   return std::max(std::min(tmp, max_val), min_val);
 }

@@ -56,8 +56,8 @@ void quantize(
   xtfloatx2 scale_vec = (xtfloatx2)scale;
   xtfloatx2 zero_vec = XT_FLOAT_SX2(zero_point, 0);

-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
+  constexpr float min_val = static_cast<float>(std::numeric_limits<T>::min());
+  constexpr float max_val = static_cast<float>(std::numeric_limits<T>::max());

   const xtfloatx2* __restrict__ p0 = (const xtfloatx2* __restrict__)x;
   ae_valign va0 = XT_LASX2PP(p0);
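The explicit casts matter because std::numeric_limits<T>::min()/max() return T, and initializing a float from a wide integral type is an implicit conversion that can be inexact: int32_t's max of 2147483647, for example, rounds to 2147483648.0f, which compilers flag (e.g. clang's -Wimplicit-int-float-conversion). A small sketch of the same idea:

    #include <cstdint>
    #include <limits>

    // Routing the limit through an explicit static_cast keeps the
    // (well-defined) int->float rounding but makes the intent visible,
    // silencing implicit-conversion warnings at the declaration.
    template <typename T>
    constexpr float float_max_of() {
      return static_cast<float>(std::numeric_limits<T>::max());
    }

    // Exact for narrow types such as int8_t; rounded for int32_t.
    static_assert(float_max_of<std::int8_t>() == 127.0f, "exact for int8_t");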

backends/cadence/hifi/operators/op_quantized_relu_out.cpp

Lines changed: 15 additions & 12 deletions

@@ -6,17 +6,18 @@
  * LICENSE file in the root directory of this source tree.
  */

+#include <executorch/backends/cadence/common/xt_macros.h>
 #include <executorch/backends/cadence/hifi/kernels/kernels.h>
 #include <executorch/runtime/kernel/kernel_includes.h>

-using executorch::aten::ScalarType;
-using executorch::aten::Tensor;
-using torch::executor::KernelRuntimeContext;
-
 namespace impl {
 namespace HiFi {
 namespace native {

+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
+
 void quantized_relu_per_tensor_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,

@@ -34,7 +35,10 @@ void quantized_relu_per_tensor_out(
     const uint8_t* p_in = input.const_data_ptr<uint8_t>();
     uint8_t* p_out = output.mutable_data_ptr<uint8_t>();

-    WORD32 ret_val = xa_nn_vec_relu_asym8u_asym8u(
+    XT_KERNEL_CHECK(
+        ctx,
+        ,
+        xa_nn_vec_relu_asym8u_asym8u,
         p_out,
         p_in,
         _in_zero_point,

@@ -45,15 +49,16 @@ void quantized_relu_per_tensor_out(
         255,
         input.numel());

-    ET_CHECK_MSG(ret_val == 0, "An internal error occured");
-
   } else if (input.scalar_type() == executorch::aten::ScalarType::Char) {
-    const int8_t _in_zero_point = static_cast<int8_t>(in_zero_point);
-    const int8_t _out_zero_point = static_cast<int8_t>(out_zero_point);
+    const int _in_zero_point = static_cast<int>(in_zero_point);
+    const int _out_zero_point = static_cast<int>(out_zero_point);
     const int8_t* p_in = input.const_data_ptr<int8_t>();
     int8_t* p_out = output.mutable_data_ptr<int8_t>();

-    WORD32 ret_val = xa_nn_vec_relu_asym8s_asym8s(
+    XT_KERNEL_CHECK(
+        ctx,
+        ,
+        xa_nn_vec_relu_asym8s_asym8s,
         p_out,
         p_in,
         _in_zero_point,

@@ -64,8 +69,6 @@ void quantized_relu_per_tensor_out(
         127,
         input.numel());

-    ET_CHECK_MSG(ret_val == 0, "An internal error occured");
-
   } else {
     ET_CHECK_MSG(
         false,
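The XT_KERNEL_CHECK macro itself comes from the newly included xt_macros.h, which this diff does not show. As a hedged sketch of the pattern (an assumption, not the actual definition): the macro likely invokes the kernel and, on a nonzero status, records the failure on the runtime context and returns early, with its second argument being the enclosing function's return value, which would explain the bare comma at these call sites in a void function.

    /* Hypothetical sketch only; the real macro lives in
       executorch/backends/cadence/common/xt_macros.h. */
    #define XT_KERNEL_CHECK(ctx, ret, kernel, ...)            \
      do {                                                    \
        const auto status__ = kernel(__VA_ARGS__);            \
        if (status__ != 0) {                                  \
          /* Report the failure through the runtime context   \
             instead of aborting via ET_CHECK_MSG. */         \
          ctx.fail(::executorch::runtime::Error::Internal);   \
          return ret; /* `return;` when ret is left empty */  \
        }                                                     \
      } while (0)

Net effect of the change: the hand-rolled ret_val checks (and the misspelled "An internal error occured" aborts) are replaced by a shared macro that surfaces kernel failures through the context rather than crashing.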

backends/cadence/hifi/operators/tests/test_op_quantized_relu_out.cpp

Lines changed: 6 additions & 6 deletions

@@ -57,14 +57,14 @@ class HiFiQuantizedReluTest : public OperatorTest {

 TEST_F(HiFiQuantizedReluTest, MultiDimensionalTest) {
   TensorFactory<ScalarType::Char> tf_chars;
+  TensorFactory<ScalarType::Int> tf_ints;
   const std::vector<int32_t> sizes{2, 3, 5, 6};
   Tensor quantized_input = tf_chars.full(sizes, -128);
   Tensor quantized_output = tf_chars.full(sizes, 100);
   Tensor in_zero_point = tf_chars.full({1}, 127);
   int64_t out_zero_point = -128;
-  Tensor out_multiplier =
-      TensorFactory<ScalarType::Int>().full({1}, 1077952640);
-  Tensor out_shift = TensorFactory<ScalarType::Int>().full({1}, 5);
+  Tensor out_multiplier = tf_ints.full({1}, 1077952640);
+  Tensor out_shift = tf_ints.full({1}, 5);

   quantized_relu_out(
       quantized_input,

@@ -80,14 +80,14 @@ TEST_F(HiFiQuantizedReluTest, MultiDimensionalTest) {

 TEST_F(HiFiQuantizedReluTest, OneDimensionalTest) {
   TensorFactory<ScalarType::Char> tf_chars;
+  TensorFactory<ScalarType::Int> tf_ints;
   const std::vector<int32_t> sizes{56};
   Tensor quantized_input = tf_chars.full(sizes, -128);
   Tensor quantized_output = tf_chars.full(sizes, 100);
   Tensor in_zero_point = tf_chars.full({1}, 127);
   int64_t out_zero_point = -128;
-  Tensor out_multiplier =
-      TensorFactory<ScalarType::Int>().full({1}, 1077952640);
-  Tensor out_shift = TensorFactory<ScalarType::Int>().full({1}, 5);
+  Tensor out_multiplier = tf_ints.full({1}, 1077952640);
+  Tensor out_shift = tf_ints.full({1}, 5);

   quantized_relu_out(
       quantized_input,
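This is a pure readability refactor: each test now declares a single TensorFactory<ScalarType::Int> (tf_ints) and reuses it for out_multiplier and out_shift instead of constructing a temporary factory per tensor; the tensor shapes and values are unchanged.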

backends/cadence/hifi/third-party/nnlib/targets.bzl

Lines changed: 4 additions & 0 deletions

@@ -13,6 +13,10 @@ def define_common_targets():
             "@EXECUTORCH_CLIENTS",
         ],
         compatible_with = ["ovr_config//cpu:xtensa"],
+        compiler_flags = [
+            "-Wno-pointer-sign",
+            "-Wno-incompatible-pointer-types-discards-qualifiers",
+        ],
        deps = [
            "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib",
        ],
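-Wno-pointer-sign and -Wno-incompatible-pointer-types-discards-qualifiers disable two clang diagnostics, about mismatched signed/unsigned char pointer arguments and pointer conversions that drop const, both common in vendored DSP C code; suppressing them at the build-target level presumably avoids patching the third-party nnlib sources themselves.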

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_atan2_f32.c

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
 ******************************************************************************/
 #include <float.h>

-#include "../include/NatureDSP_Signal_math.h"
+#include "NatureDSP_Signal_math.h"
 #include "NatureDSP_types.h"
 #include "xa_nn_common.h"

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_pow_f32.c

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@

 ******************************************************************************/

-#include "../include/NatureDSP_Signal_math.h"
+#include "NatureDSP_Signal_math.h"
 #include "NatureDSP_types.h"
 #include "xa_nn_common.h"
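Both NatureDSP include fixes (here and in xa_nn_elm_atan2_f32.c above) drop the source-relative ../include/ prefix in favor of the bare header name, so the header is resolved through the compiler's include search path (-I directories, or presumably the Buck target's exported headers) rather than the file's on-disk location, matching how the neighboring NatureDSP_types.h and xa_nn_common.h are already included.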

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c

Lines changed: 1 addition & 0 deletions

@@ -117,6 +117,7 @@ WORD32 xa_nn_elm_where_f32xf32_f32(FLOAT32 * __restrict__ p_out,
     XT_MOVF_S(a, a2, s);
     XT_SSI(a, (xtfloat *)out, 0);
   }
+  return 0;
 }

 static void internal_elm_where_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p_out,
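xa_nn_elm_where_f32xf32_f32 is declared to return WORD32 but previously fell off the end of the function; in C that is undefined behavior the moment a caller reads the missing return value, and compilers warn about it (-Wreturn-type). The added return 0; presumably reports the library's conventional success status.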
