
Commit a9c9e7b

Merge branch 'main' into export-D87519599

2 parents c49c0ae + f84d45b

33 files changed: +392 -33 lines

backends/cadence/fusion_g3/operators/op_add.cpp
Lines changed: 1 addition & 1 deletion

@@ -162,7 +162,7 @@ Tensor& add_out(
   float alpha_val;
   torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
-  if ((a.numel() == 1) && (alpha_val == 1.0)) {
+  if ((a.numel() == 1) && (alpha_val == 1.0f)) {
     XT_KERNEL_CHECK(
         ctx,
         out,
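
The one-character change is about the literal's type: 1.0 is a double, so the old comparison implicitly promoted alpha_val to double before comparing; 1.0f keeps the whole expression in single precision. A minimal standalone sketch of the difference (not the operator code):

#include <cstdio>

int main() {
  float alpha_val = 1.0f;
  bool promoted = (alpha_val == 1.0);  // alpha_val converted to double, then compared
  bool single = (alpha_val == 1.0f);   // compared entirely in float
  std::printf("%d %d\n", promoted, single);
  return 0;
}

For the value 1.0 both forms compare equal, since 1.0 is exactly representable in either type; the fix avoids the implicit float-to-double promotion (and the compiler warnings it can trigger), not a behavioral difference.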

backends/cadence/hifi/kernels/kernels.cpp
Lines changed: 4 additions & 4 deletions

@@ -39,8 +39,8 @@ void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
 template <typename T>
 __attribute__((always_inline)) T
 quantize(const float x, float scale, int32_t zero_point) {
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
+  constexpr float min_val = static_cast<float>(std::numeric_limits<T>::min());
+  constexpr float max_val = static_cast<float>(std::numeric_limits<T>::max());
   float tmp = roundf(x * scale + zero_point);
   return std::max(std::min(tmp, max_val), min_val);
 }
@@ -56,8 +56,8 @@ void quantize(
   xtfloatx2 scale_vec = (xtfloatx2)scale;
   xtfloatx2 zero_vec = XT_FLOAT_SX2(zero_point, 0);
 
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
+  constexpr float min_val = static_cast<float>(std::numeric_limits<T>::min());
+  constexpr float max_val = static_cast<float>(std::numeric_limits<T>::max());
 
   const xtfloatx2* __restrict__ p0 = (const xtfloatx2* __restrict__)x;
   ae_valign va0 = XT_LASX2PP(p0);
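
The casts matter because std::numeric_limits<T>::min() and max() return a T, so initializing a constexpr float from them is an implicit integral-to-float conversion that stricter warning levels flag, and one that is lossy for 32-bit T. A standalone sketch of the pattern (variable templates used here for brevity):

#include <cstdint>
#include <limits>

template <typename T>
constexpr float lo = static_cast<float>(std::numeric_limits<T>::min());
template <typename T>
constexpr float hi = static_cast<float>(std::numeric_limits<T>::max());

// Exact for 8-bit types:
static_assert(lo<int8_t> == -128.0f && hi<int8_t> == 127.0f, "clamp bounds");

int main() {
  // For int32_t, 2147483647 has no exact float representation; the explicit
  // cast documents that the clamp bound is a rounded value rather than
  // letting the conversion happen silently.
  return 0;
}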

backends/cadence/hifi/operators/op_quantized_relu_out.cpp
Lines changed: 4 additions & 4 deletions

@@ -9,14 +9,14 @@
 #include <executorch/backends/cadence/hifi/kernels/kernels.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
-using executorch::aten::ScalarType;
-using executorch::aten::Tensor;
-using torch::executor::KernelRuntimeContext;
-
 namespace impl {
 namespace HiFi {
 namespace native {
 
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
+
 void quantized_relu_per_tensor_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,
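
Two things change here: the using-declarations move inside impl::HiFi::native, so they stop injecting names into the global namespace of this translation unit, and the leading :: anchors each alias at the global namespace regardless of what the enclosing namespaces contain. A toy sketch of why the anchor matters (all namespaces below are stand-ins):

// Stand-in for the real ::executorch::aten::Tensor.
namespace executorch {
namespace aten {
struct Tensor {};
} // namespace aten
} // namespace executorch

namespace impl {
// A sibling impl::executorch shadows the global executorch during
// unqualified lookup from the nested namespaces below...
namespace executorch {
namespace aten {
struct OtherTensor {};
} // namespace aten
} // namespace executorch

namespace HiFi {
namespace native {
// ...but the leading :: always resolves from the global namespace, so the
// alias cannot be hijacked by impl::executorch.
using ::executorch::aten::Tensor;
} // namespace native
} // namespace HiFi
} // namespace impl

int main() {
  impl::HiFi::native::Tensor t; // refers to ::executorch::aten::Tensor
  (void)t;
  return 0;
}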

backends/cadence/hifi/third-party/nnlib/targets.bzl
Lines changed: 4 additions & 0 deletions

@@ -13,6 +13,10 @@ def define_common_targets():
             "@EXECUTORCH_CLIENTS",
         ],
         compatible_with = ["ovr_config//cpu:xtensa"],
+        compiler_flags = [
+            "-Wno-pointer-sign",
+            "-Wno-incompatible-pointer-types-discards-qualifiers",
+        ],
         deps = [
             "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib",
         ],

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_atan2_f32.c
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
 ******************************************************************************/
 #include <float.h>
 
-#include "../include/NatureDSP_Signal_math.h"
+#include "NatureDSP_Signal_math.h"
 #include "NatureDSP_types.h"
 #include "xa_nn_common.h"

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_pow_f32.c
Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
 
 ******************************************************************************/
 
-#include "../include/NatureDSP_Signal_math.h"
+#include "NatureDSP_Signal_math.h"
 #include "NatureDSP_types.h"
 #include "xa_nn_common.h"

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c
Lines changed: 1 addition & 0 deletions

@@ -117,6 +117,7 @@ WORD32 xa_nn_elm_where_f32xf32_f32(FLOAT32 * __restrict__ p_out,
     XT_MOVF_S(a, a2, s);
     XT_SSI(a, (xtfloat *)out, 0);
   }
+  return 0;
 }
 
 static void internal_elm_where_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p_out,
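
The added return 0; closes a real hole: xa_nn_elm_where_f32xf32_f32 is declared to return WORD32, and flowing off the end of a value-returning function leaves the caller with an indeterminate status. A minimal illustration of the bug class (hypothetical function, not the nnlib code):

#include <cstdio>

// Sketch of the bug class: a status-returning function whose success path
// forgot its return statement. -Wreturn-type catches this at compile time;
// without the warning, the caller reads an indeterminate value.
int do_work(bool fail) {
  if (fail) {
    return -1;
  }
  return 0; // the explicit success status the fix above adds
}

int main() {
  // A caller that trusts the status: with the return missing, this check
  // could spuriously fail or pass depending on what was left in the
  // result register.
  if (do_work(false) != 0) {
    std::fprintf(stderr, "spurious failure reported\n");
  }
  return 0;
}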

backends/qualcomm/tests/test_qnn_delegate.py
Lines changed: 3 additions & 0 deletions

@@ -5862,6 +5862,9 @@ def setUp(self):
             "gemma3-1b": TestExampleLLMScript.LlmSpecs(
                 SM8650=70, SM8750=100, ppl=23, pte_size=1_200_000_000
             ), # 1.2 GB
+            "glm-1_5b": TestExampleLLMScript.LlmSpecs(
+                SM8650=42, SM8750=52, ppl=21, pte_size=1_100_000_000
+            ), # 1.1 GB
             "phi_4_mini": TestExampleLLMScript.LlmSpecs(
                 SM8650=14, SM8750=19, ppl=12, pte_size=4_000_000_000
             ), # 4GB

backends/vulkan/runtime/VulkanBackend.cpp
Lines changed: 8 additions & 0 deletions

@@ -179,6 +179,12 @@ GraphConfig get_graph_config(ArrayRef<CompileSpec>& compile_specs) {
         config.expect_dynamic_shapes = true;
       }
     }
+    if (strcmp(spec.key, "warmup_execute_after_compile") == 0) {
+      ET_CHECK_MSG(value_size == sizeof(uint8_t), "Unexpected value size!");
+      bool value = getBool(value_data);
+
+      config.warmup_execute_after_compile = value;
+    }
   }
 #ifdef ET_EVENT_TRACER_ENABLED
   config.enable_querypool = true;
@@ -579,6 +585,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
 
     compute_graph->prepack();
 
+    compute_graph->optional_warmup_execute();
+
     return Error::Ok;
   }
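
The new key follows the same convention as the surrounding specs: a one-byte payload interpreted as a boolean. A self-contained sketch of that convention (the Spec struct and helper below are illustrative stand-ins, not the ExecuTorch CompileSpec API):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Illustrative stand-in for a (key, byte payload) compile spec.
struct Spec {
  const char* key;
  const uint8_t* value_data;
  std::size_t value_size;
};

// One-byte payload, nonzero means true; mirrors the getBool pattern above.
bool read_bool_spec(const Spec& spec, const char* name, bool fallback) {
  if (std::strcmp(spec.key, name) == 0 && spec.value_size == sizeof(uint8_t)) {
    return spec.value_data[0] != 0;
  }
  return fallback;
}

int main() {
  const uint8_t on = 1;
  Spec spec{"warmup_execute_after_compile", &on, sizeof(on)};
  bool warmup = read_bool_spec(spec, "warmup_execute_after_compile", false);
  (void)warmup;
  return 0;
}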

backends/vulkan/runtime/graph/ComputeGraph.cpp
Lines changed: 6 additions & 0 deletions

@@ -1107,6 +1107,12 @@ void ComputeGraph::prepack() {
   }
 }
 
+void ComputeGraph::optional_warmup_execute() {
+  if (config_.warmup_execute_after_compile) {
+    execute();
+  }
+}
+
 void ComputeGraph::execute() {
   if (deferred_cmd_list_.empty()) {
     context_->flush();
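
Presumably the point of running execute() once right after prepack() is to front-load work that is otherwise deferred until the first dispatch (pipeline binaries, descriptor pools, lazy allocations), so the first user-visible inference does not absorb that latency; the extra run happens only when the warmup_execute_after_compile compile spec is set.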
