
Commit a9c9e7b

Merge branch 'main' into export-D87519599

2 parents c49c0ae + f84d45b

33 files changed: +392 -33 lines

backends/cadence/fusion_g3/operators/op_add.cpp
Lines changed: 1 addition & 1 deletion

@@ -162,7 +162,7 @@ Tensor& add_out(
   float alpha_val;
   torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
-  if ((a.numel() == 1) && (alpha_val == 1.0)) {
+  if ((a.numel() == 1) && (alpha_val == 1.0f)) {
     XT_KERNEL_CHECK(
         ctx,
         out,
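
The one-character change is about the literal's type: 1.0 is a double, so the old comparison implicitly promoted alpha_val to double before comparing; 1.0f keeps the whole expression in single precision. A minimal standalone sketch of the difference (not the operator code):

#include <cstdio>

int main() {
  float alpha_val = 1.0f;
  bool promoted = (alpha_val == 1.0);  // alpha_val converted to double, then compared
  bool single = (alpha_val == 1.0f);   // compared entirely in float
  std::printf("%d %d\n", promoted, single);
  return 0;
}

For the value 1.0 both forms compare equal, since 1.0 is exactly representable in either type; the fix avoids the implicit float-to-double promotion (and the compiler warnings it can trigger), not a behavioral difference.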

backends/cadence/hifi/kernels/kernels.cpp
Lines changed: 4 additions & 4 deletions

@@ -39,8 +39,8 @@ void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
 template <typename T>
 __attribute__((always_inline)) T
 quantize(const float x, float scale, int32_t zero_point) {
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
+  constexpr float min_val = static_cast<float>(std::numeric_limits<T>::min());
+  constexpr float max_val = static_cast<float>(std::numeric_limits<T>::max());
   float tmp = roundf(x * scale + zero_point);
   return std::max(std::min(tmp, max_val), min_val);
 }
@@ -56,8 +56,8 @@ void quantize(
   xtfloatx2 scale_vec = (xtfloatx2)scale;
   xtfloatx2 zero_vec = XT_FLOAT_SX2(zero_point, 0);
 
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
+  constexpr float min_val = static_cast<float>(std::numeric_limits<T>::min());
+  constexpr float max_val = static_cast<float>(std::numeric_limits<T>::max());
 
   const xtfloatx2* __restrict__ p0 = (const xtfloatx2* __restrict__)x;
   ae_valign va0 = XT_LASX2PP(p0);
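
The casts matter because std::numeric_limits<T>::min() and max() return a T, so initializing a constexpr float from them is an implicit integral-to-float conversion that stricter warning levels flag, and one that is lossy for 32-bit T. A standalone sketch of the pattern (variable templates used here for brevity):

#include <cstdint>
#include <limits>

template <typename T>
constexpr float lo = static_cast<float>(std::numeric_limits<T>::min());
template <typename T>
constexpr float hi = static_cast<float>(std::numeric_limits<T>::max());

// Exact for 8-bit types:
static_assert(lo<int8_t> == -128.0f && hi<int8_t> == 127.0f, "clamp bounds");

int main() {
  // For int32_t, 2147483647 has no exact float representation; the explicit
  // cast documents that the clamp bound is a rounded value rather than
  // letting the conversion happen silently.
  return 0;
}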

backends/cadence/hifi/operators/op_quantized_relu_out.cpp
Lines changed: 4 additions & 4 deletions

@@ -9,14 +9,14 @@
 #include <executorch/backends/cadence/hifi/kernels/kernels.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
-using executorch::aten::ScalarType;
-using executorch::aten::Tensor;
-using torch::executor::KernelRuntimeContext;
-
 namespace impl {
 namespace HiFi {
 namespace native {
 
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
+
 void quantized_relu_per_tensor_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,
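
Two things change here: the using-declarations move inside impl::HiFi::native, so they stop injecting names into the global namespace of this translation unit, and the leading :: anchors each alias at the global namespace regardless of what the enclosing namespaces contain. A toy sketch of why the anchor matters (all namespaces below are stand-ins):

// Stand-in for the real ::executorch::aten::Tensor.
namespace executorch {
namespace aten {
struct Tensor {};
} // namespace aten
} // namespace executorch

namespace impl {
// A sibling impl::executorch shadows the global executorch during
// unqualified lookup from the nested namespaces below...
namespace executorch {
namespace aten {
struct OtherTensor {};
} // namespace aten
} // namespace executorch

namespace HiFi {
namespace native {
// ...but the leading :: always resolves from the global namespace, so the
// alias cannot be hijacked by impl::executorch.
using ::executorch::aten::Tensor;
} // namespace native
} // namespace HiFi
} // namespace impl

int main() {
  impl::HiFi::native::Tensor t; // refers to ::executorch::aten::Tensor
  (void)t;
  return 0;
}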

backends/cadence/hifi/third-party/nnlib/targets.bzl
Lines changed: 4 additions & 0 deletions

@@ -13,6 +13,10 @@ def define_common_targets():
             "@EXECUTORCH_CLIENTS",
         ],
         compatible_with = ["ovr_config//cpu:xtensa"],
+        compiler_flags = [
+            "-Wno-pointer-sign",
+            "-Wno-incompatible-pointer-types-discards-qualifiers",
+        ],
         deps = [
             "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib",
         ],

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_atan2_f32.c
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
 ******************************************************************************/
 #include <float.h>
 
-#include "../include/NatureDSP_Signal_math.h"
+#include "NatureDSP_Signal_math.h"
 #include "NatureDSP_types.h"
 #include "xa_nn_common.h"

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_pow_f32.c
Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
 
 ******************************************************************************/
 
-#include "../include/NatureDSP_Signal_math.h"
+#include "NatureDSP_Signal_math.h"
 #include "NatureDSP_types.h"
 #include "xa_nn_common.h"

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c
Lines changed: 1 addition & 0 deletions

@@ -117,6 +117,7 @@ WORD32 xa_nn_elm_where_f32xf32_f32(FLOAT32 * __restrict__ p_out,
     XT_MOVF_S(a, a2, s);
     XT_SSI(a, (xtfloat *)out, 0);
   }
+  return 0;
 }
 
 static void internal_elm_where_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p_out,
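
The added return 0; closes a real hole: xa_nn_elm_where_f32xf32_f32 is declared to return WORD32, and flowing off the end of a value-returning function leaves the caller with an indeterminate status. A minimal illustration of the bug class (hypothetical function, not the nnlib code):

#include <cstdio>

// Sketch of the bug class: a status-returning function whose success path
// forgot its return statement. -Wreturn-type catches this at compile time;
// without the warning, the caller reads an indeterminate value.
int do_work(bool fail) {
  if (fail) {
    return -1;
  }
  return 0; // the explicit success status the fix above adds
}

int main() {
  // A caller that trusts the status: with the return missing, this check
  // could spuriously fail or pass depending on what was left in the
  // result register.
  if (do_work(false) != 0) {
    std::fprintf(stderr, "spurious failure reported\n");
  }
  return 0;
}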

backends/qualcomm/tests/test_qnn_delegate.py
Lines changed: 3 additions & 0 deletions

@@ -5862,6 +5862,9 @@ def setUp(self):
             "gemma3-1b": TestExampleLLMScript.LlmSpecs(
                 SM8650=70, SM8750=100, ppl=23, pte_size=1_200_000_000
             ), # 1.2 GB
+            "glm-1_5b": TestExampleLLMScript.LlmSpecs(
+                SM8650=42, SM8750=52, ppl=21, pte_size=1_100_000_000
+            ), # 1.1 GB
             "phi_4_mini": TestExampleLLMScript.LlmSpecs(
                 SM8650=14, SM8750=19, ppl=12, pte_size=4_000_000_000
             ), # 4GB

backends/vulkan/runtime/VulkanBackend.cpp
Lines changed: 8 additions & 0 deletions

@@ -179,6 +179,12 @@ GraphConfig get_graph_config(ArrayRef<CompileSpec>& compile_specs) {
         config.expect_dynamic_shapes = true;
       }
     }
+    if (strcmp(spec.key, "warmup_execute_after_compile") == 0) {
+      ET_CHECK_MSG(value_size == sizeof(uint8_t), "Unexpected value size!");
+      bool value = getBool(value_data);
+
+      config.warmup_execute_after_compile = value;
+    }
   }
 #ifdef ET_EVENT_TRACER_ENABLED
   config.enable_querypool = true;
@@ -579,6 +585,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
 
     compute_graph->prepack();
 
+    compute_graph->optional_warmup_execute();
+
     return Error::Ok;
   }
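
The new key follows the same convention as the surrounding specs: a one-byte payload interpreted as a boolean. A self-contained sketch of that convention (the Spec struct and helper below are illustrative stand-ins, not the ExecuTorch CompileSpec API):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Illustrative stand-in for a (key, byte payload) compile spec.
struct Spec {
  const char* key;
  const uint8_t* value_data;
  std::size_t value_size;
};

// One-byte payload, nonzero means true; mirrors the getBool pattern above.
bool read_bool_spec(const Spec& spec, const char* name, bool fallback) {
  if (std::strcmp(spec.key, name) == 0 && spec.value_size == sizeof(uint8_t)) {
    return spec.value_data[0] != 0;
  }
  return fallback;
}

int main() {
  const uint8_t on = 1;
  Spec spec{"warmup_execute_after_compile", &on, sizeof(on)};
  bool warmup = read_bool_spec(spec, "warmup_execute_after_compile", false);
  (void)warmup;
  return 0;
}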

backends/vulkan/runtime/graph/ComputeGraph.cpp
Lines changed: 6 additions & 0 deletions

@@ -1107,6 +1107,12 @@ void ComputeGraph::prepack() {
   }
 }
 
+void ComputeGraph::optional_warmup_execute() {
+  if (config_.warmup_execute_after_compile) {
+    execute();
+  }
+}
+
 void ComputeGraph::execute() {
   if (deferred_cmd_list_.empty()) {
     context_->flush();
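
Presumably the point of running execute() once right after prepack() is to front-load work that is otherwise deferred until the first dispatch (pipeline binaries, descriptor pools, lazy allocations), so the first user-visible inference does not absorb that latency; the extra run happens only when the warmup_execute_after_compile compile spec is set.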
