diff --git a/backends/cadence/fusion_g3/operators/op_add.cpp b/backends/cadence/fusion_g3/operators/op_add.cpp
index 0a7c7e7e035..683323b2662 100644
--- a/backends/cadence/fusion_g3/operators/op_add.cpp
+++ b/backends/cadence/fusion_g3/operators/op_add.cpp
@@ -13,18 +13,27 @@
 #include <executorch/runtime/platform/assert.h>
 #include <xa_nnlib_kernels_api.h>
 
-using exec_aten::Scalar;
-using exec_aten::ScalarType;
-using exec_aten::Tensor;
-using executorch::runtime::canCast;
-using torch::executor::Error;
-using torch::executor::KernelRuntimeContext;
+using ::executorch::aten::Scalar;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::canCast;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
 
 namespace cadence {
 namespace impl {
 namespace G3 {
 namespace native {
 
+// Calls kernel(...) and checks for a 0 status; do/while(0) keeps it one statement.
+#define XT_KERNEL_CHECK(ctx, out, kernel, ...)                  \
+  do {                                                          \
+    const auto ret = kernel(__VA_ARGS__);                       \
+    ET_KERNEL_CHECK_MSG(                                        \
+        ctx, ret == 0, InvalidArgument, out,                    \
+        "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")"); \
+  } while (0)
+
 Tensor& add_out(
     KernelRuntimeContext& ctx,
     const Tensor& a,
@@ -121,13 +130,30 @@ Tensor& add_out(
     torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
     if ((a.numel() == 1) && (alpha_val == 1)) {
-      xa_nn_elm_add_scalar_32x32_32(
-          out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_32x32_32,
+          out_data,
+          inp2_data,
+          inp1_data[0],
+          alpha_val,
+          out.numel());
     } else if (b.numel() == 1) {
-      xa_nn_elm_add_scalar_32x32_32(
-          out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_32x32_32,
+          out_data,
+          inp1_data,
+          inp2_data[0],
+          alpha_val,
+          out.numel());
     } else if (broadcast) {
-      xa_nn_elm_add_broadcast_5D_32x32_32(
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_broadcast_5D_32x32_32,
           out_data,
           out_shape,
           inp1_data,
@@ -137,8 +163,15 @@ Tensor& add_out(
           max_dim,
           alpha_val);
     } else {
-      xa_nn_elm_add_32x32_32(
-          out_data, inp1_data, inp2_data, alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_32x32_32,
+          out_data,
+          inp1_data,
+          inp2_data,
+          alpha_val,
+          out.numel());
     }
   } else if ((compute_type == ScalarType::Float) && (optimized)) {
     const float* const inp1_data = a.const_data_ptr<float>();
@@ -149,13 +182,30 @@ Tensor& add_out(
     torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
     if ((a.numel() == 1) && (alpha_val == 1.0)) {
-      xa_nn_elm_add_scalar_f32xf32_f32(
-          out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_f32xf32_f32,
+          out_data,
+          inp2_data,
+          inp1_data[0],
+          alpha_val,
+          out.numel());
     } else if (b.numel() == 1) {
-      xa_nn_elm_add_scalar_f32xf32_f32(
-          out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_f32xf32_f32,
+          out_data,
+          inp1_data,
+          inp2_data[0],
+          alpha_val,
+          out.numel());
     } else if (broadcast) {
-      xa_nn_elm_add_broadcast_5D_f32xf32_f32(
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_broadcast_5D_f32xf32_f32,
           out_data,
           out_shape,
           inp1_data,
@@ -165,8 +215,15 @@ Tensor& add_out(
           max_dim,
           alpha_val);
     } else {
-      xa_nn_elm_add_f32xf32_f32(
-          out_data, inp1_data, inp2_data, alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_f32xf32_f32,
+          out_data,
+          inp1_data,
+          inp2_data,
+          alpha_val,
+          out.numel());
     }
   } else {
     ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
@@ -242,8 +299,15 @@ Tensor& add_scalar_out(
 
     int* const out_data = out.mutable_data_ptr<int>();
 
-    xa_nn_elm_add_scalar_32x32_32(
-        out_data, inp1_data, inp2_val, alpha_val, out.numel());
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_add_scalar_32x32_32,
+        out_data,
+        inp1_data,
+        inp2_val,
+        alpha_val,
+        out.numel());
 
   } else if (compute_type == ScalarType::Float) {
     const float* const inp1_data = a.const_data_ptr<float>();
@@ -255,8 +319,15 @@ Tensor& add_scalar_out(
 
     float* const out_data = out.mutable_data_ptr<float>();
 
-    xa_nn_elm_add_scalar_f32xf32_f32(
-        out_data, inp1_data, inp2_val, alpha_val, out.numel());
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_add_scalar_f32xf32_f32,
+        out_data,
+        inp1_data,
+        inp2_val,
+        alpha_val,
+        out.numel());
 
   } else {
     ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
diff --git a/backends/cadence/fusion_g3/operators/tests/test_op_add.cpp b/backends/cadence/fusion_g3/operators/tests/test_op_add.cpp
index 06bf4bf4ec1..cbc419d47e1 100644
--- a/backends/cadence/fusion_g3/operators/tests/test_op_add.cpp
+++ b/backends/cadence/fusion_g3/operators/tests/test_op_add.cpp
@@ -10,6 +10,8 @@
 #include <stdio.h>
 
 #include <executorch/backends/cadence/fusion_g3/operators/operators.h>
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
 #include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
 #include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
@@ -24,24 +26,19 @@ namespace {
 using ::executorch::aten::Scalar;
 using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
+using ::executorch::aten::TensorImpl;
+using ::executorch::runtime::Error;
 using ::executorch::runtime::KernelRuntimeContext;
 using ::executorch::runtime::runtime_init;
 using ::executorch::runtime::testing::TensorFactory;
-using ::testing::Test;
 
-class FusionG3OperatorTest : public Test {
+// Shared fixture; context_ is expected to come from OperatorTest (TestUtil.h).
+class FusionG3OperatorTest : public OperatorTest {
- public:
-  void SetUp() override {
-    runtime_init();
-  }
-
  protected:
   Tensor&
   add_out(const Tensor& a, const Tensor& b, const Scalar& alpha, Tensor& out) {
     return cadence::impl::G3::native::add_out(context_, a, b, alpha, out);
   }
-
-  KernelRuntimeContext context_;
 };
 
 TEST_F(FusionG3OperatorTest, TwoDimFloatTensorAddTest) {
@@ -77,6 +74,26 @@ TEST_F(FusionG3OperatorTest, AddWithBroadcastTest) {
   EXPECT_TENSOR_EQ(out, tf.full(size_a, 2));
 }
 
+TEST_F(FusionG3OperatorTest, KernelCheckTest) {
+  // Broadcast add: lhs is {1, 3, 2, 4} and rhs is {2, 4}.
+  const std::vector<TensorImpl::SizesType> lhs_sizes{1, 3, 2, 4};
+  const std::vector<TensorImpl::SizesType> rhs_sizes{2, 4};
+  TensorFactory<ScalarType::Float> tf;
+  const Tensor rhs = tf.ones(rhs_sizes);
+  Tensor out = tf.zeros(lhs_sizes);
+  // Clone rhs's metadata onto a null data pointer to force a kernel failure.
+  TensorImpl null_impl(
+      rhs.scalar_type(),
+      rhs.dim(),
+      const_cast<TensorImpl::SizesType*>(rhs.sizes().data()),
+      /*data=*/nullptr,
+      const_cast<TensorImpl::DimOrderType*>(rhs.dim_order().data()));
+  Tensor null_rhs(&null_impl);
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_, add_out(tf.ones(lhs_sizes), null_rhs, 1, out));
+}
+
 } // namespace
 } // namespace native
 } // namespace G3
diff --git a/backends/cadence/runtime/TARGETS b/backends/cadence/runtime/TARGETS
index 95a7bdc3694..4055f1922a1 100644
--- a/backends/cadence/runtime/TARGETS
+++ b/backends/cadence/runtime/TARGETS
@@ -1,3 +1,4 @@
+load(":targets.bzl", "define_common_targets")
 load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
 
 oncall("odai_jarvis")
@@ -22,3 +23,5 @@ python_library(
         "//executorch/exir:lib",
     ],
 )
+
+define_common_targets()
diff --git a/backends/cadence/runtime/et_pal.cpp b/backends/cadence/runtime/et_pal.cpp
new file mode 100644
index 00000000000..fdf058f05b3
--- /dev/null
+++ b/backends/cadence/runtime/et_pal.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#if defined(XTENSA)
+
+#include <stdio.h>
+#include <sys/times.h>
+
+#include <xtensa/sim.h>
+
+#include <executorch/runtime/platform/platform.h>
+
+#define ET_LOG_OUTPUT_FILE stdout
+
+void et_pal_emit_log_message(
+    et_timestamp_t timestamp,
+    et_pal_log_level_t level,
+    const char* filename,
+    ET_UNUSED const char* function,
+    size_t line,
+    const char* message,
+    ET_UNUSED size_t length) {
+  // Prints "<level> HH:MM:SS.uuuuuu executorch:<file>:<line>] <message>" to
+  // ET_LOG_OUTPUT_FILE and flushes; the timestamp is split as nanoseconds.
+  const et_timestamp_t total_us = timestamp / 1000;
+  const et_timestamp_t total_sec = total_us / 1000000;
+  const et_timestamp_t total_min = total_sec / 60;
+  const int us = total_us % 1000000;
+  const int sec = total_sec % 60;
+  const int min = total_min % 60;
+  const int hour = total_min / 60;
+
+  fprintf(
+      ET_LOG_OUTPUT_FILE,
+      "%c %02d:%02d:%02d.%06d executorch:%s:%d] %s\n",
+      static_cast<char>(level),
+      hour,
+      min,
+      sec,
+      us,
+      filename,
+      static_cast<int>(line),
+      message);
+  fflush(ET_LOG_OUTPUT_FILE);
+}
+
+et_timestamp_t et_pal_current_ticks(void) {
+  struct tms curr_time = {}; // Zeroed: no uninitialized read if times() fails.
+  times(&curr_time);
+  return curr_time.tms_utime;
+}
+
+void et_pal_init(void) {
+  xt_iss_client_command("all", "enable"); // NOTE(review): presumably enables all ISS clients — confirm.
+}
+
+#else
+
+#include <time.h>
+
+#include <cstdio>
+#include <cstdlib>
+
+#include <executorch/runtime/platform/platform.h>
+
+#define ET_LOG_OUTPUT_FILE stderr
+
+#define NSEC_PER_USEC 1000UL // NOTE(review): not referenced in this file; same value as NSEC_IN_USEC.
+#define USEC_IN_SEC 1000000UL // Microseconds per second.
+#define NSEC_IN_USEC 1000UL // Nanoseconds per microsecond.
+#define NSEC_IN_SEC (NSEC_IN_USEC * USEC_IN_SEC) // Nanoseconds per second.
+
+et_timestamp_t et_pal_current_ticks(void) {
+  // CLOCK_REALTIME expressed in nanoseconds; aborts if the clock read fails.
+  struct timespec ts;
+  if (clock_gettime(CLOCK_REALTIME, &ts) != 0) {
+    fprintf(ET_LOG_OUTPUT_FILE, "Could not get time\n");
+    fflush(ET_LOG_OUTPUT_FILE);
+    std::abort();
+  }
+  // Widen to the return type first; avoids a narrow time_t/unsigned long product.
+  return static_cast<et_timestamp_t>(ts.tv_sec) * NSEC_IN_SEC + ts.tv_nsec;
+}
+
+#endif
diff --git a/backends/cadence/runtime/targets.bzl b/backends/cadence/runtime/targets.bzl
new file mode 100644
index 00000000000..dabe42ad824
--- /dev/null
+++ b/backends/cadence/runtime/targets.bzl
@@ -0,0 +1,15 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Defines the `et_pal` C++ library, which is built from et_pal.cpp."""
+    runtime.cxx_library(
+        name = "et_pal",
+        srcs = ["et_pal.cpp"],
+        exported_deps = ["//executorch/runtime/platform:platform"],
+        # NOTE(review): presumably whole-linked so these PAL symbols are kept — confirm.
+        link_whole = True,
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
index 2dd019e1b3e..18fa646aec4 100644
--- a/kernels/test/targets.bzl
+++ b/kernels/test/targets.bzl
@@ -41,6 +41,29 @@ def define_common_targets():
 
     for aten_kernel in (True, False):
         aten_suffix = "_aten" if aten_kernel else ""
+        runtime.cxx_library(
+            # Exposes TestUtil.h (no srcs of its own); one target is declared
+            # per aten_suffix value.
+            name = "gtest_utils" + aten_suffix,
+            exported_headers = ["TestUtil.h"],
+            visibility = [
+                "//executorch/kernels/...",
+                "@EXECUTORCH_CLIENTS",
+            ],
+            preprocessor_flags = ["-DUSE_ATEN_LIB"] if aten_kernel else [],
+            exported_deps = [
+                "//executorch/runtime/core:core",
+                "//executorch/runtime/kernel:kernel_includes",
+                "//executorch/test/utils:utils" + aten_suffix,
+                "//executorch/runtime/platform:pal_interface",
+            ],
+            fbcode_exported_deps = [
+                "//common/gtest:gtest",
+            ],
+            xplat_exported_deps = [
+                "//third-party/googletest:gtest_main",
+            ],
+        )
         runtime.cxx_library(
             name = "test_util" + aten_suffix,
             srcs = [
@@ -49,7 +72,6 @@ def define_common_targets():
             ],
             exported_headers = [
                 "BinaryLogicalOpTest.h",
-                "TestUtil.h",
                 "UnaryUfuncRealHBBF16ToFloatHBF16Test.h",
             ],
             visibility = [
@@ -59,6 +81,7 @@ def define_common_targets():
             preprocessor_flags = ["-DUSE_ATEN_LIB"] if aten_kernel else [],
             exported_deps = [
                 ":supported_features_header",
+                ":gtest_utils" + aten_suffix,  # Same ATen/non-ATen variant as this target.
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten/testing_util:tensor_util" + aten_suffix,
                 "//executorch/runtime/kernel:kernel_includes",