added conditions to call the kernels only when the input datatypes are of the same type. Else, the generic implementation will be called

cmadhira@cadence.com · cmadhira@cadence.com · commit 9edc3edee0e0 · 2025-01-22T17:28:33.000+05:30
diff --git a/backends/cadence/fusion_g3/operators/op_sub.cpp b/backends/cadence/fusion_g3/operators/op_sub.cpp
@@ -34,13 +34,9 @@ Tensor& sub_out(
     const Tensor& b,
     const Scalar& alpha,
     Tensor& out) {
-  // Common Dtype
-  ScalarType common_type =
-      executorch::runtime::promoteTypes(a.scalar_type(), b.scalar_type());
-#ifdef OP_ARG_CHECK
   ScalarType alpha_type =
       torch::executor::native::utils::get_scalar_dtype(alpha);
-
+#ifdef OP_ARG_CHECK
   // Check alpha type
   ET_KERNEL_CHECK(ctx, alpha_type != ScalarType::Bool, InvalidArgument, out);
 
@@ -67,10 +63,6 @@ Tensor& sub_out(
       out);
 #endif
 
-  // Compute Dtype
-  ScalarType compute_type =
-      torch::executor::native::utils::get_compute_type(common_type);
-
   // @lint-ignore CLANGTIDY facebook-hte-CArray
   static constexpr const char op_name[] = "sub.out";
 
@@ -115,11 +107,14 @@ Tensor& sub_out(
     }
   }
 
-  if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
+  if (((broadcast == 1) && (max_dim > kTensorDimensionLimit)) ||
+    (!(((a.scalar_type() == ScalarType::Int) || (a.scalar_type() == ScalarType::Float)) &&
+    (a.scalar_type() == b.scalar_type()) && (a.scalar_type() == out.scalar_type())
+     && (a.scalar_type() == alpha_type)))) {
     optimized = 0;
   }
 
-  if ((compute_type == ScalarType::Int) && (optimized)) {
+  if ((a.scalar_type() == ScalarType::Int) && (optimized)) {
     const int* const inp1_data = a.const_data_ptr<int>();
     const int* const inp2_data = b.const_data_ptr<int>();
     int* const out_data = out.mutable_data_ptr<int>();
@@ -161,7 +156,7 @@ Tensor& sub_out(
           alpha_val,
           out.numel());
     }
-  } else if ((compute_type == ScalarType::Float) && (optimized)) {
+  } else if ((a.scalar_type() == ScalarType::Float) && (optimized)) {
     const float* const inp1_data = a.const_data_ptr<float>();
     const float* const inp2_data = b.const_data_ptr<float>();
     float* const out_data = out.mutable_data_ptr<float>();
@@ -204,6 +199,13 @@ Tensor& sub_out(
           out.numel());
     }
   } else {
+    // Common Dtype
+    ScalarType common_type =
+    executorch::runtime::promoteTypes(a.scalar_type(), b.scalar_type());
+    // Compute Dtype
+    ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
+
     ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
       const CTYPE_COMPUTE val_alpha =
           torch::executor::native::utils::scalar_to<CTYPE_COMPUTE>(alpha);
@@ -232,14 +234,9 @@ Tensor& sub_scalar_out(
     const Scalar& b,
     const Scalar& alpha,
     Tensor& out) {
-  // Common Dtype
-  ScalarType common_type =
-      torch::executor::native::utils::promote_type_with_scalar(
-          a.scalar_type(), b);
-#ifdef OP_ARG_CHECK
   ScalarType alpha_type =
       torch::executor::native::utils::get_scalar_dtype(alpha);
-
+#ifdef OP_ARG_CHECK
   // Check alpha type
   ET_KERNEL_CHECK(ctx, alpha_type != ScalarType::Bool, InvalidArgument, out);
 
@@ -265,14 +262,19 @@ Tensor& sub_scalar_out(
       out);
 #endif
 
-  // Compute Dtype
-  ScalarType compute_type =
-      torch::executor::native::utils::get_compute_type(common_type);
-
   // @lint-ignore CLANGTIDY facebook-hte-CArray
   static constexpr const char op_name[] = "sub.Scalar_out";
 
-  if (compute_type == ScalarType::Int) {
+  bool optimized = 1;
+  ScalarType b_type = torch::executor::native::utils::get_scalar_dtype(b);
+
+  if (!(((a.scalar_type() == ScalarType::Int) || (a.scalar_type() == ScalarType::Float)) &&
+    (a.scalar_type() == b_type) && (a.scalar_type() == out.scalar_type())
+     && (a.scalar_type() == alpha_type))) {
+    optimized = 0;
+  }
+
+  if ((a.scalar_type() == ScalarType::Int) && (optimized)) {
     const int* const inp1_data = a.const_data_ptr<int>();
     int inp2_val;
     torch::executor::native::utils::extract_scalar(b, &inp2_val);
@@ -291,7 +293,7 @@ Tensor& sub_scalar_out(
         inp2_val,
         alpha_val,
         out.numel());
-  } else if (compute_type == ScalarType::Float) {
+  } else if ((a.scalar_type() == ScalarType::Float) && (optimized)) {
     const float* const inp1_data = a.const_data_ptr<float>();
     float inp2_val;
     torch::executor::native::utils::extract_scalar(b, &inp2_val);
@@ -311,6 +313,13 @@ Tensor& sub_scalar_out(
         alpha_val,
         out.numel());
   } else {
+    // Common Dtype
+    ScalarType common_type =
+      torch::executor::native::utils::promote_type_with_scalar(
+          a.scalar_type(), b);
+    // Compute Dtype
+    ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
     ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
       const CTYPE_COMPUTE val_b =
           torch::executor::native::utils::scalar_to<CTYPE_COMPUTE>(b);