Skip to content

Commit 54e13b2

Browse files
committed
Save some size in dtype_util when dtype selective build is not in use
We duplicate a lot of functions depending on the operator name so that dtype selective build will work. We can just detect whether dtype selective build is in use and, if not, stop duplicating.
Test Plan: Saves 28288 bytes of text in size_test_all_optimized_ops compared to the previous PR on my Mac.
ghstack-source-id: 2edc60c
ghstack-comment-id: 2761913331
Pull Request resolved: pytorch/executorch#9742
ghstack-source-id: 2edc60c
ghstack-comment-id: 2771183226
Pull Request resolved: pytorch/executorch#9829
1 parent ca76e66 commit 54e13b2

File tree

1 file changed

+33
-2
lines changed

1 file changed

+33
-2
lines changed

kernels/portable/cpu/util/dtype_util.h

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ enum class SupportedTensorDtypes {
228228
namespace internal {
229229

230230
template <typename CTYPE_COMPUTE, const char* op_name>
231-
load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn(
231+
load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn_impl(
232232
const Tensor& t,
233233
SupportedTensorDtypes dtypes) {
234234
switch (dtypes) {
@@ -252,7 +252,7 @@ load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn(
252252
}
253253

254254
template <typename CTYPE_COMPUTE, const char* op_name>
255-
store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn(
255+
store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn_impl(
256256
const Tensor& t,
257257
SupportedTensorDtypes dtypes) {
258258
switch (dtypes) {
@@ -285,6 +285,37 @@ store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn(
285285
return nullptr;
286286
}
287287

288+
#ifndef EXECUTORCH_SELECTIVE_BUILD_DTYPE
289+
constexpr const char kGenericElementwiseOpName[] = "generic_elementwise_op";
290+
#endif // EXECUTORCH_SELECTIVE_BUILD_DTYPE
291+
292+
template <typename CTYPE_COMPUTE, const char* op_name>
293+
load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn(
294+
const Tensor& t,
295+
SupportedTensorDtypes dtypes) {
296+
return get_load_to_compute_fn_impl<
297+
CTYPE_COMPUTE,
298+
#ifdef EXECUTORCH_SELECTIVE_BUILD_DTYPE
299+
op_name
300+
#else // EXECUTORCH_SELECTIVE_BUILD_DTYPE
301+
kGenericElementwiseOpName
302+
#endif // EXECUTORCH_SELECTIVE_BUILD_DTYPE
303+
>(t, dtypes);
304+
}
305+
306+
template <typename CTYPE_COMPUTE, const char* op_name>
307+
store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn(
308+
const Tensor& t,
309+
SupportedTensorDtypes dtypes) {
310+
return get_store_compute_to_tensor_fn_impl<
311+
CTYPE_COMPUTE,
312+
#ifdef EXECUTORCH_SELECTIVE_BUILD_DTYPE
313+
op_name
314+
#else // EXECUTORCH_SELECTIVE_BUILD_DTYPE
315+
kGenericElementwiseOpName
316+
#endif // EXECUTORCH_SELECTIVE_BUILD_DTYPE
317+
>(t, dtypes);
318+
}
288319
bool check_tensor_dtype(
289320
const Tensor t,
290321
SupportedTensorDtypes dtypes,

0 commit comments

Comments
 (0)