pytorch · Tanish2101 · Jul 3, 2025 · Jul 3, 2025 · Jul 9, 2025 · Jul 12, 2025
@@ -323,6 +323,7 @@ def gen_unboxing(
     use_aten_lib: bool,
     kernel_index: ETKernelIndex,
     manual_registration: bool,
+    lib_name: Optional[str] = None,
     add_exception_boundary: bool = False,
 ) -> None:
     # Iterable type for write_sharded is a Tuple of (native_function, (kernel_key, metadata))
@@ -339,7 +340,9 @@ def key_func(
 
     header = ["Functions.h" if use_aten_lib else "NativeFunctions.h"]
     filename = (
-        "RegisterKernels.cpp"
+        f"register_{lib_name}_kernels.cpp"
+        if manual_registration and lib_name
+        else "RegisterKernels.cpp"
         if manual_registration
         else "RegisterCodegenUnboxedKernels.cpp"
     )
@@ -356,9 +359,10 @@ def key_func(
             "fn_header": (
                 header if unbox_kernel_entry == items[0] else []
             ),  # Only write header once
+            "lib_name": lib_name or "all",
         },
         num_shards=1,
-        sharded_keys={"unboxed_kernels", "fn_header"},
+        sharded_keys={"unboxed_kernels", "fn_header", "lib_name"},
     )
 
 
@@ -953,6 +957,12 @@ def main() -> None:
         help="a boolean flag to indicate whether we want to manually call"
         "register_kernels() or rely on static init. ",
     )
+    parser.add_argument(
+        "--lib-name",
+        type=str,
+        default=None,
+        help="Optional library name used to customize the generated register_<lib>_kernels() function and file names.",
+    )
     parser.add_argument(
         "--generate",
         type=str,
@@ -1015,6 +1025,7 @@ def main() -> None:
             kernel_index=kernel_index,
             manual_registration=options.manual_registration,
             add_exception_boundary=options.add_exception_boundary,
+            lib_name=options.lib_name,
         )
         if custom_ops_native_functions:
             gen_custom_ops(

@@ -15,14 +15,14 @@
 namespace torch {
 namespace executor {
 
-Error register_all_kernels() {
+Error register_${lib_name}_kernels() {
   Kernel kernels_to_register[] = {
       ${unboxed_kernels} // Generated kernels
   };
   Error success_with_kernel_reg =
       ::executorch::runtime::register_kernels({kernels_to_register});
   if (success_with_kernel_reg != Error::Ok) {
-    ET_LOG(Error, "Failed register all kernels");
+    ET_LOG(Error, "Failed to register ${lib_name} kernels");
     return success_with_kernel_reg;
   }
   return Error::Ok;

@@ -16,7 +16,7 @@
 namespace torch {
 namespace executor {
 
-Error register_all_kernels();
+Error register_${lib_name}_kernels();
 
 } // namespace executor
 } // namespace torch
@@ -36,17 +36,20 @@ The basic flow looks like this:
 
 ## APIs
 
-We expose a CMake macro `[gen_selected_ops](https://github.com/pytorch/executorch/blob/main/tools/cmake/Codegen.cmake#L12)`, to allow users specifying op info:
+We expose a CMake macro, [gen_selected_ops](https://github.com/pytorch/executorch/blob/main/tools/cmake/Codegen.cmake#L12), that lets users specify op info:
 
 ```
 gen_selected_ops(
-  LIB_NAME         # the name of the selective build operator library to be generated
-  OPS_SCHEMA_YAML  # path to a yaml file containing operators to be selected
-  ROOT_OPS         # comma separated operator names to be selected
-  INCLUDE_ALL_OPS  # boolean flag to include all operators
+  LIB_NAME              # the name of the selective build operator library to be generated
+  OPS_SCHEMA_YAML       # path to a yaml file containing operators to be selected
+  ROOT_OPS              # comma separated operator names to be selected
+  INCLUDE_ALL_OPS       # boolean flag to include all operators
+  OPS_FROM_MODEL        # path to a pte file of model to select operators from
+  DTYPE_SELECTIVE_BUILD # boolean flag to enable dtype selection
 )
 ```
 
+The macro calls `gen_oplist.py`, which requires a [distinct selection](https://github.com/BujSet/executorch/blob/main/codegen/tools/gen_oplist.py#L222-L228) of a single API. `OPS_SCHEMA_YAML`, `ROOT_OPS`, `INCLUDE_ALL_OPS`, and `OPS_FROM_MODEL` are mutually exclusive options and should not be used together.
 
 ### Select all ops
 
@@ -62,31 +65,64 @@ Context: each kernel library is designed to have a yaml file associated with it.
 
 This API lets users pass in a list of operator names. Note that this API can be combined with the API above, and we will create an allowlist from the union of both API inputs.
 
+### Select ops from model
+
+This API lets users pass in a pte file of an exported model. When used, the pte file will be parsed to generate a yaml file that enumerates the operators and dtypes used in the model. 
+
+### Dtype Selective Build
+
+Beyond pruning the binary to remove unused operators, the binary size can be further reduced by removing unused dtypes. For example, if your model only uses floats for the `add` operator, then including variants of the `add` operator for `doubles` and `ints` is unnecessary. The flag `DTYPE_SELECTIVE_BUILD` can be set to `ON` to support this additional optimization. Currently, dtype selective build is only supported with the model API described above. Once enabled, a header file that specifies only the operators and dtypes used by the model is created and linked against a rebuild of the `portable_kernels` lib. This feature is only supported for the portable kernels library; it's not supported for optimized, quantized or custom kernel libraries.
 
 ## Example Walkthrough
 
-In CMakeLists.txt we have the following logic:
-```cmake
-set(_kernel_lib)
-if(SELECT_ALL_OPS)
-  gen_selected_ops("" "" "${SELECT_ALL_OPS}")
-elseif(SELECT_OPS_LIST)
-  gen_selected_ops("" "${SELECT_OPS_LIST}" "")
-elseif(SELECT_OPS_YAML)
- set(_custom_ops_yaml ${EXECUTORCH_ROOT}/examples/portable/custom_ops/custom_ops.yaml)
-  gen_selected_ops("${_custom_ops_yaml}" "" "")
-endif()
-```
-Then when calling CMake, we can do:
+In [CMakeLists.txt](https://github.com/BujSet/executorch/blob/main/examples/selective_build/CMakeLists.txt#L48-L72), we have the following cmake config options:
+
+1. `EXECUTORCH_SELECT_OPS_YAML`
+2. `EXECUTORCH_SELECT_OPS_LIST`
+3. `EXECUTORCH_SELECT_ALL_OPS`
+4. `EXECUTORCH_SELECT_OPS_FROM_MODEL`
+5. `EXECUTORCH_DTYPE_SELECTIVE_BUILD`
+
+These options allow a user to tailor the cmake build process to use the different APIs, and result in different invocations of the `gen_selected_ops` [function](https://github.com/BujSet/executorch/blob/main/examples/selective_build/CMakeLists.txt#L110-L123). The following table describes some examples of how the invocation changes when these configs are set:
+
+| Example cmake Call | Resultant `gen_selected_ops` Invocation |
+| :----: | :---:| 
+|<code><br>  cmake -D… -DSELECT_OPS_LIST="aten::add.out,aten::mm.out" <br></code> | <code><br>  gen_selected_ops("" "${SELECT_OPS_LIST}" "" "" "") <br></code> |
+|<code><br> cmake -D… -DSELECT_OPS_YAML=ON <br></code> | <code><br>  set(_custom_ops_yaml ${EXECUTORCH_ROOT}/examples/portable/custom_ops/custom_ops.yaml) <br> gen_selected_ops("${_custom_ops_yaml}" "" "") <br></code> |
+|<code><br> cmake -D… -DEXECUTORCH_SELECT_OPS_FROM_MODEL="model.pte.out" <br></code> | <code><br> gen_selected_ops("" "" "" "${_model_path}" "") <br></code> |
+|<code><br> cmake -D… -DEXECUTORCH_SELECT_OPS_FROM_MODEL="model.pte.out" -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON<br></code> | <code><br> gen_selected_ops("" "" "" "${_model_path}" "ON") <br></code> |
+
+
+## Manual Kernel Registration with `--lib-name`
+ExecuTorch now supports generating library-specific kernel registration APIs using the `--lib-name` option along with `--manual-registration` during codegen. This allows applications to avoid using static initialization or linker flags like `-force_load` when linking in kernel libraries.
+
+## Motivation
+In environments like Xcode, using static libraries requires developers to manually specify `-force_load` flags to ensure kernel registration code is executed. This is inconvenient and error-prone.
+
+By passing a library name to the codegen script, developers can generate explicit registration functions and headers, which they can call directly in their application.
+
+## How to Use
+Run the codegen script like this:
 
 ```
-cmake -D… -DSELECT_OPS_LIST="aten::add.out,aten::mm.out”
+python -m codegen.gen \
+  --functions-yaml-path=path/to/functions.yaml \
+  --manual-registration \
+  --lib-name=custom
 ```
+This will generate:
 
-Or
+`register_custom_kernels.cpp`, which defines `register_custom_kernels()` with only the selected kernels, and `register_custom_kernels.h`, which declares the function for inclusion in your application.
+
+Then in your application, call:
 
 ```
-cmake -D… -DSELECT_OPS_YAML=ON
+#include "register_custom_kernels.h"
+
+register_custom_kernels(); // Registers only the "custom" kernels
 ```
 
-To select from either an operator name list or a schema yaml from kernel library.
+This avoids relying on static initialization and enables you to register only the kernels you want.
+
+### Compatibility
+If `--lib-name` is not passed, the default behavior remains unchanged: the codegen script generates a general `RegisterKernels.cpp` and a `register_all_kernels()` function.
@@ -142,6 +142,10 @@ function(generate_bindings_for_kernels)
     set(_gen_command "${_gen_command}" --add-exception-boundary)
   endif()
 
+  if(GEN_LIB_NAME)
+    list(APPEND _gen_command --lib-name=${GEN_LIB_NAME})
+  endif()
+
   set(_gen_command_sources
       ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp
       ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h