From 8ebb8eb1fa630515e099db9fa59fc5ff11a7a308 Mon Sep 17 00:00:00 2001 From: Tanish Jadhav <91683678+Tanish2101@users.noreply.github.com> Date: Thu, 3 Jul 2025 13:36:09 +0000 Subject: [PATCH 1/4] Added support for --lib-name in manual kernel registration --- codegen/gen.py | 15 ++++++-- codegen/templates/RegisterKernels.cpp | 4 +-- codegen/templates/RegisterKernels.h | 2 +- docs/source/kernel-library-selective-build.md | 34 +++++++++++++++++++ tools/cmake/Codegen.cmake | 4 +++ 5 files changed, 54 insertions(+), 5 deletions(-) diff --git a/codegen/gen.py b/codegen/gen.py index 0dc1a167712..d43aafd0431 100644 --- a/codegen/gen.py +++ b/codegen/gen.py @@ -323,6 +323,7 @@ def gen_unboxing( use_aten_lib: bool, kernel_index: ETKernelIndex, manual_registration: bool, + lib_name: Optional[str] = None, add_exception_boundary: bool = False, ) -> None: # Iterable type for write_sharded is a Tuple of (native_function, (kernel_key, metadata)) @@ -339,7 +340,9 @@ def key_func( header = ["Functions.h" if use_aten_lib else "NativeFunctions.h"] filename = ( - "RegisterKernels.cpp" + f"register_{lib_name}_kernels.cpp" + if manual_registration and lib_name + else "RegisterKernels.cpp" if manual_registration else "RegisterCodegenUnboxedKernels.cpp" ) @@ -356,9 +359,10 @@ def key_func( "fn_header": ( header if unbox_kernel_entry == items[0] else [] ), # Only write header once + "lib_name": lib_name or "all", }, num_shards=1, - sharded_keys={"unboxed_kernels", "fn_header"}, + sharded_keys={"unboxed_kernels", "fn_header", "lib_name"}, ) @@ -953,6 +957,12 @@ def main() -> None: help="a boolean flag to indicate whether we want to manually call" "register_kernels() or rely on static init. 
", ) + parser.add_argument( + "--lib-name", + type=str, + default=None, + help="Optional library name used to customize the generated register__kernels() function and file names.", + ) parser.add_argument( "--generate", type=str, @@ -1015,6 +1025,7 @@ def main() -> None: kernel_index=kernel_index, manual_registration=options.manual_registration, add_exception_boundary=options.add_exception_boundary, + lib_name=options.lib_name, ) if custom_ops_native_functions: gen_custom_ops( diff --git a/codegen/templates/RegisterKernels.cpp b/codegen/templates/RegisterKernels.cpp index 91eac200222..484debaccf3 100644 --- a/codegen/templates/RegisterKernels.cpp +++ b/codegen/templates/RegisterKernels.cpp @@ -15,14 +15,14 @@ namespace torch { namespace executor { -Error register_all_kernels() { +Error register_${lib_name}_kernels() { Kernel kernels_to_register[] = { ${unboxed_kernels} // Generated kernels }; Error success_with_kernel_reg = ::executorch::runtime::register_kernels({kernels_to_register}); if (success_with_kernel_reg != Error::Ok) { - ET_LOG(Error, "Failed register all kernels"); + ET_LOG(Error, "Failed to register ${lib_name} kernels"); return success_with_kernel_reg; } return Error::Ok; diff --git a/codegen/templates/RegisterKernels.h b/codegen/templates/RegisterKernels.h index 3c7ecff50b5..36b300db9c6 100644 --- a/codegen/templates/RegisterKernels.h +++ b/codegen/templates/RegisterKernels.h @@ -16,7 +16,7 @@ namespace torch { namespace executor { -Error register_all_kernels(); +Error register_${lib_name}_kernels(); } // namespace executor } // namespace torch diff --git a/docs/source/kernel-library-selective-build.md b/docs/source/kernel-library-selective-build.md index f9a991767a3..cb7a5a16fc8 100644 --- a/docs/source/kernel-library-selective-build.md +++ b/docs/source/kernel-library-selective-build.md @@ -90,3 +90,37 @@ cmake -D… -DSELECT_OPS_YAML=ON ``` To select from either an operator name list or a schema yaml from kernel library. 
+ +## Manual Kernel Registration with '--lib-name' +ExecuTorch now supports generating library-specific kernel registration APIs using the '--lib-name' option along with '--manual-registration' during codegen. This allows applications to avoid using static initialization or linker flags like '-force_load' when linking in kernel libraries. + +## Motivation +In environments like Xcode, using static libraries requires developers to manually specify '-force_load' flags to ensure kernel registration code is executed. This is inconvenient and error-prone. + +By passing a library name to the codegen script, developers can generate explicit registration functions and headers, which they can call directly in their application. + +## How to Use +Run the codegen script like this: + +``` +python -m codegen.gen \ + --functions-yaml-path=path/to/functions.yaml \ + --manual-registration \ + --lib-name=custom +``` +This will generate: + +'register_custom_kernels.cpp' defines 'register_custom_kernels()' with only the kernels selected and 'register_custom_kernels.h' declares the function for inclusion in your application + +Then in your application, call: + +``` +#include "register_custom_kernels.h" + +register_custom_kernels(); // Registers only the "custom" kernels +``` + +This avoids relying on static initialization and enables you to register only the kernels you want. + +### Compatibility +If '--lib-name' is not passed, the default behavior remains unchanged, the codegen script will generate a general 'RegisterKernels.cpp' and 'register_all_kernels()' function. 
diff --git a/tools/cmake/Codegen.cmake b/tools/cmake/Codegen.cmake index 93331c7ed89..f3e592d8297 100644 --- a/tools/cmake/Codegen.cmake +++ b/tools/cmake/Codegen.cmake @@ -142,6 +142,10 @@ function(generate_bindings_for_kernels) set(_gen_command "${_gen_command}" --add-exception-boundary) endif() + if(GEN_LIB_NAME) + list(APPEND _gen_command --lib-name=${GEN_LIB_NAME}) + endif() + set(_gen_command_sources ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h From ab3400d358a289f53d3c992353fce23db7683437 Mon Sep 17 00:00:00 2001 From: Tanish Jadhav <91683678+Tanish2101@users.noreply.github.com> Date: Thu, 3 Jul 2025 14:36:38 +0000 Subject: [PATCH 2/4] Fixed the documentation conflict in kernel-library-selective-build.md --- docs/source/kernel-library-selective-build.md | 64 ++++++++++--------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/docs/source/kernel-library-selective-build.md b/docs/source/kernel-library-selective-build.md index cb7a5a16fc8..d04190a4f57 100644 --- a/docs/source/kernel-library-selective-build.md +++ b/docs/source/kernel-library-selective-build.md @@ -36,17 +36,20 @@ The basic flow looks like this: ## APIs -We expose a CMake macro `[gen_selected_ops](https://github.com/pytorch/executorch/blob/main/tools/cmake/Codegen.cmake#L12)`, to allow users specifying op info: +We expose a CMake macro [gen_selected_ops](https://github.com/pytorch/executorch/blob/main/tools/cmake/Codegen.cmake#L12), to allow users specifying op info: ``` gen_selected_ops( - LIB_NAME # the name of the selective build operator library to be generated - OPS_SCHEMA_YAML # path to a yaml file containing operators to be selected - ROOT_OPS # comma separated operator names to be selected - INCLUDE_ALL_OPS # boolean flag to include all operators + LIB_NAME # the name of the selective build operator library to be generated + OPS_SCHEMA_YAML # path to a yaml file containing operators to be selected + ROOT_OPS # 
comma separated operator names to be selected + INCLUDE_ALL_OPS # boolean flag to include all operators + OPS_FROM_MODEL # path to a pte file of model to select operators from + DTYPE_SELECTIVE_BUILD # boolean flag to enable dtye selection ) ``` +The macro makes a call to gen_oplist.py, which requires a [distinct selection](https://github.com/BujSet/executorch/blob/main/codegen/tools/gen_oplist.py#L222-L228) of API choice. `OPS_SCHEMA_YAML`, `ROOT_OPS`, `INCLUDE_ALL_OPS`, and `OPS_FROM_MODEL` are mutually exclusive options, and should not be used in conjunction. ### Select all ops @@ -62,40 +65,39 @@ Context: each kernel library is designed to have a yaml file associated with it. This API lets users pass in a list of operator names. Note that this API can be combined with the API above and we will create a allowlist from the union of both API inputs. +### Select ops from model + +This API lets users pass in a pte file of an exported model. When used, the pte file will be parsed to generate a yaml file that enumerates the operators and dtypes used in the model. + +### Dtype Selective Build + +Beyond pruning the binary to remove unused operators, the binary size can be further reduced by removing unused dtypes. For example, if your model only uses floats for the `add` operator, then including variants of the `add` operators for `doubles` and `ints` is unnecessary. The flag `DTYPE_SELECTIVE_BUILD` can be set to `ON` to support this additional optimization. Currently, dtype selective build is only supported with the model API described above. Once enabled, a header file that specifies only the operators and dtypes used by the model is created and linked against a rebuild of the `portable_kernels` lib. This feature is only supported for the portable kernels library; it's not supported for optimized, quantized or custom kernel libraries. 
## Example Walkthrough -In CMakeLists.txt we have the following logic: -```cmake -set(_kernel_lib) -if(SELECT_ALL_OPS) - gen_selected_ops("" "" "${SELECT_ALL_OPS}") -elseif(SELECT_OPS_LIST) - gen_selected_ops("" "${SELECT_OPS_LIST}" "") -elseif(SELECT_OPS_YAML) - set(_custom_ops_yaml ${EXECUTORCH_ROOT}/examples/portable/custom_ops/custom_ops.yaml) - gen_selected_ops("${_custom_ops_yaml}" "" "") -endif() -``` -Then when calling CMake, we can do: +In [CMakeLists.txt](https://github.com/BujSet/executorch/blob/main/examples/selective_build/CMakeLists.txt#L48-L72), we have the following cmake config options: -``` -cmake -D… -DSELECT_OPS_LIST="aten::add.out,aten::mm.out” -``` +1. `EXECUTORCH_SELECT_OPS_YAML` +2. `EXECUTORCH_SELECT_OPS_LIST` +3. `EXECUTORCH_SELECT_ALL_OPS` +4. `EXECUTORCH_SELECT_OPS_FROM_MODEL` +5. `EXECUTORCH_DTYPE_SELECTIVE_BUILD` -Or +These options allow a user to tailor the cmake build process to utilize the different APIs, and results in different invocations on the `gen_selected_ops` [function](https://github.com/BujSet/executorch/blob/main/examples/selective_build/CMakeLists.txt#L110-L123). The following table describes some examples of how the invocation changes when these configs are set: -``` -cmake -D… -DSELECT_OPS_YAML=ON -``` +| Example cmake Call | Resultant `gen_selected_ops` Invocation | +| :----: | :---:| +|
cmake -D… -DEXECUTORCH_SELECT_OPS_LIST="aten::add.out,aten::mm.out"
|
gen_selected_ops("" "${EXECUTORCH_SELECT_OPS_LIST}" "" "" "")
| +|
cmake -D… -DEXECUTORCH_SELECT_OPS_YAML=ON
|
set(_custom_ops_yaml ${EXECUTORCH_ROOT}/examples/portable/custom_ops/custom_ops.yaml)
gen_selected_ops("${_custom_ops_yaml}" "" "" "" "")
| +|
cmake -D… -DEXECUTORCH_SELECT_OPS_FROM_MODEL="model.pte.out"
|
gen_selected_ops("" "" "" "${_model_path}" "")
| +|
cmake -D… -DEXECUTORCH_SELECT_OPS_FROM_MODEL="model.pte.out" -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON
|
gen_selected_ops("" "" "" "${_model_path}" "ON")
| -To select from either an operator name list or a schema yaml from kernel library. -## Manual Kernel Registration with '--lib-name' -ExecuTorch now supports generating library-specific kernel registration APIs using the '--lib-name' option along with '--manual-registration' during codegen. This allows applications to avoid using static initialization or linker flags like '-force_load' when linking in kernel libraries. +## Manual Kernel Registration with `--lib-name` +ExecuTorch now supports generating library-specific kernel registration APIs using the `--lib-name` option along with `--manual-registration` during codegen. This allows applications to avoid using static initialization or linker flags like `-force_load` when linking in kernel libraries. ## Motivation -In environments like Xcode, using static libraries requires developers to manually specify '-force_load' flags to ensure kernel registration code is executed. This is inconvenient and error-prone. +In environments like Xcode, using static libraries requires developers to manually specify `-force_load` flags to ensure kernel registration code is executed. This is inconvenient and error-prone. By passing a library name to the codegen script, developers can generate explicit registration functions and headers, which they can call directly in their application. @@ -110,7 +112,7 @@ python -m codegen.gen \ ``` This will generate: -'register_custom_kernels.cpp' defines 'register_custom_kernels()' with only the kernels selected and 'register_custom_kernels.h' declares the function for inclusion in your application +`register_custom_kernels.cpp` defines `register_custom_kernels()` with only the kernels selected and `register_custom_kernels.h` declares the function for inclusion in your application Then in your application, call: @@ -123,4 +125,4 @@ register_custom_kernels(); // Registers only the "custom" kernels This avoids relying on static initialization and enables you to register only the kernels you want. 
### Compatibility -If '--lib-name' is not passed, the default behavior remains unchanged, the codegen script will generate a general 'RegisterKernels.cpp' and 'register_all_kernels()' function. +If `--lib-name` is not passed, the default behavior remains unchanged, the codegen script will generate a general `RegisterKernels.cpp` and `register_all_kernels()` function. \ No newline at end of file From 4e0bac5d10e3198875ca09168a156d688cd60801 Mon Sep 17 00:00:00 2001 From: Tanish Jadhav <91683678+Tanish2101@users.noreply.github.com> Date: Wed, 9 Jul 2025 14:54:42 +0000 Subject: [PATCH 3/4] Final changes done based on review comments --- codegen/gen.py | 12 +++---- codegen/templates/RegisterKernels.cpp | 20 ++++++++++-- codegen/templates/RegisterKernels.h | 6 +++- codegen/test/test_executorch_gen.py | 31 +++++++++++++++++++ docs/source/kernel-library-selective-build.md | 8 ++--- tools/cmake/Codegen.cmake | 12 +++++-- 6 files changed, 72 insertions(+), 17 deletions(-) diff --git a/codegen/gen.py b/codegen/gen.py index d43aafd0431..b94d1b28e6b 100644 --- a/codegen/gen.py +++ b/codegen/gen.py @@ -340,9 +340,7 @@ def key_func( header = ["Functions.h" if use_aten_lib else "NativeFunctions.h"] filename = ( - f"register_{lib_name}_kernels.cpp" - if manual_registration and lib_name - else "RegisterKernels.cpp" + "RegisterKernels.cpp" if manual_registration else "RegisterCodegenUnboxedKernels.cpp" ) @@ -359,10 +357,11 @@ def key_func( "fn_header": ( header if unbox_kernel_entry == items[0] else [] ), # Only write header once - "lib_name": lib_name or "all", + "lib_name": lib_name or "", + "use_lib_name_in_register": bool(lib_name), }, num_shards=1, - sharded_keys={"unboxed_kernels", "fn_header", "lib_name"}, + sharded_keys={"unboxed_kernels", "fn_header", "lib_name", "use_lib_name_in_register"}, ) @@ -1015,7 +1014,8 @@ def main() -> None: cpu_fm=cpu_fm, use_aten_lib=options.use_aten_lib, ) - + if options.lib_name and not options.manual_registration: + raise 
ValueError("--lib-name can only be used with --manual-registration") if "sources" in options.generate: gen_unboxing( native_functions=native_functions, diff --git a/codegen/templates/RegisterKernels.cpp b/codegen/templates/RegisterKernels.cpp index 484debaccf3..8e2869d3d29 100644 --- a/codegen/templates/RegisterKernels.cpp +++ b/codegen/templates/RegisterKernels.cpp @@ -15,16 +15,30 @@ namespace torch { namespace executor { -Error register_${lib_name}_kernels() { +#if USE_LIB_NAME_IN_REGISTER +Error register_kernels_${lib_name}() { +#else +Error register_all_kernels() { +#endif + Kernel kernels_to_register[] = { ${unboxed_kernels} // Generated kernels }; Error success_with_kernel_reg = ::executorch::runtime::register_kernels({kernels_to_register}); if (success_with_kernel_reg != Error::Ok) { - ET_LOG(Error, "Failed to register ${lib_name} kernels"); + #if USE_LIB_NAME_IN_REGISTER + ET_LOG(Error, "Failed to register %zu kernels for %s (from %s)", + sizeof(kernels_to_register) / sizeof(Kernel), + "${lib_name}", + __FILE__); + #else + ET_LOG(Error, "Failed to register %zu kernels (from %s)", + sizeof(kernels_to_register) / sizeof(Kernel), + __FILE__); + #endif return success_with_kernel_reg; - } +} return Error::Ok; } diff --git a/codegen/templates/RegisterKernels.h b/codegen/templates/RegisterKernels.h index 36b300db9c6..26296116add 100644 --- a/codegen/templates/RegisterKernels.h +++ b/codegen/templates/RegisterKernels.h @@ -16,7 +16,11 @@ namespace torch { namespace executor { -Error register_${lib_name}_kernels(); +#if USE_LIB_NAME_IN_REGISTER +Error register_kernels_${lib_name}(); +#else +Error register_all_kernels(); +#endif } // namespace executor } // namespace torch diff --git a/codegen/test/test_executorch_gen.py b/codegen/test/test_executorch_gen.py index 30c82254de7..c76856ab8b6 100644 --- a/codegen/test/test_executorch_gen.py +++ b/codegen/test/test_executorch_gen.py @@ -9,6 +9,9 @@ import os import tempfile import unittest +import shutil +import 
subprocess +import sys import yaml from executorch.codegen.gen import ( @@ -693,3 +696,31 @@ def test_codegen_unboxed_default_kernel_key_selected(self) -> None: ) self.assertEqual(expected_str, result) + +class TestGenMainArgumentChecks(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.mkdtemp() + self.dummy_yaml = os.path.join(self.temp_dir, "dummy.yaml") + with open(self.dummy_yaml, "w") as f: + f.write("- tag: dummy\n") + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_lib_name_without_manual_registration_raises(self): + script_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../gen.py")) + result = subprocess.run( + [ + sys.executable, + script_path, + "--lib-name", "foo", + "--tags-path", self.dummy_yaml, + "--aten-yaml-path", self.dummy_yaml, + "--functions-yaml-path", self.dummy_yaml, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + self.assertNotEqual(result.returncode, 0) + self.assertIn("--lib-name can only be used with --manual-registration", result.stderr) diff --git a/docs/source/kernel-library-selective-build.md b/docs/source/kernel-library-selective-build.md index d04190a4f57..39d41a3bf5e 100644 --- a/docs/source/kernel-library-selective-build.md +++ b/docs/source/kernel-library-selective-build.md @@ -45,7 +45,7 @@ gen_selected_ops( ROOT_OPS # comma separated operator names to be selected INCLUDE_ALL_OPS # boolean flag to include all operators OPS_FROM_MODEL # path to a pte file of model to select operators from - DTYPE_SELECTIVE_BUILD # boolean flag to enable dtye selection + DTYPE_SELECTIVE_BUILD # boolean flag to enable dtype selection ) ``` @@ -112,14 +112,14 @@ python -m codegen.gen \ ``` This will generate: -`register_custom_kernels.cpp` defines `register_custom_kernels()` with only the kernels selected and `register_custom_kernels.h` declares the function for inclusion in your application +`register_kernels_custom.cpp` defines `register_kernels_custom()` with only the 
kernels selected and `register_kernels_custom.h` declares the function for inclusion in your application Then in your application, call: ``` -#include "register_custom_kernels.h" +#include "register_kernels_custom.h" -register_custom_kernels(); // Registers only the "custom" kernels +register_kernels_custom(); // Registers only the "custom" kernels ``` This avoids relying on static initialization and enables you to register only the kernels you want. diff --git a/tools/cmake/Codegen.cmake b/tools/cmake/Codegen.cmake index f3e592d8297..18861dae7ff 100644 --- a/tools/cmake/Codegen.cmake +++ b/tools/cmake/Codegen.cmake @@ -92,7 +92,7 @@ endfunction() # Invoked as generate_bindings_for_kernels( LIB_NAME lib_name FUNCTIONS_YAML # functions_yaml CUSTOM_OPS_YAML custom_ops_yaml ) function(generate_bindings_for_kernels) - set(options ADD_EXCEPTION_BOUNDARY) + set(options ADD_EXCEPTION_BOUNDARY MANUAL_REGISTRATION) set(arg_names LIB_NAME FUNCTIONS_YAML CUSTOM_OPS_YAML DTYPE_SELECTIVE_BUILD) cmake_parse_arguments(GEN "${options}" "${arg_names}" "" ${ARGN}) @@ -102,11 +102,17 @@ function(generate_bindings_for_kernels) message(STATUS " CUSTOM_OPS_YAML: ${GEN_CUSTOM_OPS_YAML}") message(STATUS " ADD_EXCEPTION_BOUNDARY: ${GEN_ADD_EXCEPTION_BOUNDARY}") message(STATUS " DTYPE_SELECTIVE_BUILD: ${GEN_DTYPE_SELECTIVE_BUILD}") + message(STATUS " MANUAL_REGISTRATION: ${GEN_MANUAL_REGISTRATION}") # Command to generate selected_operators.yaml from custom_ops.yaml. 
file(GLOB_RECURSE _codegen_templates "${EXECUTORCH_ROOT}/codegen/templates/*") - set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}) + if(GEN_LIB_NAME) + set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}) + else() + set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/codegen_output) + endif() + # By default selective build output is selected_operators.yaml set(_oplist_yaml ${_out_dir}/selected_operators.yaml) @@ -142,7 +148,7 @@ function(generate_bindings_for_kernels) set(_gen_command "${_gen_command}" --add-exception-boundary) endif() - if(GEN_LIB_NAME) + if(GEN_LIB_NAME AND GEN_MANUAL_REGISTRATION) list(APPEND _gen_command --lib-name=${GEN_LIB_NAME}) endif() From 2635d1e3ade93e53a09a7c03babfd74157a3538c Mon Sep 17 00:00:00 2001 From: Tanish Jadhav <91683678+Tanish2101@users.noreply.github.com> Date: Sat, 12 Jul 2025 12:27:57 +0530 Subject: [PATCH 4/4] Update kernel-library-selective-build.md --- docs/source/kernel-library-selective-build.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/kernel-library-selective-build.md b/docs/source/kernel-library-selective-build.md index 39d41a3bf5e..4a315606a47 100644 --- a/docs/source/kernel-library-selective-build.md +++ b/docs/source/kernel-library-selective-build.md @@ -125,4 +125,4 @@ register_kernels_custom(); // Registers only the "custom" kernels This avoids relying on static initialization and enables you to register only the kernels you want. ### Compatibility -If `--lib-name` is not passed, the default behavior remains unchanged, the codegen script will generate a general `RegisterKernels.cpp` and `register_all_kernels()` function. \ No newline at end of file +If `--lib-name` is not passed, the default behavior remains unchanged, the codegen script will generate a general `RegisterKernels.cpp` and `register_all_kernels()` function.