Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions codegen/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ def gen_unboxing(
use_aten_lib: bool,
kernel_index: ETKernelIndex,
manual_registration: bool,
lib_name: Optional[str] = None,
add_exception_boundary: bool = False,
) -> None:
# Iterable type for write_sharded is a Tuple of (native_function, (kernel_key, metadata))
Expand Down Expand Up @@ -356,9 +357,11 @@ def key_func(
"fn_header": (
header if unbox_kernel_entry == items[0] else []
), # Only write header once
"lib_name": lib_name or "",
"use_lib_name_in_register": bool(lib_name),
},
num_shards=1,
sharded_keys={"unboxed_kernels", "fn_header"},
sharded_keys={"unboxed_kernels", "fn_header", "lib_name", "use_lib_name_in_register"},
)


Expand Down Expand Up @@ -953,6 +956,12 @@ def main() -> None:
help="a boolean flag to indicate whether we want to manually call"
"register_kernels() or rely on static init. ",
)
parser.add_argument(
"--lib-name",
type=str,
default=None,
help="Optional library name used to customize the generated register_<lib>_kernels() function and file names.",
)
parser.add_argument(
"--generate",
type=str,
Expand Down Expand Up @@ -1005,7 +1014,8 @@ def main() -> None:
cpu_fm=cpu_fm,
use_aten_lib=options.use_aten_lib,
)

if options.lib_name and not options.manual_registration:
raise ValueError("--lib-name can only be used with --manual-registration")
if "sources" in options.generate:
gen_unboxing(
native_functions=native_functions,
Expand All @@ -1015,6 +1025,7 @@ def main() -> None:
kernel_index=kernel_index,
manual_registration=options.manual_registration,
add_exception_boundary=options.add_exception_boundary,
lib_name=options.lib_name,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's error out, if lib_name is specified but manual_registration is not enabled.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

)
if custom_ops_native_functions:
gen_custom_ops(
Expand Down
18 changes: 16 additions & 2 deletions codegen/templates/RegisterKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,30 @@
namespace torch {
namespace executor {

#if USE_LIB_NAME_IN_REGISTER
Error register_kernels_${lib_name}() {
#else
Error register_all_kernels() {
#endif

Kernel kernels_to_register[] = {
${unboxed_kernels} // Generated kernels
};
Error success_with_kernel_reg =
::executorch::runtime::register_kernels({kernels_to_register});
if (success_with_kernel_reg != Error::Ok) {
ET_LOG(Error, "Failed register all kernels");
#if USE_LIB_NAME_IN_REGISTER
ET_LOG(Error, "Failed to register %zu kernels for %s (from %s)",
sizeof(kernels_to_register) / sizeof(Kernel),
"${lib_name}",
__FILE__);
#else
ET_LOG(Error, "Failed to register %zu kernels (from %s)",
sizeof(kernels_to_register) / sizeof(Kernel),
__FILE__);
#endif
return success_with_kernel_reg;
}
}
return Error::Ok;
}

Expand Down
4 changes: 4 additions & 0 deletions codegen/templates/RegisterKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
namespace torch {
namespace executor {

#if USE_LIB_NAME_IN_REGISTER
Error register_kernels_${lib_name}();
#else
Error register_all_kernels();
#endif

} // namespace executor
} // namespace torch
31 changes: 31 additions & 0 deletions codegen/test/test_executorch_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import os
import tempfile
import unittest
import shutil
import subprocess
import sys

import yaml
from executorch.codegen.gen import (
Expand Down Expand Up @@ -693,3 +696,31 @@ def test_codegen_unboxed_default_kernel_key_selected(self) -> None:
)

self.assertEqual(expected_str, result)

class TestGenMainArgumentChecks(unittest.TestCase):
def setUp(self):
self.temp_dir = tempfile.mkdtemp()
self.dummy_yaml = os.path.join(self.temp_dir, "dummy.yaml")
with open(self.dummy_yaml, "w") as f:
f.write("- tag: dummy\n")

def tearDown(self):
shutil.rmtree(self.temp_dir)

def test_lib_name_without_manual_registration_raises(self):
script_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../gen.py"))
result = subprocess.run(
[
sys.executable,
script_path,
"--lib-name", "foo",
"--tags-path", self.dummy_yaml,
"--aten-yaml-path", self.dummy_yaml,
"--functions-yaml-path", self.dummy_yaml,
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
self.assertNotEqual(result.returncode, 0)
self.assertIn("--lib-name can only be used with --manual-registration", result.stderr)
61 changes: 60 additions & 1 deletion docs/source/kernel-library-selective-build.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ gen_selected_ops(
ROOT_OPS # comma separated operator names to be selected
INCLUDE_ALL_OPS # boolean flag to include all operators
OPS_FROM_MODEL # path to a pte file of model to select operators from
DTYPE_SELECTIVE_BUILD # boolean flag to enable dtye selection
DTYPE_SELECTIVE_BUILD # boolean flag to enable dtype selection
)
```

Expand Down Expand Up @@ -73,6 +73,65 @@ This API lets users pass in a pte file of an exported model. When used, the pte

Beyond pruning the binary to remove unused operators, the binary size can be further reduced by removing unused dtypes. For example, if your model only uses floats for the `add` operator, then including variants of the `add` operators for `doubles` and `ints` is unnecessary. The flag `DTYPE_SELECTIVE_BUILD` can be set to `ON` to support this additional optimization. Currently, dtype selective build is only supported with the model API described above. Once enabled, a header file that specifies only the operators and dtypes used by the model is created and linked against a rebuild of the `portable_kernels` lib. This feature is only supported for the portable kernels library; it's not supported for optimized, quantized or custom kernel libraries.


This API lets users pass in a pte file of an exported model. When used, the pte file will be parsed to generate a yaml file that enumerates the operators and dtypes used in the model.

In [CMakeLists.txt](https://github.com/BujSet/executorch/blob/main/examples/selective_build/CMakeLists.txt#L48-L72), we have the following cmake config options:

1. `EXECUTORCH_SELECT_OPS_YAML`
2. `EXECUTORCH_SELECT_OPS_LIST`
3. `EXECUTORCH_SELECT_ALL_OPS`
4. `EXECUTORCH_SELECT_OPS_FROM_MODEL`
5. `EXECUTORCH_DTYPE_SELECTIVE_BUILD`

These options allow a user to tailor the cmake build process to utilize the different APIs, and result in different invocations of the `gen_selected_ops` [function](https://github.com/BujSet/executorch/blob/main/examples/selective_build/CMakeLists.txt#L110-L123). The following table describes some examples of how the invocation changes when these configs are set:

| Example cmake Call | Resultant `gen_selected_ops` Invocation |
| :----: | :---:|
|<code><br> cmake -D… -DSELECT_OPS_LIST="aten::add.out,aten::mm.out" <br></code> | <code><br> gen_selected_ops("" "${SELECT_OPS_LIST}" "" "" "") <br></code> |
|<code><br> cmake -D… -DSELECT_OPS_YAML=ON <br></code> | <code><br> set(_custom_ops_yaml ${EXECUTORCH_ROOT}/examples/portable/custom_ops/custom_ops.yaml) <br> gen_selected_ops("${_custom_ops_yaml}" "" "") <br></code> |
|<code><br> cmake -D… -DEXECUTORCH_SELECT_OPS_FROM_MODEL="model.pte.out" <br></code> | <code><br> gen_selected_ops("" "" "" "${_model_path}" "") <br></code> |
|<code><br> cmake -D… -DEXECUTORCH_SELECT_OPS_FROM_MODEL="model.pte.out" -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON<br></code> | <code><br> gen_selected_ops("" "" "" "${_model_path}" "ON") <br></code> |


## Manual Kernel Registration with `--lib-name`
ExecuTorch now supports generating library-specific kernel registration APIs using the `--lib-name` option along with `--manual-registration` during codegen. This allows applications to avoid using static initialization or linker flags like `-force_load` when linking in kernel libraries.

## Motivation
In environments like Xcode, using static libraries requires developers to manually specify `-force_load` flags to ensure kernel registration code is executed. This is inconvenient and error-prone.

By passing a library name to the codegen script, developers can generate explicit registration functions and headers, which they can call directly in their application.

## How to Use
Run the codegen script like this:

```
python -m codegen.gen \
--functions-yaml-path=path/to/functions.yaml \
--manual-registration \
--lib-name=custom
```
This will generate:

`register_kernels_custom.cpp`, which defines `register_kernels_custom()` with only the selected kernels, and `register_kernels_custom.h`, which declares the function for inclusion in your application.

Then in your application, call:

```
#include "register_kernels_custom.h"

register_kernels_custom(); // Registers only the "custom" kernels
```

This avoids relying on static initialization and enables you to register only the kernels you want.

### Compatibility
If `--lib-name` is not passed, the default behavior remains unchanged: the codegen script generates a general `RegisterKernels.cpp` and a `register_all_kernels()` function.

### Dtype Selective Build

Beyond pruning the binary to remove unused operators, the binary size can be further reduced by removing unused dtypes. For example, if your model only uses floats for the `add` operator, then including variants of the `add` operators for `doubles` and `ints` is unnecessary. The flag `DTYPE_SELECTIVE_BUILD` can be set to `ON` to support this additional optimization. Currently, dtype selective build is only supported with the model API described above. Once enabled, a header file that specifies only the operators and dtypes used by the model is created and linked against a rebuild of the `portable_kernels` lib. This feature is only supported for the portable kernels library; it's not supported for optimized, quantized or custom kernel libraries.

## Example Walkthrough

In [CMakeLists.txt](https://github.com/BujSet/executorch/blob/main/examples/selective_build/CMakeLists.txt#L48-L72), we have the following cmake config options:
Expand Down
14 changes: 12 additions & 2 deletions tools/cmake/Codegen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ endfunction()
# Invoked as generate_bindings_for_kernels( LIB_NAME lib_name FUNCTIONS_YAML
# functions_yaml CUSTOM_OPS_YAML custom_ops_yaml [DTYPE_SELECTIVE_BUILD value]
# [ADD_EXCEPTION_BOUNDARY] [MANUAL_REGISTRATION] )
function(generate_bindings_for_kernels)
set(options ADD_EXCEPTION_BOUNDARY)
set(options ADD_EXCEPTION_BOUNDARY MANUAL_REGISTRATION)
set(arg_names LIB_NAME FUNCTIONS_YAML CUSTOM_OPS_YAML DTYPE_SELECTIVE_BUILD)
cmake_parse_arguments(GEN "${options}" "${arg_names}" "" ${ARGN})

Expand All @@ -102,11 +102,17 @@ function(generate_bindings_for_kernels)
message(STATUS " CUSTOM_OPS_YAML: ${GEN_CUSTOM_OPS_YAML}")
message(STATUS " ADD_EXCEPTION_BOUNDARY: ${GEN_ADD_EXCEPTION_BOUNDARY}")
message(STATUS " DTYPE_SELECTIVE_BUILD: ${GEN_DTYPE_SELECTIVE_BUILD}")
message(STATUS " MANUAL_REGISTRATION: ${GEN_MANUAL_REGISTRATION}")

# Command to generate selected_operators.yaml from custom_ops.yaml.
file(GLOB_RECURSE _codegen_templates "${EXECUTORCH_ROOT}/codegen/templates/*")

set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME})
if(GEN_LIB_NAME)
set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME})
else()
set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/codegen_output)
endif()

# By default selective build output is selected_operators.yaml
set(_oplist_yaml ${_out_dir}/selected_operators.yaml)

Expand Down Expand Up @@ -142,6 +148,10 @@ function(generate_bindings_for_kernels)
set(_gen_command "${_gen_command}" --add-exception-boundary)
endif()

if(GEN_LIB_NAME AND GEN_MANUAL_REGISTRATION)
list(APPEND _gen_command --lib-name=${GEN_LIB_NAME})
endif()

set(_gen_command_sources
${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp
${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h
Expand Down
Loading