diff --git a/codegen/tools/targets.bzl b/codegen/tools/targets.bzl index 39de8fcb482..6ea43951385 100644 --- a/codegen/tools/targets.bzl +++ b/codegen/tools/targets.bzl @@ -28,6 +28,7 @@ def define_common_targets(is_fbcode = False): main_module = "executorch.codegen.tools.gen_oplist", deps = [ ":gen_oplist_lib", + "//executorch/exir:lib", ], preload_deps = [] if runtime.is_oss else ["//executorch/codegen/tools:selective_build"], # TODO(larryliu0820) :selective_build doesn't build in OSS yet package_style = "inplace", diff --git a/examples/selective_build/CMakeLists.txt b/examples/selective_build/CMakeLists.txt index a37b4362d78..487ecdec982 100644 --- a/examples/selective_build/CMakeLists.txt +++ b/examples/selective_build/CMakeLists.txt @@ -61,6 +61,14 @@ option(EXECUTORCH_SELECT_OPS_LIST "Register a list of ops, separated by comma" option(EXECUTORCH_SELECT_ALL_OPS "Whether to register all ops defined in portable kernel library." OFF ) + +# Option to enable dtype selective build +option(EXECUTORCH_SELECT_OPS_FROM_MODEL "Enable op selection from pte during build." OFF +) + +# Option to enable dtype selective build +option(EXECUTORCH_DTYPE_SELECTIVE_BUILD "Enable dtype selection during build." 
OFF +) # ------------------------------- OPTIONS END -------------------------------- # @@ -108,16 +116,32 @@ gen_selected_ops( "${EXECUTORCH_SELECT_OPS_LIST}" INCLUDE_ALL_OPS "${EXECUTORCH_SELECT_ALL_OPS}" + OPS_FROM_MODEL + "${EXECUTORCH_SELECT_OPS_FROM_MODEL}" + DTYPE_SELECT + "${EXECUTORCH_DTYPE_SELECTIVE_BUILD}" ) generate_bindings_for_kernels( - LIB_NAME "select_build_lib" FUNCTIONS_YAML - ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml CUSTOM_OPS_YAML + LIB_NAME + "select_build_lib" + FUNCTIONS_YAML + ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml + CUSTOM_OPS_YAML "${_custom_ops_yaml}" + DTYPE_SELECT + "${EXECUTORCH_DTYPE_SELECTIVE_BUILD}" ) gen_operators_lib( - LIB_NAME "select_build_lib" KERNEL_LIBS ${_kernel_lib} DEPS executorch_core + LIB_NAME + "select_build_lib" + KERNEL_LIBS + ${_kernel_lib} + DEPS + executorch_core + DTYPE_SELECT + "${EXECUTORCH_DTYPE_SELECTIVE_BUILD}" ) list(TRANSFORM _executor_runner__srcs PREPEND "${EXECUTORCH_ROOT}/") diff --git a/examples/selective_build/test_selective_build.sh b/examples/selective_build/test_selective_build.sh index 324a4fe27a5..f5a598011a3 100644 --- a/examples/selective_build/test_selective_build.sh +++ b/examples/selective_build/test_selective_build.sh @@ -94,6 +94,7 @@ test_cmake_select_all_ops() { rm -rf ${build_dir} retry cmake -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_SELECT_ALL_OPS=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -B${build_dir} \ @@ -118,11 +119,9 @@ test_cmake_select_ops_in_list() { # set MAX_KERNEL_NUM=22: 19 primops, add, mul rm -rf ${build_dir} retry cmake -DCMAKE_BUILD_TYPE=Release \ - -DMAX_KERNEL_NUM=22 \ - -DEXECUTORCH_SELECT_OPS_LIST="aten::convolution.out,\ -aten::_native_batch_norm_legit_no_training.out,aten::hardtanh.out,aten::add.out,\ -aten::mean.out,aten::view_copy.out,aten::permute_copy.out,aten::addmm.out,\ -aten,aten::clone.out" \ + -DEXECUTORCH_SELECT_OPS_FROM_MODEL="./mv2.pte" \ + 
-DEXECUTORCH_OPTIMIZE_SIZE=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -B${build_dir} \ @@ -146,6 +145,7 @@ test_cmake_select_ops_in_yaml() { rm -rf ${build_dir} retry cmake -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_SELECT_OPS_YAML=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -B${build_dir} \ @@ -161,6 +161,30 @@ test_cmake_select_ops_in_yaml() { rm "./custom_ops_1.pte" } +test_cmake_select_ops_and_dtype() { + echo "Exporting MobilenetV2" + ${PYTHON_EXECUTABLE} -m examples.portable.scripts.export --model_name="mv2" + local example_dir=examples/selective_build + local build_dir=cmake-out/${example_dir} + retry cmake -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_SELECT_OPS_FROM_MODEL="./mv2.pte" \ + -DEXECUTORCH_OPTIMIZE_SIZE=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ + -DCMAKE_INSTALL_PREFIX=cmake-out \ + -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ + -B${build_dir} \ + ${example_dir} + + echo "Building ${example_dir}" + cmake --build ${build_dir} -j9 --config Release + + echo 'Running selective build test' + ${build_dir}/selective_build_test --model_path="./mv2.pte" + + echo "Removing mv2.pte" + rm "./mv2.pte" +} + if [[ -z $BUCK ]]; then BUCK=buck2 @@ -174,9 +198,10 @@ fi if [[ $1 == "cmake" ]]; then cmake_install_executorch_lib - test_cmake_select_all_ops +# test_cmake_select_all_ops test_cmake_select_ops_in_list - test_cmake_select_ops_in_yaml +# test_cmake_select_ops_in_yaml +# test_cmake_select_ops_and_dtype elif [[ $1 == "buck2" ]]; then test_buck2_select_all_ops diff --git a/mv2_schema.yaml b/mv2_schema.yaml new file mode 100644 index 00000000000..aa74b94c305 --- /dev/null +++ b/mv2_schema.yaml @@ -0,0 +1,71 @@ +build_features: [] +custom_classes: [] +et_kernel_metadata: + aten::_native_batch_norm_legit_no_training.out: + - v1/6;0,1,2,3|6;0|6;0|6;0|6;0|6;0,1,2,3|6;0|6;0|6;0,1,2,3 + aten::add.out: + - 
v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::addmm.out: + - v1/6;0|6;0,1|6;0,1|6;0,1|6;0,1 + aten::clone.out: + - v1/6;0,1|6;0,1|6;0,1 + aten::convolution.out: + - v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::hardtanh.out: + - v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::mean.out: + - v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::permute_copy.out: + - v1/6;0,1|6;0,1|6;0,1 +include_all_non_op_selectives: false +include_all_operators: false +kernel_metadata: {} +operators: + aten::_native_batch_norm_legit_no_training.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::add.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::addmm.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::clone.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::convolution.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::hardtanh.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::mean.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::permute_copy.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true diff --git a/pytest.ini b/pytest.ini index 557a307bdf2..d1a8450b6f4 100644 --- a/pytest.ini +++ b/pytest.ini @@ -58,6 +58,7 @@ addopts = runtime # Tools codegen/test + codegen/tools/test tools/cmake # test TODO: fix these tests # test/end2end/test_end2end.py diff --git a/test/build_size_test.sh b/test/build_size_test.sh index d020ab58c95..2baafcfcfe3 100644 --- a/test/build_size_test.sh +++ b/test/build_size_test.sh @@ -29,6 +29,7 
@@ cmake_install_executorch_lib() { -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ -DEXECUTORCH_OPTIMIZE_SIZE=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ ${EXTRA_BUILD_ARGS} \ -Bcmake-out . @@ -44,11 +45,9 @@ test_cmake_size_test() { echo "Build size test" cmake --build cmake-out/test -j9 --config Release - echo 'ExecuTorch with no ops binary size, unstripped:' - ls -al cmake-out/test/size_test - - echo 'ExecuTorch with portable ops binary size, unstripped:' - ls -al cmake-out/test/size_test_all_ops + strip cmake-out/test/size_test + strip cmake-out/test/size_test_all_ops + ls -lah cmake-out/test/ } if [[ -z $PYTHON_EXECUTABLE ]]; then diff --git a/tools/cmake/Codegen.cmake b/tools/cmake/Codegen.cmake index f1dac84de43..c4d412ceb0e 100644 --- a/tools/cmake/Codegen.cmake +++ b/tools/cmake/Codegen.cmake @@ -12,7 +12,7 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) function(gen_selected_ops) - set(arg_names LIB_NAME OPS_SCHEMA_YAML ROOT_OPS INCLUDE_ALL_OPS) + set(arg_names LIB_NAME OPS_SCHEMA_YAML ROOT_OPS INCLUDE_ALL_OPS OPS_FROM_MODEL DTYPE_SELECT) cmake_parse_arguments(GEN "" "" "${arg_names}" ${ARGN}) message(STATUS "Generating operator lib:") @@ -20,10 +20,13 @@ function(gen_selected_ops) message(STATUS " OPS_SCHEMA_YAML: ${GEN_OPS_SCHEMA_YAML}") message(STATUS " ROOT_OPS: ${GEN_ROOT_OPS}") message(STATUS " INCLUDE_ALL_OPS: ${GEN_INCLUDE_ALL_OPS}") + message(STATUS " OPS_FROM_MODEL: ${GEN_OPS_FROM_MODEL}") + message(STATUS " DTYPE_SELECT: ${GEN_DTYPE_SELECT}") set(_oplist_yaml ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}/selected_operators.yaml ) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}) file(GLOB_RECURSE _codegen_tools_srcs "${EXECUTORCH_ROOT}/codegen/tools/*.py") @@ -43,6 +46,9 @@ function(gen_selected_ops) if(GEN_INCLUDE_ALL_OPS) list(APPEND _gen_oplist_command --include_all_operators) endif() + if(GEN_OPS_FROM_MODEL) + list(APPEND _gen_oplist_command 
--model_file_path="${GEN_OPS_FROM_MODEL}") + endif() message("Command - ${_gen_oplist_command}") add_custom_command( @@ -53,6 +59,23 @@ function(gen_selected_ops) WORKING_DIRECTORY ${EXECUTORCH_ROOT} ) + if(GEN_DTYPE_SELECT) + set(_opvariant_h + ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}/selected_op_variants.h + ) + set(_gen_opvariant_command "${PYTHON_EXECUTABLE}" -m codegen.tools.gen_selected_op_variants + --yaml-file=${_oplist_yaml} + --output-dir=${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}/ + ) + message("Command - ${_gen_opvariant_command}") + add_custom_command( + COMMENT "Generating selected_op_variants.h for ${GEN_LIB_NAME}" + OUTPUT ${_opvariant_h} + COMMAND ${_gen_opvariant_command} + DEPENDS ${_oplist_yaml} ${_codegen_tools_srcs} + WORKING_DIRECTORY ${EXECUTORCH_ROOT} + ) + endif() endfunction() # Codegen for registering kernels. Kernels are defined in functions_yaml and @@ -62,7 +85,7 @@ endfunction() # functions_yaml CUSTOM_OPS_YAML custom_ops_yaml ) function(generate_bindings_for_kernels) set(options ADD_EXCEPTION_BOUNDARY) - set(arg_names LIB_NAME FUNCTIONS_YAML CUSTOM_OPS_YAML) + set(arg_names LIB_NAME FUNCTIONS_YAML CUSTOM_OPS_YAML DTYPE_SELECT) cmake_parse_arguments(GEN "${options}" "${arg_names}" "" ${ARGN}) message(STATUS "Generating kernel bindings:") @@ -70,6 +93,7 @@ function(generate_bindings_for_kernels) message(STATUS " FUNCTIONS_YAML: ${GEN_FUNCTIONS_YAML}") message(STATUS " CUSTOM_OPS_YAML: ${GEN_CUSTOM_OPS_YAML}") message(STATUS " ADD_EXCEPTION_BOUNDARY: ${GEN_ADD_EXCEPTION_BOUNDARY}") + message(STATUS " DTYPE_SELECT: ${GEN_DTYPE_SELECT}") # Command to generate selected_operators.yaml from custom_ops.yaml. 
file(GLOB_RECURSE _codegen_templates "${EXECUTORCH_ROOT}/codegen/templates/*") @@ -78,6 +102,13 @@ function(generate_bindings_for_kernels) # By default selective build output is selected_operators.yaml set(_oplist_yaml ${_out_dir}/selected_operators.yaml) + # If dtype selective build is enabled, force header file to be preserved + if(GEN_DTYPE_SELECT) + set(_opvariant_h ${_out_dir}/selected_op_variants.h) + else() + set(_opvariant_h "") + endif() + # Command to codegen C++ wrappers to register custom ops to both PyTorch and # Executorch runtime. execute_process( @@ -108,6 +139,10 @@ function(generate_bindings_for_kernels) ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h ) + if(GEN_DTYPE_SELECT) + list(APPEND _gen_command_sources ${_out_dir}/selected_op_variants.h) + endif() + if(GEN_FUNCTIONS_YAML) list(APPEND _gen_command --functions-yaml-path=${GEN_FUNCTIONS_YAML}) endif() @@ -122,8 +157,9 @@ function(generate_bindings_for_kernels) COMMENT "Generating code for kernel registration" OUTPUT ${_gen_command_sources} COMMAND ${_gen_command} - DEPENDS ${_oplist_yaml} ${GEN_CUSTOM_OPS_YAML} ${GEN_FUNCTIONS_YAML} - ${_codegen_templates} ${_torchgen_srcs} + DEPENDS ${_oplist_yaml} ${_opvariant_h} ${GEN_CUSTOM_OPS_YAML} + ${GEN_FUNCTIONS_YAML} ${_codegen_templates} + ${_torchgen_srcs} WORKING_DIRECTORY ${EXECUTORCH_ROOT} ) # Make generated file list available in parent scope @@ -165,22 +201,33 @@ endfunction() # Generate a runtime lib for registering operators in Executorch function(gen_operators_lib) - set(multi_arg_names LIB_NAME KERNEL_LIBS DEPS) + set(multi_arg_names LIB_NAME KERNEL_LIBS DEPS DTYPE_SELECT) cmake_parse_arguments(GEN "" "" "${multi_arg_names}" ${ARGN}) message(STATUS "Generating operator lib:") message(STATUS " LIB_NAME: ${GEN_LIB_NAME}") message(STATUS " KERNEL_LIBS: ${GEN_KERNEL_LIBS}") message(STATUS " DEPS: ${GEN_DEPS}") + message(STATUS " DTYPE_SELECT: ${GEN_DTYPE_SELECT}") set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}) 
add_library(${GEN_LIB_NAME}) - target_sources( - ${GEN_LIB_NAME} - PRIVATE ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp - ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h - ) + if(GEN_DTYPE_SELECT) + target_sources( + ${GEN_LIB_NAME} + PRIVATE ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp + ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h + ${_out_dir}/selected_op_variants.h + ) + else() + target_sources( + ${GEN_LIB_NAME} + PRIVATE ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp + ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h + ) + endif() + target_link_libraries(${GEN_LIB_NAME} PRIVATE ${GEN_DEPS}) if(GEN_KERNEL_LIBS) target_link_libraries(${GEN_LIB_NAME} PUBLIC ${GEN_KERNEL_LIBS}) @@ -188,6 +235,9 @@ function(gen_operators_lib) target_link_options_shared_lib(${GEN_LIB_NAME}) set(_generated_headers ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h) + if(GEN_DTYPE_SELECT) + list(APPEND _generated_headers ${_out_dir}/selected_op_variants.h) + endif() set_target_properties( ${GEN_LIB_NAME} PROPERTIES PUBLIC_HEADER "${_generated_headers}" )