diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh index f868d264f48..798aa627d65 100755 --- a/backends/arm/scripts/build_executorch.sh +++ b/backends/arm/scripts/build_executorch.sh @@ -16,18 +16,17 @@ et_root_dir=$(realpath ${et_root_dir}) toolchain_cmake=${script_dir}/../../../examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake toolchain_cmake=$(realpath ${toolchain_cmake}) - - et_build_root="${et_root_dir}/arm_test" build_type="Release" +build_devtools=false build_with_etdump=false - help() { echo "Usage: $(basename $0) [options]" echo "Options:" echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}" echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" + echo " --devtools Build Devtools libs" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" exit 0 } @@ -37,6 +36,7 @@ for arg in "$@"; do -h|--help) help ;; --et_build_root=*) et_build_root="${arg#*=}";; --build_type=*) build_type="${arg#*=}";; + --devtools) build_devtools=true ;; --etdump) build_with_etdump=true ;; *) ;; @@ -44,25 +44,25 @@ for arg in "$@"; do done et_build_dir="${et_build_root}/cmake-out" + +# Used for the flatcc host executable if Devtools is used et_build_host_dir=${et_build_root}/cmake-out-host-tools set -x cd "${et_root_dir}" -build_with_etdump_flags="" if [ "$build_with_etdump" = true ] ; then ( set +x ; echo "--------------------------------------------------------------------------------" ; - echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_build_host_dir} - ${et_build_host_dir}/bin/flatcc" ; + echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_build_host_dir}/bin/flatcc" ; echo "--------------------------------------------------------------------------------" ) - # Build host flatcc bin # This is a way to work around that the flatcc executable get build for target (e.g. Arm) later # and get replaced. flatcc is a tool used on the host for etdump and BundleIO handling. # The way to solve this is to generate it once for the host, then copy it to ${et_build_host_dir}/bin # and later point that out with -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc later.
- mkdir -p ${et_build_host_dir} + cmake \ -DCMAKE_INSTALL_PREFIX=${et_build_host_dir} \ -DCMAKE_BUILD_TYPE=${build_type} \ @@ -79,18 +79,13 @@ if [ "$build_with_etdump" = true ] ; then -B"${et_build_host_dir}" \ "${et_root_dir}" - # Copy host flatcc excutable to it's saved when we build for target (Arm) later + # third-party/flatcc/bin/flatcc is already built in the cmake config step above, + # so no separate cmake build step is needed here + + # Copy the host flatcc executable so it's saved when we build for target (Arm) later + et_build_host_dir=$(realpath ${et_build_host_dir}) mkdir -p ${et_build_host_dir}/bin cp third-party/flatcc/bin/flatcc ${et_build_host_dir}/bin - - # Add DevTools flags use in the Target build below - build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \ - -DFLATCC_ALLOW_WERROR=OFF \ - -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc " - echo "build_with_etdump_flags=$build_with_etdump_flags" fi ( set +x ; @@ -98,6 +93,25 @@ fi echo "Build ExecuTorch target libs ${build_type} into '${et_build_dir}'" ; echo "--------------------------------------------------------------------------------" ) +build_devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=OFF " +if [ "$build_devtools" = true ] ; then + build_devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=ON " +fi + +build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF " +if [ "$build_with_etdump" = true ] ; then + # Add DevTools flags used in the Target build below + build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \ + -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \ + -DFLATCC_ALLOW_WERROR=OFF \ + -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc " +fi + +echo "Building with Devtools: ${build_devtools_flags} ${build_with_etdump_flags}" + + # Build cmake \ -DCMAKE_INSTALL_PREFIX=${et_build_dir} \ @@ -108,6 +122,7 @@ cmake \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_ENABLE_LOGGING=ON \ + ${build_devtools_flags} \ ${build_with_etdump_flags} \ -DFLATC_EXECUTABLE="$(which flatc)" \ -B"${et_build_dir}" \ diff --git a/backends/arm/scripts/build_executorch_runner.sh b/backends/arm/scripts/build_executorch_runner.sh index afa8f27bdff..3e658928274 100755 --- a/backends/arm/scripts/build_executorch_runner.sh +++ b/backends/arm/scripts/build_executorch_runner.sh @@ -15,6 +15,7 @@ pte_file="" target="ethos-u55-128" build_type="Release" system_config="" +bundleio=false build_with_etdump=false extra_build_flags="" output_folder_set=false @@ -22,6 +23,9 @@ output_folder="." et_build_root="${et_root_dir}/arm_test" ethosu_tools_dir=${et_root_dir}/examples/arm/ethos-u-scratch +build_bundleio_flags=" -DET_BUNDLE_IO=OFF " +build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF " + help() { echo "Usage: $(basename $0) [options]" echo "Options:" @@ -30,6 +34,7 @@ help() { echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" echo " --system_config= System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt."
+ echo " --bundleio Support both pte and BundleIO bpte using Devtools BundleIO with Input/RefOutput included" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " echo " --output= Output folder Default: /_.pte" @@ -45,6 +50,7 @@ for arg in "$@"; do --target=*) target="${arg#*=}";; --build_type=*) build_type="${arg#*=}";; --system_config=*) system_config="${arg#*=}";; + --bundleio) bundleio=true ;; --etdump) build_with_etdump=true ;; --extra_build_flags=*) extra_build_flags="${arg#*=}";; --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; @@ -64,9 +70,8 @@ et_build_dir=${et_build_root}/cmake-out et_build_dir=$(realpath ${et_build_dir}) if [ "$output_folder_set" = false ] ; then - pte_folder=$(cd -- "$( dirname -- "${pte_file}" )" &> /dev/null && pwd) - pte_short_name=$(basename -- "${pte_file}" ".pte") - output_folder="$pte_folder/$pte_short_name" + # remove file ending + output_folder=${pte_file%.*} fi if [[ ${system_config} == "" ]] @@ -86,18 +91,21 @@ else target_cpu=cortex-m85 fi echo "--------------------------------------------------------------------------------" -echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} to '${output_folder}/cmake-out'" +echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} ${extra_build_flags} to '${output_folder}/cmake-out'" echo "--------------------------------------------------------------------------------" cd ${et_root_dir}/examples/arm/executor_runner -build_with_etdump_flags="" +if [ "$bundleio" = true ] ; then + build_bundleio_flags=" -DET_BUNDLE_IO=ON " +fi + if [ "$build_with_etdump" = true ] ; then - echo "Building with etdump e.g.
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON" build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON " fi -mkdir -p "$output_folder" +echo "Building with BundleIO/etdump/extra flags: ${build_bundleio_flags} ${build_with_etdump_flags} ${extra_build_flags}" +mkdir -p "${output_folder}" cmake \ -DCMAKE_BUILD_TYPE=${build_type} \ @@ -105,9 +113,10 @@ cmake \ -DTARGET_CPU=${target_cpu} \ -DET_DIR_PATH:PATH=${et_root_dir} \ -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ - -DET_PTE_FILE_PATH:PATH="${pte_file}" \ + -DET_PTE_FILE_PATH:PATH="${pte_file}" \ -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ -DETHOSU_TARGET_NPU_CONFIG=${target} \ + ${build_bundleio_flags} \ ${build_with_etdump_flags} \ -DPYTHON_EXECUTABLE=$(which python3) \ -DSYSTEM_CONFIG=${system_config} \ diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh index 568f07011f2..e0237a9c414 100755 --- a/backends/arm/scripts/run_fvp.sh +++ b/backends/arm/scripts/run_fvp.sh @@ -19,12 +19,14 @@ _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly ins elf_file="" target="ethos-u55-128" +timeout="240" help() { echo "Usage: $(basename $0) [options]" echo "Options:" echo " --elf= elf file to run" echo " --target= Target to build and run for Default: ${target}" + echo " --timeout= Maximum target runtime, used to detect hanging, might need to be higer on large models Default: ${timeout}" exit 0 } @@ -33,6 +35,7 @@ for arg in "$@"; do -h|--help) help ;; --elf=*) elf_file="${arg#*=}";; --target=*) target="${arg#*=}";; + --timeout=*) timeout="${arg#*=}";; *) ;; esac @@ -63,6 +66,7 @@ num_macs=$(echo ${target} | cut -d - -f 3) echo "--------------------------------------------------------------------------------" echo "Running ${elf_file} for ${target} run with FVP:${fvp_model} num_macs:${num_macs}" +echo "WARNING: Corstone FVP is not cycle accurate and should NOT be used to determine valid runtime" echo "--------------------------------------------------------------------------------" log_file=$(mktemp) @@ -75,7 +79,7 @@ if [[ ${target} == *"ethos-u55"* ]]; then -C mps3_board.uart0.out_file='-' \ -C mps3_board.uart0.shutdown_on_eot=1 \ -a "${elf_file}" \ - --timelimit 220 2>&1 | tee ${log_file} || true # seconds + --timelimit ${timeout} 2>&1 | tee ${log_file} || true # seconds echo "[${BASH_SOURCE[0]}] Simulation complete, $?" elif [[ ${target} == *"ethos-u85"* ]]; then ${fvp_model} \ @@ -86,7 +90,7 @@ elif [[ ${target} == *"ethos-u85"* ]]; then -C mps4_board.uart0.out_file='-' \ -C mps4_board.uart0.shutdown_on_eot=1 \ -a "${elf_file}" \ - --timelimit 220 2>&1 | tee ${log_file} || true # seconds + --timelimit ${timeout} 2>&1 | tee ${log_file} || true # seconds echo "[${BASH_SOURCE[0]}] Simulation complete, $?" 
else echo "Running ${elf_file} for ${target} is not supported" diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index 6c2784501b0..90b34241f3d 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -92,18 +92,18 @@ test_run_ethosu_fvp() { # End to End model tests using run.sh # TOSA quantized echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA" - examples/arm/run.sh --target=TOSA --model_name=add - examples/arm/run.sh --target=TOSA --model_name=mul + examples/arm/run.sh --et_build_root=arm_test/test_run --target=TOSA --model_name=add + examples/arm/run.sh --et_build_root=arm_test/test_run --target=TOSA --model_name=mul # Ethos-U55 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55" - examples/arm/run.sh --target=ethos-u55-128 --model_name=add - examples/arm/run.sh --target=ethos-u55-128 --model_name=mul + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=add + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=mul # Ethos-U85 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" - examples/arm/run.sh --target=ethos-u85-128 --model_name=add - examples/arm/run.sh --target=ethos-u85-128 --model_name=mul + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=add + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=mul echo "${TEST_SUITE_NAME}: PASS" } @@ -113,26 +113,26 @@ test_models_ethosu_fvp() { # End to End model tests using model_test.py source examples/arm/ethos-u-scratch/setup_path.sh # Build common libs once - python3 backends/arm/test/test_model.py --build_libs + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --build_libs # TOSA quantized echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA" - python3 backends/arm/test/test_model.py --target=TOSA --model=mv2 - python3 backends/arm/test/test_model.py --target=TOSA --model=mv3 - python3 backends/arm/test/test_model.py --target=TOSA --model=lstm - python3 backends/arm/test/test_model.py --target=TOSA --model=edsr + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=mv2 + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=mv3 + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=lstm + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=edsr # Ethos-U55 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55" - python3 backends/arm/test/test_model.py --target=ethos-u55-128 --model=mv2 - python3 backends/arm/test/test_model.py --target=ethos-u55-64 --model=mv3 - python3 backends/arm/test/test_model.py --target=ethos-u55-256 --model=lstm + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=mv2 --extra_flags="-DET_ATOL=1.20 -DET_RTOL=1.20" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-64 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-256 --model=lstm --extra_flags="-DET_ATOL=0.02 -DET_RTOL=0.02" # Ethos-U85 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" - python3 backends/arm/test/test_model.py --target=ethos-u85-256 --model=mv2 - python3 backends/arm/test/test_model.py 
--target=ethos-u85-1024 --model=mv3 - python3 backends/arm/test/test_model.py --target=ethos-u85-128 --model=lstm + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=1.20 -DET_RTOL=1.20" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-1024 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.02 -DET_RTOL=0.02" echo "${TEST_SUITE_NAME}: PASS" } @@ -146,4 +146,4 @@ test_full_ethosu_fvp() { # All End to End model tests -${TEST_SUITE} \ No newline at end of file +${TEST_SUITE} diff --git a/backends/arm/test/test_model.py b/backends/arm/test/test_model.py index 990b9e5f70b..b94a5f65256 100755 --- a/backends/arm/test/test_model.py +++ b/backends/arm/test/test_model.py @@ -56,7 +56,12 @@ def get_args(): default=False, help="Don't save temporary files during compilation", ) - + parser.add_argument( + "--extra_flags", + required=False, + default=None, + help="Extra cmake flags to pass when building the executor_runner", + ) args = parser.parse_args() if args.model and "ethos-u" in args.target and args.system_config is None: @@ -95,6 +100,8 @@ def build_libs(et_build_root: str, script_path: str): os.path.join(script_path, "build_executorch.sh"), f"--et_build_root={et_build_root}", "--build_type=Release", + "--devtools", + "--etdump", ] ) run_external_cmd( @@ -148,6 +155,7 @@ def build_pte( "examples.arm.aot_arm_compiler", "--delegate", "--quantize", + "--bundleio", intermediate, f"--model_name={model_name}", f"--target={target}", @@ -158,7 +166,7 @@ ] ) - pte_file = os.path.join(output, f"{model_name}_arm_delegate_{args.target}.pte") + pte_file = os.path.join(output, f"{model_name}_arm_delegate_{args.target}.bpte") return pte_file @@ -168,17 +176,26 @@ def build_ethosu_runtime( pte_file: str, target: str, system_config: str, + extra_flags: str, elf_build_path: str, ): + + extra_build_flag = "" + if extra_flags: + extra_build_flag = f"--extra_build_flags={extra_flags}" + run_external_cmd( [ "bash", os.path.join(script_path, "build_executorch_runner.sh"), f"--et_build_root={et_build_root}", f"--pte={pte_file}", + "--bundleio", + "--etdump", f"--target={target}", "--build_type=Release", f"--system_config={system_config}", + extra_build_flag, f"--output={elf_build_path}", ] ) @@ -239,6 +256,7 @@ def run_elf_with_fvp(script_path: str, elf_file: str, target: str): pte_file, args.target, args.system_config, + args.extra_flags, elf_build_path, ) print(f"ELF file created: {elf_file} ") diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index f7f2105b99c..1f224983d4e 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -13,9 +13,10 @@ import os from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import torch +from examples.devtools.scripts.export_bundled_program import save_bundled_program from executorch.backends.arm.arm_backend import ( ArmCompileSpecBuilder, get_tosa_spec, @@ -36,6 +37,8 @@ MobileNetV2Evaluator, ) from executorch.devtools.backend_debug import get_delegation_info +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite + from executorch.exir import ( EdgeCompileConfig, ExecutorchBackendConfig, @@ -56,27 +59,50 @@
logging.basicConfig(level=logging.WARNING, format=FORMAT) -def get_model_and_inputs_from_name(model_name: str) -> Tuple[torch.nn.Module, Any]: +def get_model_and_inputs_from_name( + model_name: str, model_input: str | None +) -> Tuple[torch.nn.Module, Any]: """Given the name of an example pytorch model, return it and example inputs. Raises RuntimeError if there is no example model corresponding to the given name. """ + example_inputs = None + if model_input is not None: + logging.info(f"Load model input from {model_input}") + if model_input.endswith(".pt"): + example_inputs = torch.load(model_input, weights_only=False) + else: + raise RuntimeError( + f"Model input data '{model_input}' is not a valid name. Use --model_input .pt e.g. saved with torch.save()" + ) + # Case 1: Model is defined in this file if model_name in models.keys(): + logging.info(f"Internal model {model_name}") model = models[model_name]() - example_inputs = models[model_name].example_input + if example_inputs is None: + example_inputs = models[model_name].example_input # Case 2: Model is defined in examples/models/ elif model_name in MODEL_NAME_TO_MODEL.keys(): logging.warning( "Using a model from examples/models not all of these are currently supported" ) - model, example_inputs, _, _ = EagerModelFactory.create_model( + logging.info( + f"Load {model_name} -> {MODEL_NAME_TO_MODEL[model_name]} from examples/models" + ) + + model, tmp_example_inputs, _, _ = EagerModelFactory.create_model( *MODEL_NAME_TO_MODEL[model_name] ) + if example_inputs is None: + example_inputs = tmp_example_inputs # Case 3: Model is in an external python file loaded as a module. # ModelUnderTest should be a torch.nn.module instance # ModelInputs should be a tuple of inputs to the forward function elif model_name.endswith(".py"): + logging.info( + f"Load model file {model_name} Variable ModelUnderTest= ModelInputs=" + ) import importlib.util # load model's module and add it @@ -84,13 +110,22 @@ def get_model_and_inputs_from_name(model_name: str) -> Tuple[torch.nn.Module, An module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) model = module.ModelUnderTest - example_inputs = module.ModelInputs - + if example_inputs is None: + example_inputs = module.ModelInputs + # Case 4: Model is in an saved model file torch.save(model) + elif model_name.endswith(".pth") or model_name.endswith(".pt"): + logging.info(f"Load model file {model_name}") + model = torch.load(model_name, weights_only=False) + if example_inputs is None: + raise RuntimeError( + f"Model '{model_name}' requires input data specify --model_input .pt" + ) else: raise RuntimeError( f"Model '{model_name}' is not a valid name. Use --help for a list of available models." ) - + logging.debug(f"Loaded model: {model}") + logging.debug(f"Loaded input: {example_inputs}") return model, example_inputs @@ -107,7 +142,7 @@ def quantize( logging.debug(f"Original model: {model}") quantizer = None if is_ethosu(compile_specs): - quantizer = EthosUQuantizer(compile_spec) + quantizer = EthosUQuantizer(compile_specs) elif is_tosa(compile_specs): quantizer = TOSAQuantizer(get_tosa_spec(compile_specs)) else: @@ -365,13 +400,19 @@ def dump_delegation_info(edge, intermediate_files_folder: Optional[str] = None): file.write(delegation_info_string) -def get_args(): # noqa C901 +def get_args(): parser = argparse.ArgumentParser() parser.add_argument( "-m", "--model_name", required=True, - help=f"Provide model name. 
Valid ones: {set(list(models.keys())+list(MODEL_NAME_TO_MODEL.keys()))}", + help=f"Model file .py/.pth/.pt, builtin model or a model from examples/models. Valid names: {set(list(models.keys())+list(MODEL_NAME_TO_MODEL.keys()))}", + ) + parser.add_argument( + "--model_input", + required=False, + default=None, + help="Provide model input .pt file, or python variable name", ) parser.add_argument( "-d", @@ -381,6 +422,13 @@ def get_args(): # noqa C901 default=False, help="Flag for producing ArmBackend delegated model", ) + parser.add_argument( + "--bundleio", + action="store_true", + required=False, + default=False, + help="Flag for producing BundleIO bpte file with input/output test/ref data.", + ) parser.add_argument( "-t", "--target", @@ -436,7 +484,7 @@ def get_args(): # noqa C901 "--output", action="store", required=False, - help="Location for outputs, if not the default of cwd.", + help="Filename (if .pte or .bpte is used) or a folder for outputs, if not specified the default is to place files in cwd.", ) parser.add_argument( "--system_config", @@ -468,10 +516,6 @@ def get_args(): # noqa C901 + "This is required for running quantized models with unquantized input." ) - if args.quantize and not args.delegate: - logging.error("--delegate must be set when using --quanitze flag.") - exit(1) - # if we have custom ops, register them before processing the model if args.so_library is not None: logging.info(f"Loading custom ops from {args.so_library}") @@ -503,12 +547,136 @@ def get_args(): # noqa C901 return args -if __name__ == "__main__": +def save_bpte_program(exec_prog, original_model: torch.nn.Module, output_name: str): + # Construct MethodTestSuite for Each Method + + # Generate Test Suites + method_names = [ + method.name for method in exec_prog.executorch_program.execution_plan + ] + + program_inputs = {m_name: [example_inputs] for m_name in method_names} + + method_test_suites: List[MethodTestSuite] = [] + for m_name in method_names: + method_inputs = program_inputs[m_name] + + # To create a bundled program, we first create every test cases from input. We leverage eager model + # to generate expected output for each test input, and use MethodTestCase to hold the information of + # each test case. We gather all MethodTestCase for same method into one MethodTestSuite, and generate + # bundled program by all MethodTestSuites. 
+ method_test_cases: List[MethodTestCase] = [] + + if args.intermediates: + # Save model.pth + intermediates_path = Path(args.intermediates) + model_path = os.path.join(intermediates_path, "model.pth") + try: + torch.save(original_model, model_path) + except: + logging.warning(f"Could not torch.save(model, {model_path})") + method_index = 0 + for method_input in method_inputs: + output_ref = original_model(*method_input) + + logging.debug(f"input_{method_index}: {method_input}") + logging.debug(f"output_ref_{method_index}: {output_ref}") + + if args.intermediates: + # Save model input and referece output + input_path = os.path.join( + intermediates_path, f"input_{method_index}.pt" + ) + try: + torch.save(method_input, input_path) + except: + logging.warning( + f"Could not torch.save(input_{method_index}, {input_path})" + ) + refoutput_path = os.path.join( + intermediates_path, f"output_ref_{method_index}.pt" + ) + try: + torch.save(output_ref, refoutput_path) + except: + logging.warning( + f"Could not torch.save(output_ref_{method_index}, {refoutput_path})" + ) + + method_test_cases.append( + MethodTestCase( + inputs=method_input, + expected_outputs=output_ref, + ) + ) + + method_index = method_index + 1 + + method_test_suites.append( + MethodTestSuite( + method_name=m_name, + test_cases=method_test_cases, + ) + ) + + # Generate BundledProgram + save_bundled_program(exec_prog, method_test_suites, output_name) + + +def to_edge_TOSA_delegate( + args, + model: torch.nn.Module, +): + model_int8 = None + # As we can target multiple output encodings, one must + # be specified. + compile_spec = get_compile_spec( + args.target, + args.intermediates, + args.system_config, + args.memory_mode, + ) + if args.quantize: + model = quantize( + model, + args.model_name, + compile_spec, + example_inputs, + args.evaluate, + args.evaluate_config, + ) + model_int8 = model + # Wrap quantized model back into an exported_program + exported_program = torch.export.export_for_training(model, example_inputs) + + if args.intermediates: + os.makedirs(args.intermediates, exist_ok=True) + + if is_ethosu(compile_spec): + partitioner = EthosUPartitioner(compile_spec) + elif is_tosa(compile_spec): + partitioner = TOSAPartitioner(compile_spec) + else: + raise RuntimeError(f"Unhandled compile spec: {compile_spec}") + + edge = to_edge_transform_and_lower( + exported_program, + partitioner=[partitioner], + compile_config=EdgeCompileConfig( + _check_ir_validity=False, + ), + ) + return model_int8, edge + + +if __name__ == "__main__": # noqa: C901 args = get_args() # Pick model from one of the supported lists - model, example_inputs = get_model_and_inputs_from_name(args.model_name) - model = model.eval() + original_model, example_inputs = get_model_and_inputs_from_name( + args.model_name, args.model_input + ) + model = original_model.eval() # export_for_training under the assumption we quantize, the exported form also works # in to_edge if we don't quantize @@ -519,44 +687,7 @@ def get_args(): # noqa C901 # Quantize if required model_int8 = None if args.delegate: - # As we can target multiple output encodings, one must - # be specified. 
- compile_spec = get_compile_spec( - args.target, - args.intermediates, - args.system_config, - args.memory_mode, - ) - if args.quantize: - model = quantize( - model, - args.model_name, - compile_spec, - example_inputs, - args.evaluate, - args.evaluate_config, - ) - model_int8 = model - # Wrap quantized model back into an exported_program - exported_program = torch.export.export_for_training(model, example_inputs) - - if args.intermediates: - os.makedirs(args.intermediates, exist_ok=True) - - if is_ethosu(compile_spec): - partitioner = EthosUPartitioner(compile_spec) - elif is_tosa(compile_spec): - partitioner = TOSAPartitioner(compile_spec) - else: - raise RuntimeError(f"Unhandled compile spec: {compile_spec}") - - edge = to_edge_transform_and_lower( - exported_program, - partitioner=[partitioner], - compile_config=EdgeCompileConfig( - _check_ir_validity=False, - ), - ) + model_int8, edge = to_edge_TOSA_delegate(args, model) else: edge = to_edge_transform_and_lower( exported_program, @@ -587,11 +718,33 @@ def get_args(): # noqa C901 else f"_arm_{args.target}" ) + if args.bundleio: + output_name = f"{output_name}.bpte" + else: + output_name = f"{output_name}.pte" + if args.output is not None: - output_name = os.path.join(args.output, output_name) + if args.output.endswith(".pte") or args.output.endswith(".bpte"): + # --output is a pte or bundle pte filename use it as output name + if args.bundleio and not args.output.endswith(".bpte"): + raise RuntimeError( + f"--bundleio expects a .bpte file ending to --output and not .pte {args.output}" + ) + if not args.bundleio and not args.output.endswith(".pte"): + raise RuntimeError( + f"When not using --bundleio a .bpte file should not be use as --output {args.output}" + ) + output_name = args.output + else: + # --output is a folder + output_name = os.path.join(args.output, output_name) - save_pte_program(exec_prog, output_name) - print(f"PTE file saved as {output_name}.pte") + if args.bundleio: + save_bpte_program(exec_prog, original_model, output_name) + print(f"Bundle PTE file saved as {output_name}") + else: + save_pte_program(exec_prog, output_name) + print(f"PTE file saved as {output_name}") if args.evaluate: evaluate_model( diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index d43a7047080..11891e2fb93 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -9,11 +9,14 @@ project(arm_executor_runner) option(SEMIHOSTING "Enable semihosting" OFF) option(ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE "Set ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE to specify memory alloction pool size" OFF) option(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE "Set ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE to specify temp alloction pool size" OFF) +option(ET_BUNDLE_IO "Set to compile in BundleIO support" OFF) +option(ET_ATOL "Set atol to use for BundleIO testing" OFF) +option(ET_RTOL "Set rtol to use for BundleIO testing" OFF) if(NOT DEFINED ET_PTE_FILE_PATH AND NOT ${SEMIHOSTING}) message( FATAL_ERROR - "ET_PTE_FILE_PATH must specify a model .pte, for bare metal systems the " + "ET_PTE_FILE_PATH must specify a model .pte or .bpte, for bare metal systems the " "model is built into the binary." 
) endif() @@ -373,6 +376,18 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER) ) endif() +if(ET_BUNDLE_IO) + add_library(bundled_program STATIC IMPORTED) + set_property( + TARGET bundled_program + PROPERTY IMPORTED_LOCATION + "${ET_BUILD_DIR_PATH}/lib/libbundled_program.a" + ) + list(APPEND arm_executor_runner_link + bundled_program + ) +endif() + # Need whole-archive to ensure C++ ctor's are called - this may be wasteful for # bin size as we link in a number of other symbols target_link_libraries( @@ -402,6 +417,18 @@ if(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE) target_compile_definitions(arm_executor_runner PUBLIC ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE=${ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE}) endif() +if(ET_BUNDLE_IO) + target_compile_definitions(arm_executor_runner PUBLIC -DET_BUNDLE_IO) +endif() + +if(ET_ATOL) + target_compile_definitions(arm_executor_runner PUBLIC ET_ATOL=${ET_ATOL}) +endif() + +if(ET_RTOL) + target_compile_definitions(arm_executor_runner PUBLIC ET_RTOL=${ET_RTOL}) +endif() + # Fixup compilation of retarget.c if(SEMIHOSTING) # Remove this when MLBEDSW-8910 is closed. diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp index 2d08f733eba..48237acdf22 100644 --- a/examples/arm/executor_runner/arm_executor_runner.cpp +++ b/examples/arm/executor_runner/arm_executor_runner.cpp @@ -1,17 +1,12 @@ /* Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. - * Copyright 2023-2024 Arm Limited and/or its affiliates. + * Copyright 2023-2025 Arm Limited and/or its affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include -#include -#include -#include -#include - #include #include #include @@ -19,8 +14,17 @@ #include #include #include +#include +#include +#include +#include #include "arm_perf_monitor.h" + +#if defined(ET_BUNDLE_IO) +#include +#endif + #if defined(ET_EVENT_TRACER_ENABLED) #include #if !defined(SEMIHOSTING) @@ -102,6 +106,24 @@ unsigned char __attribute__(( section("input_data_sec"), aligned(16))) method_allocation_pool[method_allocation_pool_size]; +#if defined(ET_BUNDLE_IO) + +const size_t testset_idx = 0; // BundleIO test indexes to test if used + +#if defined(ET_ATOL) +const float et_atol = ET_ATOL; +#else +const float et_atol = 0.01; +#endif + +#if defined(ET_RTOL) +const float et_rtol = ET_RTOL; +#else +const float et_rtol = 0.01; +#endif + +#endif + /** * The temp_allocation_pool is used for allocating temporary data during kernel * or delegate execution. This will be reset after each kernel or delegate call. @@ -409,15 +431,41 @@ int main(int argc, const char* argv[]) { } } #endif - ET_LOG(Info, "Model in %p %c", model_pte, model_pte[0]); - auto loader = BufferDataLoader(model_pte, pte_size); - ET_LOG(Info, "Model PTE file loaded. Size: %lu bytes.", pte_size); + ET_LOG( + Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size); + + // Find the offset to the embedded Program. 
+ const void* program_data = model_pte; + size_t program_data_len = pte_size; + +#if defined(ET_BUNDLE_IO) + bool bundle_io = executorch::bundled_program::is_bundled_program( + reinterpret_cast(model_pte), pte_size); + if (bundle_io) { + // BundleIO bpte is provided, dig out the actual model from the data area + Error status = executorch::bundled_program::get_program_data( + reinterpret_cast(model_pte), + pte_size, + &program_data, + &program_data_len); + + ET_CHECK_MSG( + status == Error::Ok, + "get_program_data() from bundle PTE failed: 0x%x", + (unsigned int)status); + } +#endif + auto loader = BufferDataLoader(program_data, program_data_len); + ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", program_data_len); + + // Parse the program file. This is immutable, and can also be reused + // between multiple execution invocations across multiple threads. Result program = Program::load(&loader); if (!program.ok()) { ET_LOG( Info, "Program loading failed @ 0x%p: 0x%" PRIx32, - model_pte, + program_data, program.error()); } @@ -483,6 +531,7 @@ int main(int argc, const char* argv[]) { executorch::runtime::EventTracer* event_tracer_ptr = nullptr; #if defined(ET_EVENT_TRACER_ENABLED) + ET_LOG(Info, "Setting up ETDump"); torch::executor::ETDumpGen etdump_gen = torch::executor::ETDumpGen(); event_tracer_ptr = &etdump_gen; #endif @@ -499,21 +548,75 @@ int main(int argc, const char* argv[]) { } size_t method_loaded_memsize = method_allocator.used_size() - method_loaded_membase; - ET_LOG(Info, "Method loaded."); + ET_LOG(Info, "Method '%s' loaded.", method_name); ET_LOG(Info, "Preparing inputs..."); size_t input_membase = method_allocator.used_size(); - auto inputs = - ::prepare_input_tensors(*method, method_allocator, input_buffers); - - if (!inputs.ok()) { - ET_LOG( - Info, - "Preparing inputs tensors for method %s failed with status 0x%" PRIx32, - method_name, - inputs.error()); +#if defined(ET_BUNDLE_IO) + if (bundle_io) { + // Get inputs from bundled IO ".bpte" data + // Useful for testing + ET_LOG(Info, "Input testset[%d] from bundled bpte", testset_idx); + Error status = executorch::bundled_program::load_bundled_input( + *method, model_pte, testset_idx); + ET_CHECK_MSG( + status == Error::Ok, + "load_bundled_input failed with status 0x%" PRIx32, + status); + } else +#endif + { + // Here you would add code to get input from your Hardware + // Get inputs from SEMIHOSTING or fake it with a lot of "1" + // Use "static" to force to compiler to remove this when it goes out of + // scope + static auto prepared_inputs = + ::prepare_input_tensors(*method, method_allocator, input_buffers); + + if (!prepared_inputs.ok()) { + ET_LOG( + Info, + "Preparing inputs tensors for method %s failed with status 0x%" PRIx32, + method_name, + prepared_inputs.error()); + } } +#ifdef DUMP_INPUT + { + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "%zu inputs: ", inputs.size()); + Error status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); + + for (int i = 0; i < inputs.size(); ++i) { + Tensor t = inputs[i].toTensor(); + // The output might be collected and parsed so printf() is used instead + // of ET_LOG() here + for (int j = 0; j < inputs[i].toTensor().numel(); ++j) { + if (t.scalar_type() == ScalarType::Int) { + printf( + "Input[%d][%d]: (int) %d\n", + i, + j, + inputs[i].toTensor().const_data_ptr()[j]); + } else if (t.scalar_type() == ScalarType::Float) { + printf( + "Input[%d][%d]: (float) %f\n", + i, + j, + inputs[i].toTensor().const_data_ptr()[j]); + } else 
if (t.scalar_type() == ScalarType::Char) { + printf( + "Input[%d][%d]: (char) %d\n", + i, + j, + inputs[i].toTensor().const_data_ptr()[j]); + } + } + } + } +#endif size_t input_memsize = method_allocator.used_size() - input_membase; ET_LOG(Info, "Input prepared."); @@ -524,7 +627,8 @@ int main(int argc, const char* argv[]) { StopMeasurements(); size_t executor_memsize = method_allocator.used_size() - executor_membase; - ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", pte_size); + ET_LOG(Info, "model_pte_program_size: %lu bytes.", program_data_len); + ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", pte_size); #if defined(SEMIHOSTING) if (input_file_allocator.size() > 0) { ET_LOG( @@ -575,50 +679,34 @@ int main(int argc, const char* argv[]) { ET_LOG(Info, "%zu outputs: ", outputs.size()); status = method->get_outputs(outputs.data(), outputs.size()); ET_CHECK(status == Error::Ok); + for (int i = 0; i < outputs.size(); ++i) { Tensor t = outputs[i].toTensor(); #if !defined(SEMIHOSTING) +#if !defined(ET_BUNDLE_IO) // The output might be collected and parsed so printf() is used instead // of ET_LOG() here for (int j = 0; j < outputs[i].toTensor().numel(); ++j) { if (t.scalar_type() == ScalarType::Int) { printf( - "Output[%d][%d]: %d\n", + "Output[%d][%d]: (int) %d\n", i, j, outputs[i].toTensor().const_data_ptr()[j]); } else if (t.scalar_type() == ScalarType::Float) { printf( - "Output[%d][%d]: %f\n", + "Output[%d][%d]: (float) %f\n", i, j, outputs[i].toTensor().const_data_ptr()[j]); } else if (t.scalar_type() == ScalarType::Char) { printf( - "Output[%d][%d]: %d\n", + "Output[%d][%d]: (char) %d\n", i, j, outputs[i].toTensor().const_data_ptr()[j]); } } -#if defined(ET_EVENT_TRACER_ENABLED) - ETDumpResult result = etdump_gen.get_etdump_data(); - if (result.buf != nullptr && result.size > 0) { - // On a device with no file system we can't just write it out - // to the file-system so we base64 encode it and dump it on the log. - int mode = 0; - size_t len = result.size; - size_t encoded_len = base64_encoded_size(result.size, mode); - uint8_t* encoded_buf = reinterpret_cast( - method_allocator.allocate(encoded_len + 1)); - int ret = base64_encode( - encoded_buf, (uint8_t*)result.buf, &encoded_len, &len, mode); - encoded_buf[encoded_len] = 0x00; // Ensure null termination - ET_LOG(Info, "Writing etdump.bin [base64]"); - printf( - "#---\nbase64 -i -d <<<\"\\\n%s\\\n\" >etdump.bin\npython3 -m devtools.inspector.inspector_cli --etdump_path etdump.bin --source_time_scale cycles --target_time_scale cycles\n#---\n", - encoded_buf); - } #endif #else char out_filename[255]; @@ -631,21 +719,66 @@ int main(int argc, const char* argv[]) { outputs[i].toTensor().nbytes(), out_file); fclose(out_file); -#if defined(ET_EVENT_TRACER_ENABLED) - etdump_result result = etdump_gen.get_etdump_data(); - if (result.buf != nullptr && result.size > 0) { - // On a device with a file system we can just write it out - // to the file-system. - char etdump_filename = "etdump.bin"; - ET_LOG(Info, "Writing etdump to file: %s", etdump_filename); - FILE* f = fopen(etdump_filename, "w+"); - fwrite((uint8_t*)result.buf, 1, result.size, f); - fclose(f); - free(result.buf); - } #endif + } + +#if defined(ET_BUNDLE_IO) + if (bundle_io) { + // Verify the result. 
+ status = executorch::bundled_program::verify_method_outputs( + *method, model_pte, testset_idx, et_rtol, et_atol); + if (status == Error::Ok) { + ET_LOG(Info, "Model output match expected BundleIO bpte ref data."); + ET_LOG(Info, "TEST: BundleIO index[%d] Test_result: PASS", testset_idx); + } else { + ET_LOG( + Error, + "Model output don't match expected BundleIO bpte ref data. rtol=%f atol=%f", + et_rtol, + et_atol); + ET_LOG(Error, "TEST: BundleIO index[%d] Test_result: FAIL", testset_idx); + } + ET_CHECK_MSG( + status == Error::Ok, + "Bundle verification failed with status 0x%" PRIx32, + status); + } #endif + +#if defined(ET_EVENT_TRACER_ENABLED) +#if !defined(SEMIHOSTING) + ETDumpResult result = etdump_gen.get_etdump_data(); + if (result.buf != nullptr && result.size > 0) { + // On a device with no file system we can't just write it out + // to the file-system so we base64 encode it and dump it on the log. + int mode = 0; + size_t len = result.size; + size_t encoded_len = base64_encoded_size(result.size, mode); + uint8_t* encoded_buf = + reinterpret_cast(method_allocator.allocate(encoded_len + 1)); + int ret = base64_encode( + encoded_buf, (uint8_t*)result.buf, &encoded_len, &len, mode); + encoded_buf[encoded_len] = 0x00; // Ensure null termination + ET_LOG(Info, "Writing etdump.bin [base64]"); + printf( + "#---\nbase64 -i -d <<<\"\\\n%s\\\n\" >etdump.bin\npython3 -m devtools.inspector.inspector_cli --etdump_path etdump.bin --source_time_scale cycles --target_time_scale cycles\n#---\n", + encoded_buf); + } +#else + etdump_result result = etdump_gen.get_etdump_data(); + if (result.buf != nullptr && result.size > 0) { + // On a device with a file system we can just write it out + // to the file-system. + char etdump_filename = "etdump.bin"; + ET_LOG(Info, "Writing etdump to file: %s", etdump_filename); + FILE* f = fopen(etdump_filename, "w+"); + fwrite((uint8_t*)result.buf, 1, result.size, f); + fclose(f); + free(result.buf); } +#endif +#endif + out: ET_LOG(Info, "Program complete, exiting."); #if defined(SEMIHOSTING) diff --git a/examples/arm/run.sh b/examples/arm/run.sh index ce92312b652..5f1e3764de2 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -18,11 +18,14 @@ et_root_dir=$(realpath ${et_root_dir}) model_name="" +model_input_set=false +model_input="" aot_arm_compiler_flags="--delegate --quantize" portable_kernels="aten::_softmax.out" target="ethos-u55-128" output_folder_set=false output_folder="." +bundleio=false build_with_etdump=false build_type="Release" extra_build_flags="" @@ -35,11 +38,13 @@ ethos_u_scratch_dir=${script_dir}/ethos-u-scratch function help() { echo "Usage: $(basename $0) [options]" echo "Options:" - echo " --model_name= Model to run, can be a builtin, examples/models or a filename Default to all builtin models" + echo " --model_name= Model file .py/.pth/.pt, builtin model or a model from examples/models. Passed to aot_arm_compiler" + echo " --model_input= Provide model input .pt file to override the input in the model file. 
Passed to aot_arm_compiler" echo " --aot_arm_compiler_flags= Only used if --model_name is used Default: ${aot_arm_compiler_flags}" echo " --portable_kernels= Comma separated list of portable (non delagated) kernels to include Default: ${portable_kernels}" echo " --target= Target to build and run for Default: ${target}" echo " --output= Target build output folder Default: ${output_folder}" + echo " --bundleio Create a Bundled pte using Devtools BundleIO with Input/RefOutput included" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " @@ -56,10 +61,12 @@ for arg in "$@"; do case $arg in -h|--help) help ;; --model_name=*) model_name="${arg#*=}";; + --model_input=*) model_input="${arg#*=}" ; model_input_set=true ;; --aot_arm_compiler_flags=*) aot_arm_compiler_flags="${arg#*=}";; --portable_kernels=*) portable_kernels="${arg#*=}";; --target=*) target="${arg#*=}";; --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; + --bundleio) bundleio=true ;; --etdump) build_with_etdump=true ;; --build_type=*) build_type="${arg#*=}";; --extra_build_flags=*) extra_build_flags="${arg#*=}";; @@ -121,13 +128,21 @@ hash arm-none-eabi-gcc \ # Build executorch libraries cd $et_root_dir +devtools_flag="" +bundleio_flag="" +et_dump_flag="" if [ "$build_with_etdump" = true ] ; then + devtools_flag="--devtools --etdump" et_dump_flag="--etdump" -else - et_dump_flag="" fi -backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $et_dump_flag +if [ "$bundleio" = true ] ; then + devtools_flag="--devtools --etdump" + bundleio_flag="--bundleio" + et_dump_flag="--etdump" +fi + +backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag backends/arm/scripts/build_portable_kernels.sh --et_build_root="${et_build_root}" --build_type=$build_type --portable_kernels=$portable_kernels # Build a lib quantized_ops_aot_lib @@ -157,12 +172,21 @@ for i in "${!test_model[@]}"; do echo "--------------------------------------------------------------------------------" cd $et_root_dir - model_short_name=$(basename -- "${model}" ".py") - model_filename=${model_short_name}_arm_${target}.pte + # Remove path and file extension to get model_short_name + ext=${model##*.} + model_short_name=$(basename -- "${model}" .$ext) + model_filename=${model_short_name}_arm_${target} if [[ "${model_compiler_flags}" == *"--delegate"* ]]; then # Name aligned with default aot_arm_compiler output - model_filename=${model_short_name}_arm_delegate_${target}.pte + model_filename=${model_short_name}_arm_delegate_${target} + fi + elf_folder=${model_filename} + + if [ "$bundleio" = true ] ; then + model_filename=${model_filename}.bpte + else + model_filename=${model_filename}.pte fi if [ "$output_folder_set" = false ] ; then @@ -170,15 +194,19 @@ fi output_folder=$(realpath ${output_folder}) - mkdir -p ${output_folder} - pte_file=$(realpath -m ${output_folder}/${model_filename}) + pte_file="${output_folder}/${model_filename}" - rm -f "${pte_file}" + mkdir -p ${output_folder} - ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${output_folder}
--so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode}" + # Remove old pte files + rm -f "${output_folder}/${model_filename}" + + ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag" echo "CALL ${ARM_AOT_CMD}" >&2 ${ARM_AOT_CMD} 1>&2 + pte_file=$(realpath ${pte_file}) + [[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; } echo "pte_data_size: $(wc -c ${pte_file})" echo "pte_file: ${pte_file}" @@ -188,10 +216,11 @@ for i in "${!test_model[@]}"; do else set -x # Rebuild the application as the pte is imported as a header/c array - backends/arm/scripts/build_executorch_runner.sh "--pte=${pte_file}" --build_type=$build_type --target=$target --system_config=$system_config $et_dump_flag --extra_build_flags="$extra_build_flags" --ethosu_tools_dir="$ethos_u_scratch_dir" --output="${output_folder}" + backends/arm/scripts/build_executorch_runner.sh --et_build_root="${et_build_root}" --pte="${pte_file}" --build_type=${build_type} --target=${target} --system_config=${system_config} ${bundleio_flag} ${et_dump_flag} --extra_build_flags="${extra_build_flags}" --ethosu_tools_dir="${ethos_u_scratch_dir}" if [ "$build_only" = false ] ; then # Execute the executor_runner on FVP Simulator - backends/arm/scripts/run_fvp.sh --elf=${output_folder}/cmake-out/arm_executor_runner --target=$target + elf_file="${output_folder}/${elf_folder}/cmake-out/arm_executor_runner" + backends/arm/scripts/run_fvp.sh --elf=${elf_file} --target=$target fi set +x fi
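
Example usage, a minimal sketch assuming the Arm FVP/toolchain environment from examples/arm/setup.sh is already installed and the commands are run from the ExecuTorch repo root; the ELF path is illustrative only and depends on --output and --et_build_root:

# Compile MobileNetV2 into a BundleIO .bpte with reference input/output, build the
# bare-metal runner with Devtools/etdump support, and relax the BundleIO tolerances
# via the new ET_ATOL/ET_RTOL CMake options.
examples/arm/run.sh --model_name=mv2 --target=ethos-u55-128 --bundleio --etdump \
    --extra_build_flags="-DET_ATOL=1.20 -DET_RTOL=1.20"

# Re-run an already built runner on the Corstone FVP with a longer hang-detection timeout.
backends/arm/scripts/run_fvp.sh \
    --elf=arm_test/mv2_arm_delegate_ethos-u55-128/cmake-out/arm_executor_runner \
    --target=ethos-u55-128 --timeout=600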