diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index f21ed849d03..8c0ba752259 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -96,63 +96,6 @@ jobs: PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS - prepare-test-specs: - runs-on: linux.2xlarge - needs: set-parameters - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - steps: - - uses: actions/checkout@v3 - - - name: Prepare the spec - id: prepare - shell: bash - env: - BENCHMARK_CONFIG: ${{ toJSON(matrix) }} - working-directory: extension/benchmark/android/benchmark - run: | - set -eux - - # The model will be exported in the next step to this S3 path - MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" - # We could write a script to properly use jinja here, but there is only one variable, - # so let's just sed it - sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2 - - BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') - # The config for this benchmark runs, we save it in the test spec so that it can be fetched - # later by the upload script - sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2 - - cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml - # Just print the test spec for debugging - cat android-llm-device-farm-test-spec.yml - - # Save the benchmark configs so that we can use it later in the dashboard - echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" - echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT - - - name: Upload the spec - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml - - - name: Update the benchmark configs - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json - export-models: name: export-models uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -335,6 +278,69 @@ jobs: fi echo "::endgroup::" + prepare-test-specs: + runs-on: linux.2xlarge + needs: + - set-parameters + - export-models + strategy: + matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} + fail-fast: false + steps: + - uses: actions/checkout@v3 + + - name: Prepare the spec + id: prepare + shell: bash + env: + BENCHMARK_CONFIG: ${{ toJSON(matrix) }} + working-directory: extension/benchmark/android/benchmark + run: | + set -eux + + # The model will be exported in the next step to this S3 path + MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" + + # Check if the model artifact exists, fail this step skip generating test-spec. 
+ curl -s --head -f ${MODEL_PATH} + + # We could write a script to properly use jinja here, but there is only one variable, + # so let's just sed it + sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2 + + BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') + # The config for this benchmark runs, we save it in the test spec so that it can be fetched + # later by the upload script + sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2 + + cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml + # Just print the test spec for debugging + cat android-llm-device-farm-test-spec.yml + + # Save the benchmark configs so that we can use it later in the dashboard + echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" + echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT + + - name: Upload the spec + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml + + - name: Update the benchmark configs + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json + build-benchmark-app: name: build-benchmark-app uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 44aa645d16d..cc7f85e9386 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -98,63 +98,6 @@ jobs: echo "benchmark_configs is: ${{ steps.set-parameters.outputs.benchmark_configs }}" - prepare-test-specs: - runs-on: linux.2xlarge - needs: set-parameters - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - steps: - - uses: actions/checkout@v3 - - - name: Prepare the spec - id: prepare - shell: bash - env: - BENCHMARK_CONFIG: ${{ toJSON(matrix) }} - working-directory: extension/benchmark/apple/Benchmark - run: | - set -eux - - # The model will be exported in the next step to this S3 path - MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" - # We could write a script to properly use jinja here, but there is only one variable, - # so let's just sed it - sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2 - - BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') - # The config for this benchmark runs, we save it in the test spec so that it can be fetched - # later by the upload script - sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2 - - cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml - # Just print the test spec for debugging - cat default-ios-device-farm-appium-test-spec.yml - - # Save the benchmark configs so that we can use it later in the 
dashboard - echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" - echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT - - - name: Upload the spec - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml - - - name: Update the benchmark configs - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json - export-models: name: export-models uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -344,6 +287,68 @@ jobs: fi echo "::endgroup::" + prepare-test-specs: + runs-on: linux.2xlarge + needs: + - set-parameters + - export-models + strategy: + matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} + fail-fast: false + steps: + - uses: actions/checkout@v3 + + - name: Prepare the spec + id: prepare + shell: bash + env: + BENCHMARK_CONFIG: ${{ toJSON(matrix) }} + working-directory: extension/benchmark/apple/Benchmark + run: | + set -eux + + # The model will be exported in the next step to this S3 path + MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" + # Check if the model artifact exists, fail this step skip generating test-spec. + curl -s --head -f ${MODEL_PATH} + # We could write a script to properly use jinja here, but there is only one variable, + # so let's just sed it + sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2 + + BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') + # The config for this benchmark runs, we save it in the test spec so that it can be fetched + # later by the upload script + sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2 + + cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml + # Just print the test spec for debugging + cat default-ios-device-farm-appium-test-spec.yml + + # Save the benchmark configs so that we can use it later in the dashboard + echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" + echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT + + - name: Upload the spec + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml + + - name: Update the benchmark configs + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json + + build-benchmark-app: name: build-benchmark-app uses: pytorch/test-infra/.github/workflows/macos_job.yml@main 
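Both workflow diffs above make `prepare-test-specs` depend on `export-models` and probe the exported artifact with `curl -s --head -f "${MODEL_PATH}"`, so the device-farm test spec is only generated when the model zip actually exists in S3. As a rough sketch only (not code from this PR; the helper name, imports, and example URL are illustrative assumptions), the same HEAD-request check could be written in Python like this:

```
# Hypothetical sketch of the artifact-existence check the workflows now perform
# with `curl -s --head -f` before templating the device-farm test spec.
import urllib.error
import urllib.request


def artifact_exists(url: str) -> bool:
    """Return True if an HTTP HEAD request for `url` succeeds with a 2xx status."""
    request = urllib.request.Request(url, method="HEAD")
    try:
        with urllib.request.urlopen(request) as response:
            return 200 <= response.status < 300
    except urllib.error.URLError:
        # S3 typically answers 403/404 when export-models never uploaded the zip.
        return False


# Example with a made-up URL mirroring the MODEL_PATH layout used above:
# if not artifact_exists("https://gha-artifacts.s3.amazonaws.com/org/repo/123/artifacts/llama_xnnpack/model.zip"):
#     raise SystemExit("model artifact missing; skipping test-spec generation")
```

Like curl's `-f` flag, treating any non-2xx response as "missing" makes the job fail fast instead of producing a spec that points at a nonexistent artifact.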
diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bdcda2f19c..de941663a88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,14 +248,15 @@ cmake_dependent_option( "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF ) -if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR) +if(EXECUTORCH_BUILD_EXTENSION_TRAINING) set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) + set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) + set(EXECUTORCH_BUILD_EXTENSION_MODULE ON) + set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) endif() -if(EXECUTORCH_BUILD_EXTENSION_TRAINING) - set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) +if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR) set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) - set(EXECUTORCH_BUILD_EXTENSION_MODULE ON) endif() if(EXECUTORCH_BUILD_EXTENSION_MODULE) diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS index d77e33679ab..df1165dd74e 100644 --- a/backends/apple/coreml/TARGETS +++ b/backends/apple/coreml/TARGETS @@ -14,10 +14,10 @@ runtime.python_library( "@EXECUTORCH_CLIENTS", ], deps = [ + "fbsource//third-party/pypi/coremltools:coremltools", ":executorchcoreml", "//executorch/exir/backend:backend_details", "//executorch/exir/backend:compile_spec_schema", - "fbsource//third-party/pypi/coremltools:coremltools", ], ) @@ -30,13 +30,13 @@ runtime.python_library( "@EXECUTORCH_CLIENTS", ], deps = [ + "fbsource//third-party/pypi/coremltools:coremltools", ":backend", "//caffe2:torch", "//executorch/exir:lib", "//executorch/exir/backend:compile_spec_schema", "//executorch/exir/backend:partitioner", "//executorch/exir/backend:utils", - "fbsource//third-party/pypi/coremltools:coremltools", ], ) @@ -64,25 +64,23 @@ runtime.cxx_python_extension( headers = glob([ "runtime/inmemoryfs/**/*.hpp", ]), + base_module = "", + compiler_flags = [ + "-std=c++17", + ], preprocessor_flags = [ "-Iexecutorch/backends/apple/coreml/runtime/util", ], types = [ "executorchcoreml.pyi", ], - compiler_flags = [ - "-std=c++17", - ], - base_module = "", visibility = [ "//executorch/examples/apple/coreml/...", "@EXECUTORCH_CLIENTS", ], - external_deps = [ - "pybind11", - ], deps = [ "fbsource//third-party/nlohmann-json:nlohmann-json", + "fbsource//third-party/pybind11:pybind11", ], ) @@ -92,10 +90,10 @@ runtime.python_test( "test/*.py", ]), deps = [ + "fbsource//third-party/pypi/pytest:pytest", ":partitioner", ":quantizer", "//caffe2:torch", "//pytorch/vision:torchvision", - "fbsource//third-party/pypi/pytest:pytest", ], ) diff --git a/backends/arm/tosa_mapping.py b/backends/arm/tosa_mapping.py index 9a8b6b2c35d..292d6209bb3 100644 --- a/backends/arm/tosa_mapping.py +++ b/backends/arm/tosa_mapping.py @@ -107,7 +107,10 @@ def __init__(self, argument: Any) -> None: if isinstance(argument, (int, float)): self.__process_number(argument) return + if isinstance(argument, torch.dtype): + # Dtype is parsed from fake tensor + return - RuntimeError( + raise RuntimeError( f"Unhandled node input argument: {argument}, of type {type(argument)}" ) diff --git a/backends/qualcomm/aot/python/targets.bzl b/backends/qualcomm/aot/python/targets.bzl index e1f5a6a8fc5..f29c02aa593 100644 --- a/backends/qualcomm/aot/python/targets.bzl +++ b/backends/qualcomm/aot/python/targets.bzl @@ -33,10 +33,10 @@ def define_common_targets(): "//executorch/backends/qualcomm:schema", "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", + "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()), ], external_deps = [ - "pybind11", 
"libtorch_python", ], use_static_deps = True, @@ -66,10 +66,10 @@ def define_common_targets(): "//executorch/backends/qualcomm:schema", "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", + "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()), ], external_deps = [ - "pybind11", "libtorch_python", ], use_static_deps = True, @@ -93,9 +93,7 @@ def define_common_targets(): "//executorch/backends/qualcomm:schema", "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", + "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()), ], - external_deps = [ - "pybind11", - ], ) diff --git a/backends/xnnpack/partition/config/gemm_configs.py b/backends/xnnpack/partition/config/gemm_configs.py index 872ba355c70..8712c2709ac 100644 --- a/backends/xnnpack/partition/config/gemm_configs.py +++ b/backends/xnnpack/partition/config/gemm_configs.py @@ -21,6 +21,7 @@ is_dynamic_qdq, is_per_channel, is_per_channel_group, + is_per_tensor, is_qparam, is_quant, ) @@ -66,8 +67,6 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool: return False is_valid, _ = self.get_deps(node, ep) - if not is_valid: - why(node, "Failed to get valid dependent nodes.") return is_valid def get_node_and_deps( @@ -97,9 +96,9 @@ def _detect_precision(self, node: torch.fx.Node) -> ConfigPrecisionType: def _overwrite_precision(self, node: torch.fx.Node): precision = self._detect_precision(node) if precision not in self.enabled_precision_types: - # detected precision is not enabled, lets try to partition it as fp32 + # detected precision is not enabled, try to partition it as fp32 if self.enabled_precision_types == [ConfigPrecisionType.FP32]: - # if only fp32 is enabled, then we can still partition fp32 gemms + # when only fp32 is enabled, then we can still partition fp32 gemms # even with in a quantized graph if precision in [ ConfigPrecisionType.STATIC_QUANT, @@ -108,6 +107,7 @@ def _overwrite_precision(self, node: torch.fx.Node): precision = ConfigPrecisionType.FP32 logging.info(f"Overwriting precision, partitioning {node} as FP32") return True, precision + return False, precision def get_deps( @@ -123,6 +123,7 @@ def get_deps( precision = self._detect_precision(node) if precision not in self.supported_precision_types(): # detected precision but it is either disabled or not supported + why(node, f"Unsupported precision type {precision}") return (False, []) _, precision = self._overwrite_precision(node) valid_bias, bias_deps = self._get_bias_deps(node, ep, precision) @@ -143,7 +144,8 @@ def _get_weight_deps( # First find the weight weight_node = get_input_node(node, self.weight_idx) if not is_param_node(ep, weight_node): - return (False, []) # weight must be a static param + why(node, "Expected weight to be a static param") + return (False, []) gemm_deps.append(weight_node) return (True, gemm_deps) @@ -151,19 +153,33 @@ def _get_weight_deps( # Quantized Weight deps dequant_node = get_input_node(node, self.weight_idx) if not is_dequant(dequant_node): + why(node, "Expected weight to have a dequantized node") return False, [] gemm_deps.append(dequant_node) weight = get_input_node(dequant_node, 0) if not is_param_node(ep, weight): + why(node, "Expected weight to be a static param") return False, [] gemm_deps.append(weight) + if ( + is_per_tensor(dequant_node) + and precision == 
ConfigPrecisionType.DYNAMIC_QUANT + ): + why( + node, + "XNNPACK does not support per tensor quantized weights for dynamic quantization of activations", + ) + return False, [] + if is_per_channel(dequant_node) or is_per_channel_group(dequant_node): if len(dequant_node.all_input_nodes) < 2: # Expected channel quantized to have scale/zp nodes + why(node, "Expected channel quantized to have scale/zp nodes") return False, [] gemm_deps.extend(dequant_node.all_input_nodes[1:3]) + return (True, gemm_deps) def _get_output_deps( @@ -174,7 +190,7 @@ def _get_output_deps( # Look for fused activations and tail end quant node node_users = list(node.users.keys()) if len(node_users) != 1: - # Expect quantized node to have a single output (fused act or dequant) + why(node, "Expected quantized node to have a single output") return False, [] # Check if the quantized pattern has a fused activation @@ -190,6 +206,7 @@ def _get_output_deps( if not is_quant(n_output): # Expected gemm_node --> fused_act (optional) --> dequant + why(node, "Expected output node to have a dequantized node") return (False, []) gemm_deps.append(n_output) elif precision == ConfigPrecisionType.FP32: @@ -210,8 +227,11 @@ def _get_bias_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: gemm_deps = [] - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force force_fp32_dynamic_linear is enabled, then we + if ( + precision == ConfigPrecisionType.FP32 + and self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is enabled, then we # do not partition the weight node return (True, gemm_deps) @@ -219,7 +239,8 @@ def _get_bias_deps( bias_node = get_input_node(node, self.bias_idx) if bias_node: if not is_param_node(ep, bias_node): - return (False, []) # bias node must be a static param + why(node, "Expected bias to be a static param") + return (False, []) gemm_deps.append(bias_node) return (True, gemm_deps) @@ -233,7 +254,7 @@ def _get_act_deps( else: dq_input = get_input_node(node, self.act_idx) if not is_dequant(dq_input): - # Expected static quant input to be dequant node + why(node, "Expected act input to be dequant node") return False, [] gemm_deps.append(dq_input) if precision == ConfigPrecisionType.STATIC_QUANT: @@ -243,6 +264,7 @@ def _get_act_deps( # q input node q_input = get_input_node(dq_input, 0) if not is_quant(q_input): + why(node, "Expected dequant input to be quant node") return (False, []) gemm_deps.append(q_input) @@ -250,20 +272,20 @@ def _get_act_deps( if is_affine_qdq(q_input): q_input_args = extract_qdq_affine_op_args_for_decomposed_ops(q_input) if not (is_node(q_input_args[1]) and is_node(q_input_args[2])): - # expected to find getitem node from choose qparam + why(node, "expected to find getitem node from choose qparam") return (False, []) getitem1 = q_input_args[1] getitem2 = q_input_args[2] if not (is_getitem(getitem1) and is_getitem(getitem2)): - # expected getitem node from choose qparam + why(node, "expected getitem node from choose qparam") return (False, []) gemm_deps.extend([getitem1, getitem2]) choose_qparam = get_input_node(getitem1, 0) if not is_qparam(choose_qparam): - # expected to find choose_qparam node + why(node, "expected to find choose_qparam node") return (False, []) gemm_deps.append(choose_qparam) return (True, gemm_deps) @@ -287,8 +309,11 @@ def get_original_aten(self) -> Optional[torch._ops.OpOverload]: def _get_weight_deps( self, node: torch.fx.Node, 
ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force fp32_dynamic_linear is enabled, then we + if ( + precision == ConfigPrecisionType.FP32 + and self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is enabled, then we # do not partition the weight node return (True, []) @@ -394,9 +419,11 @@ def __init__(self, **kwargs): def _get_weight_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: - # TODO(maxren, T210537195): - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force fp32_dynamic_linear is on and we detected this as fp32, then we + if ( + precision == ConfigPrecisionType.FP32 + and self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we # do not partition the weight node return (True, []) @@ -471,6 +498,7 @@ def find_partition_args(input_node): # there can only be a single output node in partition or len(src_partition.output_nodes) != 1 ): + why(node, "invalid source partition") return (False, []) # map addmm's args to the source partition linear's inputs and users @@ -482,11 +510,11 @@ def find_partition_args(input_node): node.args = old_args node.users = old_users - # When using force_fp32_dynamic_linear, we want to get_deps to overwrite the source partition nodes. + # When using force_non_static_weights_for_f32_linear, we want to get_deps to overwrite the source partition nodes. # Else we want to be greedy. ret_deps = ( list(set(deps) & set(src_partition.nodes)) - if self.force_fp32_dynamic_linear + if self.force_non_static_weights_for_f32_linear else list(set(deps) | set(src_partition.nodes)) ) @@ -512,8 +540,11 @@ def __init__(self, **kwargs): def _get_weight_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force fp32_dynamic_linear is on and we detected this as fp32, then we + if ( + precision == ConfigPrecisionType.FP32 + and self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we # do not partition the weight node return (True, []) diff --git a/backends/xnnpack/partition/config/xnnpack_config.py b/backends/xnnpack/partition/config/xnnpack_config.py index d261416a76f..20018610fce 100644 --- a/backends/xnnpack/partition/config/xnnpack_config.py +++ b/backends/xnnpack/partition/config/xnnpack_config.py @@ -41,7 +41,9 @@ def __init__(self, **kwargs): super().__init__() self.enabled_precision_types = self.supported_precision_types() # Flag used in GEMMConfig() - self.force_fp32_dynamic_linear = kwargs.get("force_fp32_dynamic_linear", False) + self.force_non_static_weights_for_f32_linear = kwargs.get( + "force_non_static_weights_for_f32_linear", False + ) def get_partition( self, node: torch.fx.Node, ep: ExportedProgram diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py index 30bb4f0aba2..690a1109a17 100644 --- a/backends/xnnpack/test/ops/test_linear.py +++ b/backends/xnnpack/test/ops/test_linear.py @@ -539,6 +539,66 @@ def _test_qd8_per_channel_linear(self, dtype: torch.dtype = torch.float): uses_bias=uses_bias, ) + def 
_test_qd8_linear_per_tensor_unsupported(self, dtype: torch.dtype = torch.float): + for uses_bias in (False, True): + module = BaseLinear( + in_size=8, + input_channels=13, + output_channels=17, + dtype=dtype, + use_bias=uses_bias, + ) + inputs = module.get_inputs() + dynamic_shapes = ({1: torch.export.Dim("batch", max=100)},) + + quant_config = get_symmetric_quantization_config( + is_per_channel=False, + is_dynamic=True, + ) + + for legacy_partitioner in (True, False): + for per_op_mode in (True, False): + # Every combination should fail to partition Linear or [add]mm. + DynamicallyQuantizedPartitioner = XnnpackPartitioner( + config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, + per_op_mode=per_op_mode, + ) + + tester = Tester(module, inputs, dynamic_shapes=dynamic_shapes) + tester.quantize(Quantize(quantization_config=quant_config)) + tester.export() + + if legacy_partitioner: + tester.to_edge() + tester.partition( + Partition(DynamicallyQuantizedPartitioner) + ).dump_artifact() + # should have [add]mm node + if uses_bias: + tester.check( + [ + "executorch_exir_dialects_edge__ops_aten_addmm_default", + ] + ) + else: + tester.check( + [ + "executorch_exir_dialects_edge__ops_aten_mm_default", + ] + ) + else: + tester.to_edge_transform_and_lower( + ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner]) + ).dump_artifact() + # should not have a delegate node + tester.check_not( + [ + "torch.ops.higher_order.executorch_call_delegate", + ] + ) + # No need to run the model, since it should fail to partition. + return + def _test_qd8_per_channel_4w_linear(self, dtype: torch.dtype = torch.float): qconfig = self._get_4b_dqconfig() input_channels = [2, 63] @@ -697,10 +757,24 @@ def test_qs8_linear(self): def test_qd8_f16_per_channel_linear(self): self._test_qd8_per_channel_linear(dtype=torch.half) + def test_qd8_f16_per_tensor_linear(self): + """ + XNNPACK doesn't support per_tensor quantized weights for dynamic quantized linear op. + This test is to verify that we can't lower per_tensor quantized weights to per_channel quantized weights. + """ + self._test_qd8_linear_per_tensor_unsupported(dtype=torch.half) + # Tests for q[dp]8-f32-qc8w def test_qd8_f32_per_channel_linear(self): self._test_qd8_per_channel_linear(dtype=torch.float) + def test_qd8_f32_per_tensor_linear(self): + """ + XNNPACK doesn't support per_tensor quantized weights for dynamic quantized linear op. + This test is to verify that we can't lower per_tensor quantized weights to per_channel quantized weights. 
+ """ + self._test_qd8_linear_per_tensor_unsupported(dtype=torch.float) + # Tests for q[dp]8-f16-qc4w def test_linear_qd8_f16_per_channel_int4(self): self._test_qd8_per_channel_4w_linear(dtype=torch.half) @@ -874,7 +948,7 @@ def test_linear_qd8_as_fp32(self): }, ) - def test_linear_fp32_with_force_as_mm(self): + def test_linear_with_force_non_static_weights_for_f32_linear(self): def check_signature( signature: ExportGraphSignature, force_flag: bool, @@ -907,7 +981,7 @@ def check_signature( inputs = module.get_inputs() tester = Tester(module, inputs).export() partitioner = XnnpackPartitioner( - force_fp32_dynamic_linear=force_flag + force_non_static_weights_for_f32_linear=force_flag ) if legacy_mode: tester.to_edge() diff --git a/backends/xnnpack/test/ops/test_lstm.py b/backends/xnnpack/test/ops/test_lstm.py index be209082b37..6c174b16f33 100644 --- a/backends/xnnpack/test/ops/test_lstm.py +++ b/backends/xnnpack/test/ops/test_lstm.py @@ -43,18 +43,20 @@ def test_fp32_lstm(self): .run_method_and_compare_outputs() ) - def test_fp32_lstm_force_dynamic_linear(self): + def test_lstm_with_force_non_static_weights_for_f32_linear(self): ( Tester(self.LSTMLinear(32, 32, 10), (torch.rand(1, 32, 32),)) .export() .to_edge_transform_and_lower( ToEdgeTransformAndLower( - partitioners=[XnnpackPartitioner(force_fp32_dynamic_linear=True)] + partitioners=[ + XnnpackPartitioner(force_non_static_weights_for_f32_linear=True) + ] ) ) .check_not(["executorch_exir_dialects_edge__ops_aten_addmm_default"]) # Weights are supplied as input to linears - # Biases are not owned by delegates when force_fp32_dynamic_linear is set + # Biases are not owned by delegates when force_non_static_weights_for_f32_linear is set .check(["p_lstm_weight_hh_l0", "p_lstm_weight_ih_l0", "p_lstm_bias"]) .to_executorch() .serialize() diff --git a/backends/xnnpack/utils/quant_utils.py b/backends/xnnpack/utils/quant_utils.py index 7c035757a6f..49c5a963161 100644 --- a/backends/xnnpack/utils/quant_utils.py +++ b/backends/xnnpack/utils/quant_utils.py @@ -89,6 +89,15 @@ def is_per_channel(node: torch.fx.Node) -> bool: return is_per_channel or is_affine_per_channel_group +def is_per_tensor(node: torch.fx.Node) -> bool: + if not (is_quant(node) or is_dequant(node)): + return False + + is_per_tensor = "per_tensor" in node.target.__name__ # pyre-ignore + + return is_per_tensor and not (is_per_channel(node)) + + def is_affine_qdq(node: torch.fx.Node) -> bool: if not (is_quant(node) or is_dequant(node)): return False diff --git a/devtools/etdump/etdump_flatcc.cpp b/devtools/etdump/etdump_flatcc.cpp index a5242c8ed4b..8c20bb4ad89 100644 --- a/devtools/etdump/etdump_flatcc.cpp +++ b/devtools/etdump/etdump_flatcc.cpp @@ -503,7 +503,7 @@ void ETDumpGen::set_debug_buffer(Span<uint8_t> buffer) { Result<BufferDataSink> bds_ret = BufferDataSink::create(buffer); ET_CHECK_MSG( bds_ret.ok(), - "Failed to write tensor with error 0x%" PRIx32, + "Failed to create data sink from debug buffer with error 0x%" PRIx32, static_cast<uint32_t>(bds_ret.error())); buffer_data_sink_ = std::move(bds_ret.get()); diff --git a/docs/source/using-executorch-building-from-source.md b/docs/source/using-executorch-building-from-source.md index eae7fbabf57..8196c7d39df 100644 --- a/docs/source/using-executorch-building-from-source.md +++ b/docs/source/using-executorch-building-from-source.md @@ -80,6 +80,14 @@ portability details. ./install_executorch.sh --pybind off ``` + For development, install the package in `--editable` mode, which allows you to modify Python source code and see changes reflected immediately.
+ ``` + ./install_executorch.sh --editable [--pybind xnnpack] + + # Or you can directly do the following if dependencies are already installed. + pip install -e . + ``` + > **_NOTE:_** Cleaning the build system > > When fetching a new version of the upstream repo (via `git fetch` or `git diff --git a/examples/models/checkpoint.py b/examples/models/checkpoint.py index ee3fb560429..c84a689b951 100644 --- a/examples/models/checkpoint.py +++ b/examples/models/checkpoint.py @@ -64,7 +64,7 @@ def get_checkpoint_dtype(checkpoint: Dict[str, Any]) -> Optional[str]: mismatched_dtypes = [ (key, value.dtype) for key, value in checkpoint.items() - if value.dtype != dtype + if hasattr(value, "dtype") and value.dtype != dtype ] if len(mismatched_dtypes) > 0: print( diff --git a/examples/models/llama/runner/generation.py b/examples/models/llama/runner/generation.py index 3e9ceb34af5..4ba645ffd87 100644 --- a/examples/models/llama/runner/generation.py +++ b/examples/models/llama/runner/generation.py @@ -4,6 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import time from abc import ABC, abstractmethod from typing import List, Optional @@ -97,6 +98,7 @@ def generate( # noqa: C901 pos_base: int = 0, ) -> List[int]: # Prefill + prefill_start = time.time() logits = self.forward( tokens=torch.tensor([prompt_tokens], dtype=torch.long, device=self.device), input_pos=( @@ -105,11 +107,13 @@ def generate( # noqa: C901 else None ), ) + prefill_time = time.time() - prefill_start current_token = next_token(logits, temperature, top_p) print(f"{self.tokenizer.decode_token(current_token)}", end="", flush=True) tokens = prompt_tokens + [current_token] + generate_start = time.time() while len(tokens) < max_seq_len: if self.use_kv_cache: logits = self.forward( @@ -140,6 +144,10 @@ def generate( # noqa: C901 print(f"{self.tokenizer.decode_token(current_token)}", end="", flush=True) print("\n") + generate_time = time.time() - generate_start + print(f"Prefill time: {prefill_time}") + print(f"Generation tok/s: {len(tokens) / generate_time}") + return tokens if echo else tokens[len(prompt_tokens) :] def text_completion( diff --git a/examples/qualcomm/oss_scripts/llama/model/static_llama.py b/examples/qualcomm/oss_scripts/llama/model/static_llama.py index 40044db7428..ea8e2f5d319 100755 --- a/examples/qualcomm/oss_scripts/llama/model/static_llama.py +++ b/examples/qualcomm/oss_scripts/llama/model/static_llama.py @@ -461,7 +461,7 @@ def get_metadata(self): "get_bos_id": 1, "get_eos_id": 2, "get_dim": self.dim, - "get_head_dim": self.dim // self.n_heads, + "get_head_dim": self.head_dim, "get_max_batch_size": self.max_batch_size, "get_max_seq_len": self.max_seq_len, "get_n_bos": 1, diff --git a/exir/_serialize/_named_data_store.py b/exir/_serialize/_named_data_store.py index 999913a4bb0..2c2d975937e 100644 --- a/exir/_serialize/_named_data_store.py +++ b/exir/_serialize/_named_data_store.py @@ -181,3 +181,30 @@ def get_named_data_store_output(self) -> NamedDataStoreOutput: # Clean up empty maps inside self.external_data self.external_data = {k: v for k, v in self.external_data.items() if len(v) > 0} return NamedDataStoreOutput(self.buffers, self.pte_data, self.external_data) + + def merge_named_data_store(self, other: NamedDataStoreOutput) -> None: + """ + Merge another NamedDataStore into this one. + Args: + other (NamedDataStore): the other NamedDataStore to merge. 
+ Raises: + ValueError: when the key exists in both stores, and corresponding + data is different between them. + """ + # Merge the pte_data. + for key, buffer_idx in other.pte_data.items(): + self.add_named_data( + key, + other.buffers[buffer_idx].buffer, + other.buffers[buffer_idx].alignment, + ) + + # Merge the external_data. + for filename, key_to_buffer_idx in other.external_data.items(): + for key, buffer_idx in key_to_buffer_idx.items(): + self.add_named_data( + key, + other.buffers[buffer_idx].buffer, + other.buffers[buffer_idx].alignment, + external_tag=filename, + ) diff --git a/exir/_serialize/test/test_named_data_store.py b/exir/_serialize/test/test_named_data_store.py index d5355f6d7bf..ffe6f2ddce7 100644 --- a/exir/_serialize/test/test_named_data_store.py +++ b/exir/_serialize/test/test_named_data_store.py @@ -83,3 +83,62 @@ def test_add_duplicate_key_fail(self) -> None: self.assertEqual(len(output.pte_data), 1) self.assertEqual(output.pte_data["key"], 0) self.assertEqual(len(output.external_data), 0) + + def test_merge(self) -> None: + store1 = NamedDataStore() + store1.add_named_data("key1", b"data1", None, None) + store1.add_named_data("key2", b"data2", 16, "file1") + + # Check items in the store1. + output = store1.get_named_data_store_output() + self.assertEqual(len(output.buffers), 2) + self.assertEqual(len(output.pte_data), 1) + self.assertEqual(len(output.external_data), 1) + self.assertEqual(len(output.external_data["file1"]), 1) + + store2 = NamedDataStore() + store2.add_named_data("key1", b"data1", None, None) + store2.add_named_data("key3", b"data3", None, None) + store2.add_named_data("key4", b"data4", 16, "file1") + store2.add_named_data("key5", b"data5", 16, "file2") + + # Check items in store2. + output2 = store2.get_named_data_store_output() + self.assertEqual(len(output2.buffers), 4) + self.assertEqual(len(output2.pte_data), 2) + self.assertEqual(len(output2.external_data), 2) + self.assertEqual(len(output2.external_data["file1"]), 1) + self.assertEqual(len(output2.external_data["file2"]), 1) + + # Merge store2 into store1. + store1.merge_named_data_store(output2) + + # Check items in store2 are merged into store1. + output = store1.get_named_data_store_output() + # key1, data1 exist in both store1 and store2, so we only have one copy of it. + self.assertEqual(len(output.buffers), 5) + self.assertEqual(len(output.pte_data), 2) + self.assertEqual(len(output.external_data), 2) + self.assertEqual(len(output.external_data["file1"]), 2) + self.assertEqual(len(output.external_data["file2"]), 1) + + def test_merge_duplicate_error(self) -> None: + store1 = NamedDataStore() + store1.add_named_data("key1", b"data1", None, None) + + # Check items in the store1. + output = store1.get_named_data_store_output() + self.assertEqual(len(output.buffers), 1) + self.assertEqual(len(output.pte_data), 1) + + store2 = NamedDataStore() + store2.add_named_data("key1", b"data2", None, None) + + # Check items in store2. + output2 = store2.get_named_data_store_output() + self.assertEqual(len(output2.buffers), 1) + self.assertEqual(len(output2.pte_data), 1) + + # Merge store2 into store1 raises error as key1 is already in store1 + # with different data. + self.assertRaises(ValueError, store1.merge_named_data_store, output2) diff --git a/exir/backend/backend_api.py b/exir/backend/backend_api.py index 966cae5f022..519f184871a 100644 --- a/exir/backend/backend_api.py +++ b/exir/backend/backend_api.py @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -56,9 +57,9 @@ def to_backend( ) -> LoweredBackendModule: def to_backend( - graph_module: torch.fx.GraphModule, - partitioner: Type[TPartitioner], - ) -> torch.fx.GraphModule + edge_program: ExportedProgram, + partitioner: Partitioner, + ) -> ExportedProgram: """ pass diff --git a/exir/program/_program.py b/exir/program/_program.py index 5a9c101a06a..8295907d090 100644 --- a/exir/program/_program.py +++ b/exir/program/_program.py @@ -978,6 +978,18 @@ def _remove_invalid_ops_for_not_decompose( ) -> List[torch._ops.OpOverload]: # To address https://github.com/pytorch/executorch/issues/8781 def keep(op): + # Explicit allow list + allow_list = [] + try: + # Ops in torch.ops.quant are not always loaded, so we use try/except + # Aliases output, but we need to allow it for XNNPACK + allow_list.append(torch.ops.quant.choose_qparams_affine.default) + except: + pass + + if op in allow_list: + return True + schema = op._schema native_schema = _pybind_schema_to_native_schema(schema) if native_schema.is_mutable: diff --git a/exir/verification/TARGETS b/exir/verification/TARGETS index 8ee9e5546e3..092b48658df 100644 --- a/exir/verification/TARGETS +++ b/exir/verification/TARGETS @@ -10,13 +10,11 @@ cpp_python_extension( "bindings.cpp", ], deps = [ + "fbsource//third-party/pybind11:pybind11", "//caffe2:torch-cpp-cpu", "//caffe2:torch_extension", "//caffe2/c10:c10", ], - external_deps = [ - "pybind11", - ], ) python_library( diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index 47ad30e9390..eb8dd462378 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -448,6 +448,8 @@ def to_edge_transform_and_lower( compile_config=edge_config, constant_methods=self.metadata, ) + if self.verbose: + logging.info(f"Exported graph:\n{self.edge_manager.exported_program()}") return self def to_executorch( diff --git a/extension/pybindings/TARGETS b/extension/pybindings/TARGETS index 17ccbb2477c..2e77127bf56 100644 --- a/extension/pybindings/TARGETS +++ b/extension/pybindings/TARGETS @@ -70,5 +70,8 @@ runtime.python_library( "//executorch/runtime/...", "@EXECUTORCH_CLIENTS", ], - deps = [":_portable_lib"], + deps = [ + ":_portable_lib", + "//executorch/exir:_warnings", + ], ) diff --git a/extension/pytree/TARGETS b/extension/pytree/TARGETS index 400a5b9504c..005c5c9c2d7 100644 --- a/extension/pytree/TARGETS +++ b/extension/pytree/TARGETS @@ -16,11 +16,9 @@ cpp_python_extension( ], base_module = "executorch.extension.pytree", deps = [ + "fbsource//third-party/pybind11:pybind11", ":pytree", ], - external_deps = [ - "pybind11", - ], ) cpp_python_extension( @@ -30,11 +28,9 @@ cpp_python_extension( ], base_module = "executorch.extension.pytree", deps = [ + "fbsource//third-party/pybind11:pybind11", ":pytree", ], - external_deps = [ - "pybind11", - ], ) python_library( diff --git a/extension/threadpool/cpuinfo_utils.cpp b/extension/threadpool/cpuinfo_utils.cpp index 5dc3fa7fae5..21862fbd4aa 100644 --- a/extension/threadpool/cpuinfo_utils.cpp +++ b/extension/threadpool/cpuinfo_utils.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -84,7 +85,7 @@ bool populate_available_cpu_mids() { cpu_midrs->resize(num_possible_cores); const std::string kMidrFilePathPrefix = "/sys/devices/system/cpu/cpu"; const std::string kMidrFilePathSuffix = "/regs/identification/midr_el1"; - for (int32_t i = 0; i < num_possible_cores; ++i) { + for (const auto i : c10::irange(num_possible_cores)) { std::string midr_file_path = kMidrFilePathPrefix + std::to_string(i) + kMidrFilePathSuffix; ET_LOG(Info, "Reading file %s", midr_file_path.c_str()); @@ -115,7 +116,7 @@ uint32_t _get_num_performant_cores() { ET_LOG(Info, "CPU info and manual query on # of cpus dont match."); return 0; } - for (int32_t i = 0; i < cpu_midrs->size(); ++i) { + for (const auto i : c10::irange(cpu_midrs->size())) { uint32_t masked_midr = (*cpu_midrs)[i] & RIVISION_MASK; switch (masked_midr) { case CPUINFO_ARM_MIDR_CORTEX_A520: @@ -148,7 +149,7 @@ uint32_t get_num_performant_cores() { uint32_t num_possible_cores = cpuinfo_get_processors_count(); uint32_t num_non_performant_core = 0; if (uarch_count > 1) { - for (int32_t i = 0; i < uarch_count; ++i) { + for (const auto i : c10::irange(uarch_count)) { const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i); if (is_non_performant_core(uarch_info)) { num_non_performant_core += uarch_info->processor_count; diff --git a/extension/threadpool/targets.bzl b/extension/threadpool/targets.bzl index 4a7185ce972..8bb0398b385 100644 --- a/extension/threadpool/targets.bzl +++ b/extension/threadpool/targets.bzl @@ -23,6 +23,7 @@ def define_common_targets(): srcs = _THREADPOOL_SRCS, deps = [ "//executorch/runtime/core:core", + "//executorch/runtime/core/portable_type/c10/c10:c10", ], exported_headers = _THREADPOOL_HEADERS, exported_deps = [ diff --git a/extension/training/CMakeLists.txt b/extension/training/CMakeLists.txt index e50bb3c71eb..97e75955837 100644 --- a/extension/training/CMakeLists.txt +++ b/extension/training/CMakeLists.txt @@ -26,7 +26,7 @@ target_include_directories( target_include_directories(extension_training PUBLIC ${EXECUTORCH_ROOT}/..) target_compile_options(extension_training PUBLIC ${_common_compile_options}) target_link_libraries(extension_training executorch_core - extension_data_loader extension_module extension_tensor) + extension_data_loader extension_module extension_tensor extension_flat_tensor) list(TRANSFORM _train_xor__srcs PREPEND "${EXECUTORCH_ROOT}/") diff --git a/extension/training/examples/XOR/export_model.py b/extension/training/examples/XOR/export_model.py index bfbe0ce2138..98e04f09a2f 100644 --- a/extension/training/examples/XOR/export_model.py +++ b/extension/training/examples/XOR/export_model.py @@ -11,14 +11,14 @@ import os import torch -from executorch.exir import to_edge +from executorch.exir import ExecutorchBackendConfig, to_edge from executorch.extension.training.examples.XOR.model import Net, TrainingNet from torch.export import export from torch.export.experimental import _export_forward_backward -def _export_model(): +def _export_model(external_mutable_weights: bool = False): net = TrainingNet(Net()) x = torch.randn(1, 2) @@ -30,7 +30,11 @@ def _export_model(): # Lower the graph to edge dialect. ep = to_edge(ep) # Lower the graph to executorch. 
- ep = ep.to_executorch() + ep = ep.to_executorch( + config=ExecutorchBackendConfig( + external_mutable_weights=external_mutable_weights + ) + ) return ep @@ -44,19 +48,27 @@ def main() -> None: "--outdir", type=str, required=True, - help="Path to the directory to write xor.pte files to", + help="Path to the directory to write xor.pte and xor.ptd files to", + ) + parser.add_argument( + "--external", + action="store_true", + help="Export the model with external weights", ) args = parser.parse_args() - ep = _export_model() + ep = _export_model(args.external) # Write out the .pte file. os.makedirs(args.outdir, exist_ok=True) outfile = os.path.join(args.outdir, "xor.pte") with open(outfile, "wb") as fp: - fp.write( - ep.buffer, - ) + ep.write_to_file(fp) + + if args.external: + # current infra doesnt easily allow renaming this file, so just hackily do it here. + ep._tensor_data["xor"] = ep._tensor_data.pop("_default_external_constant") + ep.write_tensor_data_to_file(args.outdir) if __name__ == "__main__": diff --git a/extension/training/examples/XOR/train.cpp b/extension/training/examples/XOR/train.cpp index 746daebbf1b..af1c37a6a50 100644 --- a/extension/training/examples/XOR/train.cpp +++ b/extension/training/examples/XOR/train.cpp @@ -23,12 +23,18 @@ using executorch::extension::training::optimizer::SGDOptions; using executorch::runtime::Error; using executorch::runtime::Result; DEFINE_string(model_path, "xor.pte", "Model serialized in flatbuffer format."); +DEFINE_string(ptd_path, "", "Model weights serialized in flatbuffer format."); int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); - if (argc != 1) { + if (argc == 0) { + ET_LOG(Error, "Please provide a model path."); + return 1; + } else if (argc > 2) { std::string msg = "Extra commandline args: "; - for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) { + for (int i = 2 /* skip argv[0] (pte path) and argv[1] (ptd path) */; + i < argc; + i++) { msg += argv[i]; } ET_LOG(Error, "%s", msg.c_str()); @@ -46,7 +52,21 @@ int main(int argc, char** argv) { auto loader = std::make_unique( std::move(loader_res.get())); - auto mod = executorch::extension::training::TrainingModule(std::move(loader)); + std::unique_ptr ptd_loader = nullptr; + if (!FLAGS_ptd_path.empty()) { + executorch::runtime::Result + ptd_loader_res = + executorch::extension::FileDataLoader::from(FLAGS_ptd_path.c_str()); + if (ptd_loader_res.error() != Error::Ok) { + ET_LOG(Error, "Failed to open ptd file: %s", FLAGS_ptd_path.c_str()); + return 1; + } + ptd_loader = std::make_unique( + std::move(ptd_loader_res.get())); + } + + auto mod = executorch::extension::training::TrainingModule( + std::move(loader), nullptr, nullptr, nullptr, std::move(ptd_loader)); // Create full data set of input and labels. 
std::vector(param_res.error())); return 1; } @@ -112,5 +135,6 @@ int main(int argc, char** argv) { std::string(param.first.data()), param.second}); } - executorch::extension::flat_tensor::save_ptd("xor.ptd", param_map, 16); + executorch::extension::flat_tensor::save_ptd( + "trained_xor.ptd", param_map, 16); } diff --git a/extension/training/pybindings/TARGETS b/extension/training/pybindings/TARGETS index 6aa11ea6726..19b54961493 100644 --- a/extension/training/pybindings/TARGETS +++ b/extension/training/pybindings/TARGETS @@ -17,13 +17,11 @@ runtime.cxx_python_extension( types = ["_training_lib.pyi"], visibility = ["//executorch/extension/training/..."], deps = [ + "fbsource//third-party/pybind11:pybind11", "//executorch/extension/aten_util:aten_bridge", "//executorch/extension/training/optimizer:sgd", ], - external_deps = [ - "pybind11", - "libtorch_python", - ], + external_deps = ["libtorch_python"], ) runtime.python_library( diff --git a/kernels/portable/cpu/op__to_dim_order_copy.cpp b/kernels/portable/cpu/op__to_dim_order_copy.cpp index efb74e3a01f..40ce86e8fdc 100644 --- a/kernels/portable/cpu/op__to_dim_order_copy.cpp +++ b/kernels/portable/cpu/op__to_dim_order_copy.cpp @@ -6,6 +6,8 @@ * LICENSE file in the root directory of this source tree. */ +#include + #include #include #include @@ -41,7 +43,7 @@ int64_t coordinateToIndexWithDimOrder( dim_order_to_stride_nocheck( sizes.data(), dim_order.data(), sizes.size(), strides); - for (size_t i = 0; i < self.dim(); ++i) { + for (const auto i : c10::irange(self.dim())) { index += cur_indices[i] * strides[i]; } return index; @@ -59,7 +61,7 @@ void _to_dim_order_copy_impl(const Tensor& self, Tensor& out) { for (ssize_t i = 0; i < self.numel(); i++) { // Update the current indices. for (ssize_t j = self.dim() - 1; j >= 0; j--) { - if (coordinate[j] + 1 < self.size(j)) { + if (coordinate[j] + 1 < static_cast(self.size(j))) { coordinate[j]++; break; } else { diff --git a/kernels/portable/cpu/op_amax.cpp b/kernels/portable/cpu/op_amax.cpp index 9f879179ec6..d36f416c7b4 100644 --- a/kernels/portable/cpu/op_amax.cpp +++ b/kernels/portable/cpu/op_amax.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -44,7 +45,7 @@ Tensor& amax_out( ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, "amax.out", CTYPE, [&]() { CTYPE* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_data[out_ix] = reduce_over_dim_list( [](CTYPE v, CTYPE max_v) { return std::isnan(v) || v > max_v ? v : max_v; diff --git a/kernels/portable/cpu/op_amin.cpp b/kernels/portable/cpu/op_amin.cpp index 4f6f3ce52e5..7c4c8186e59 100644 --- a/kernels/portable/cpu/op_amin.cpp +++ b/kernels/portable/cpu/op_amin.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ - +#include #include #include @@ -44,7 +44,7 @@ Tensor& amin_out( ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, "amin.out", CTYPE, [&]() { CTYPE* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_data[out_ix] = reduce_over_dim_list( [](CTYPE v, CTYPE min_v) { return std::isnan(v) || v < min_v ? 
v : min_v; diff --git a/kernels/portable/cpu/op_argmax.cpp b/kernels/portable/cpu/op_argmax.cpp index 5eb656d5b76..39ad0171d5d 100644 --- a/kernels/portable/cpu/op_argmax.cpp +++ b/kernels/portable/cpu/op_argmax.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -46,7 +47,7 @@ Tensor& argmax_out( ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "argmax.out", CTYPE, [&] { long* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { if (!std::isnan(acc_val) && (std::isnan(v) || v > acc_val)) { diff --git a/kernels/portable/cpu/op_argmin.cpp b/kernels/portable/cpu/op_argmin.cpp index 1c4a2572ea8..8148efa6264 100644 --- a/kernels/portable/cpu/op_argmin.cpp +++ b/kernels/portable/cpu/op_argmin.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -46,7 +47,7 @@ Tensor& argmin_out( ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "argmin.out", CTYPE, [&] { long* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { if (!std::isnan(acc_val) && (std::isnan(v) || v < acc_val)) { diff --git a/kernels/portable/cpu/op_expand_copy.cpp b/kernels/portable/cpu/op_expand_copy.cpp index f1a7bfbf1fb..6c8685dd867 100644 --- a/kernels/portable/cpu/op_expand_copy.cpp +++ b/kernels/portable/cpu/op_expand_copy.cpp @@ -96,7 +96,8 @@ Tensor& expand_copy_out( ET_KERNEL_CHECK( ctx, - repeat_tensor(self, {repeats, repeats_size}, out) == Error::Ok, + repeat_tensor(self, makeArrayRef(repeats, repeats_size), out) == + Error::Ok, InvalidArgument, out); diff --git a/kernels/portable/cpu/util/activation_ops_util.cpp b/kernels/portable/cpu/util/activation_ops_util.cpp index fe26d4fda04..abde15f8740 100644 --- a/kernels/portable/cpu/util/activation_ops_util.cpp +++ b/kernels/portable/cpu/util/activation_ops_util.cpp @@ -31,7 +31,7 @@ bool check_glu_args(const Tensor& in, int64_t dim, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensor_is_floating_type(in)); const size_t non_negative_dim = dim < 0 ? dim + in.dim() : dim; - const size_t dim_size = in.size(non_negative_dim); + const ssize_t dim_size = in.size(non_negative_dim); ET_CHECK_OR_RETURN_FALSE( dim_size % 2 == 0, diff --git a/kernels/portable/cpu/util/broadcast_util.cpp b/kernels/portable/cpu/util/broadcast_util.cpp index d8569d23c2f..381e07cbe30 100644 --- a/kernels/portable/cpu/util/broadcast_util.cpp +++ b/kernels/portable/cpu/util/broadcast_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -274,7 +275,7 @@ void delinearize_index( size_t* out_indexes, const size_t out_indexes_len) { ET_CHECK(shape.size() <= out_indexes_len); - for (auto i = 0; i < shape.size(); ++i) { + for (size_t i = 0; i < shape.size(); ++i) { auto dim = shape.size() - 1 - i; auto dim_size = shape[dim]; out_indexes[dim] = linear_index % dim_size; @@ -304,7 +305,8 @@ size_t linearize_access_indexes( size_t linear_index = 0; for (size_t i = 0; i < indexes_broadcast_from.size(); ++i) { // If this dimension is broadcasted, add zero to the linear address. 
- if (indexes_broadcast_from[i] >= broadcast_from_shape[i]) { + if (indexes_broadcast_from[i] >= + static_cast(broadcast_from_shape[i])) { ET_CHECK_MSG( broadcast_from_shape[i] == 1, "Expected dim size == 1 if broadcasted, but actual dim size is %zu", diff --git a/kernels/portable/cpu/util/copy_ops_util.h b/kernels/portable/cpu/util/copy_ops_util.h index 8efd6057dba..e7399ae0956 100644 --- a/kernels/portable/cpu/util/copy_ops_util.h +++ b/kernels/portable/cpu/util/copy_ops_util.h @@ -7,6 +7,7 @@ */ #pragma once +#include #include @@ -26,8 +27,8 @@ void _as_strided_copy( ArrayRef stride, int64_t dim) { // the last dimension, copy data - if (dim == size.size() - 1) { - for (size_t i = 0; i < size.at(dim); ++i) { + if (dim == static_cast(size.size()) - 1) { + for (const auto i : c10::irange(size.at(dim))) { output_data[i] = *input_data; input_data += stride.at(dim); } @@ -35,7 +36,7 @@ void _as_strided_copy( } size_t trailing_dims = getTrailingDims(out, dim); // recursively set data for the next dimension - for (size_t i = 0; i < size.at(dim); ++i) { + for ([[maybe_unused]] const auto i : c10::irange(size.at(dim))) { _as_strided_copy( input_data, output_data, out, size, stride, dim + 1); input_data += stride.at(dim); diff --git a/kernels/portable/cpu/util/functional_util.h b/kernels/portable/cpu/util/functional_util.h index cdf90813772..609a1a26fa5 100644 --- a/kernels/portable/cpu/util/functional_util.h +++ b/kernels/portable/cpu/util/functional_util.h @@ -8,6 +8,8 @@ #pragma once +#include + #include #include @@ -30,7 +32,7 @@ inline CTYPE apply_unary_reduce_fn( const int64_t size, const int64_t stride = 1) { CTYPE acc_val = data_in[0]; - for (size_t i = 1; i < size; i++) { + for (const auto i : c10::irange(1, size)) { acc_val = reduce_fun(data_in[i * stride], acc_val); } return acc_val; @@ -51,7 +53,7 @@ inline void apply_unary_map_fn( CTYPE_OUT* const data_out, const int64_t size, const int64_t stride = 1) { - for (size_t i = 0; i < size; i++) { + for (const auto i : c10::irange(size)) { data_out[i * stride] = map_fun(data_in[i * stride]); } } @@ -77,7 +79,7 @@ inline CTYPE_OUT apply_unary_map_reduce_fn( const int64_t size, const int64_t stride = 1) { CTYPE_OUT acc_val = map_fun(data_in[0]); - for (size_t i = 1; i < size; ++i) { + for (const auto i : c10::irange(1, size)) { acc_val = reduce_fun(map_fun(data_in[i * stride]), acc_val); } return acc_val; diff --git a/kernels/portable/cpu/util/reduce_util.cpp b/kernels/portable/cpu/util/reduce_util.cpp index 2902cbfc138..09ba508a31d 100644 --- a/kernels/portable/cpu/util/reduce_util.cpp +++ b/kernels/portable/cpu/util/reduce_util.cpp @@ -48,8 +48,7 @@ ET_NODISCARD bool check_dim_list_is_valid( } const size_t non_neg_d = _normalize_non_neg_d(d, in.dim()); - ET_LOG_AND_RETURN_IF_FALSE( - non_neg_d < kTensorDimensionLimit && non_neg_d >= 0); + ET_LOG_AND_RETURN_IF_FALSE(non_neg_d < kTensorDimensionLimit); ET_CHECK_OR_RETURN_FALSE( dim_exist[non_neg_d] == false, @@ -86,7 +85,7 @@ size_t get_reduced_dim_product( } size_t dim_product = 1; if (!dim.has_value()) { - for (size_t i = 0; i < in.dim(); ++i) { + for (size_t i = 0; i < static_cast(in.dim()); ++i) { dim_product *= in.size(i); } return dim_product; @@ -108,7 +107,7 @@ size_t get_reduced_dim_product( size_t dim_product = 1; const size_t in_dim = in.dim(); if (!dim_list.has_value() || dim_list.value().size() == 0) { - for (size_t i = 0; i < in.dim(); ++i) { + for (size_t i = 0; i < static_cast(in.dim()); ++i) { dim_product *= in.size(i); } return dim_product; @@ -136,7 +135,7 @@ size_t 
get_out_numel( ET_CHECK_VALID_DIM(dim_val, in.dim()); } const size_t non_neg_dim = _normalize_non_neg_d(dim_val, in.dim()); - for (size_t d = 0; d < in.dim(); ++d) { + for (size_t d = 0; d < static_cast(in.dim()); ++d) { if (d != non_neg_dim) { out_numel *= in.size(d); } @@ -155,7 +154,7 @@ size_t get_out_numel( dim_list) { size_t out_numel = 1; if (dim_list.has_value() && dim_list.value().size() != 0) { - for (size_t d = 0; d < in.dim(); ++d) { + for (size_t d = 0; d < static_cast(in.dim()); ++d) { if (!check_dim_in_dim_list(d, in.dim(), dim_list.value())) { out_numel *= in.size(d); } @@ -234,7 +233,7 @@ size_t compute_reduced_out_size( if (dim.has_value()) { const auto dim_val = dim.value(); const size_t non_neg_dim = _normalize_non_neg_d(dim_val, in_dim); - for (ssize_t i = 0; i < non_neg_dim; ++i) { + for (size_t i = 0; i < non_neg_dim; ++i) { sizes_arr[i] = in.size(i); } if (keepdim) { @@ -250,7 +249,7 @@ size_t compute_reduced_out_size( } } else { if (keepdim) { - for (size_t i = 0; i < in_dim; ++i) { + for (size_t i = 0; i < static_cast(in_dim); ++i) { sizes_arr[i] = 1; } } else { @@ -266,7 +265,9 @@ size_t compute_reduced_out_size( dim_list, bool keepdim, executorch::aten::SizesType* sizes_arr) { - const auto in_dim = in.dim(); + // check_dim_in_dim_list and later comparisons + // expect in_dim to be size_t, so cast it here + const size_t in_dim = static_cast(in.dim()); size_t out_dim = in_dim; if (dim_list.has_value() && dim_list.value().size() != 0) { diff --git a/kernels/portable/cpu/util/reduce_util.h b/kernels/portable/cpu/util/reduce_util.h index 25a2c0b44c4..35cfdfbaa72 100644 --- a/kernels/portable/cpu/util/reduce_util.h +++ b/kernels/portable/cpu/util/reduce_util.h @@ -50,7 +50,7 @@ void apply_on_flat_ix_with_dim_mask_and_base( const size_t start, const size_t end) { // Compute innermost dim from dim list - size_t inner_dim = in.dim() - 1; + int64_t inner_dim = in.dim() - 1; while (!dim_mask[inner_dim]) { inner_dim--; } @@ -58,7 +58,7 @@ void apply_on_flat_ix_with_dim_mask_and_base( // Initialize array of indices per dimension. This array is used to maintain // the per-dimension index of the element in `in` that is being reduced over // Only the dims that are in the dim list are relevant. - size_t dim_index[kTensorDimensionLimit]; + int64_t dim_index[kTensorDimensionLimit]; for (int64_t d = 0; d < in.dim(); d++) { dim_index[d] = 0; } diff --git a/kernels/portable/cpu/util/repeat_util.cpp b/kernels/portable/cpu/util/repeat_util.cpp index 925fda9f793..be7231cb621 100644 --- a/kernels/portable/cpu/util/repeat_util.cpp +++ b/kernels/portable/cpu/util/repeat_util.cpp @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -26,7 +27,7 @@ bool check_repeat_args( Tensor& out) { // Ensure the self tensors list is non-empty. ET_CHECK_OR_RETURN_FALSE( - repeats.size() >= self.dim(), + static_cast(repeats.size()) >= self.dim(), "Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor"); // Repeat arrayref shall not contain negative element. @@ -39,7 +40,7 @@ bool check_repeat_args( /// Check if out.size() is legal. ET_CHECK_OR_RETURN_FALSE( - out.dim() == repeats.size(), + static_cast(out.dim()) == repeats.size(), "The dimension of out shall equal size of repeats, but now is %zd and %zd", out.dim(), repeats.size()); @@ -48,7 +49,7 @@ bool check_repeat_args( // kTensorDimensionLimit. 
Only check out tensor because the number of // dimension of out tensor shall have more than or equal to self tensor ET_CHECK_OR_RETURN_FALSE( - out.dim() <= kTensorDimensionLimit, + static_cast(out.dim()) <= kTensorDimensionLimit, "The dimension of input and output should not be larger than %zd", kTensorDimensionLimit); @@ -58,7 +59,7 @@ bool check_repeat_args( // repeats, and called it reformat_self_size. We then make point-to-point mul // of reformat_self_size and repeats. The result should equal out.size(). size_t reformat_self_size[kTensorDimensionLimit]; - for (size_t i = 0; i < out.dim() - self.dim(); i++) { + for (ssize_t i = 0; i < out.dim() - self.dim(); i++) { reformat_self_size[i] = 1; } @@ -131,7 +132,7 @@ void repeat_internal( // The increment along index of slot array to reach the next possible valid // value. int64_t incr[kTensorDimensionLimit]; - for (size_t i = 0; i < self_dim; i++) { + for (size_t i = 0; i < static_cast(self_dim); i++) { incr[i] = self_size[i]; } @@ -141,7 +142,7 @@ void repeat_internal( // than self). size_t index = self_dim - 1; size_t start = out.dim() - self_dim; - while (slots[0] != out.size(start)) { + while (slots[0] != static_cast(out.size(start))) { // Compute the offset (from origin) in the out tensor where this self // data will be copied to. size_t offset = compute_access_offset(slots, strides, self_dim); @@ -151,7 +152,7 @@ void repeat_internal( slots[index] += incr[index]; // If we have reached the limit in the innermost dimension, successively // increment the slot index of outer dimensions. - while (slots[index] == out.size(start + index)) { + while (slots[index] == static_cast(out.size(start + index))) { if (index == 0) { break; } @@ -227,7 +228,7 @@ Error repeat_tensor( // so we reset the upper bound of innermost dim to 1. 'in_incr' indicates // the size (in bytes) of the self data. 
int64_t limits[kTensorDimensionLimit]; - for (size_t i = 0; i < self_dim; i++) { + for (ssize_t i = 0; i < self_dim; i++) { limits[i] = self_size[i]; } diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index eef765d5eec..2b22687274f 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -61,7 +61,6 @@ def define_common_targets(): "//executorch/runtime/core/exec_aten/util:scalar_type_util", "//executorch/runtime/core/exec_aten/util:tensor_util", ], - compiler_flags = ["-Wno-missing-prototypes"], visibility = ["//executorch/kernels/portable/cpu/..."], ) @@ -71,7 +70,6 @@ def define_common_targets(): exported_headers = [ "broadcast_util.h", ], - compiler_flags = ["-Wno-missing-prototypes"], deps = [ ":repeat_util", "//executorch/runtime/kernel:kernel_includes", diff --git a/kernels/prim_ops/et_view.cpp b/kernels/prim_ops/et_view.cpp index 0f041dae00f..7f66bca1725 100644 --- a/kernels/prim_ops/et_view.cpp +++ b/kernels/prim_ops/et_view.cpp @@ -32,7 +32,8 @@ bool get_view_target_size( executorch::aten::ArrayRef size, int64_t dim, executorch::aten::SizesType* out_size) { - ET_LOG_AND_RETURN_IF_FALSE(size.size() == dim); + ET_LOG_AND_RETURN_IF_FALSE( + dim >= 0 && size.size() == static_cast(dim)); int minus1_dim = -1; int n_zero = 0; int64_t numel_without_minus_1 = 1; diff --git a/runtime/core/data_loader.h b/runtime/core/data_loader.h index 45fd1bc8189..3dda5516908 100644 --- a/runtime/core/data_loader.h +++ b/runtime/core/data_loader.h @@ -69,12 +69,12 @@ class DataLoader { SegmentInfo() = default; explicit SegmentInfo( - Type segment_type, - size_t segment_index = 0, - const char* descriptor = nullptr) - : segment_type(segment_type), - segment_index(segment_index), - descriptor(descriptor) {} + Type segment_type_, + size_t segment_index_ = 0, + const char* descriptor_ = nullptr) + : segment_type(segment_type_), + segment_index(segment_index_), + descriptor(descriptor_) {} }; virtual ~DataLoader() = default; diff --git a/runtime/core/exec_aten/util/dim_order_util.h b/runtime/core/exec_aten/util/dim_order_util.h index 7a31db9d6ad..07b3d5c2a97 100644 --- a/runtime/core/exec_aten/util/dim_order_util.h +++ b/runtime/core/exec_aten/util/dim_order_util.h @@ -23,8 +23,8 @@ namespace runtime { namespace { template bool validate_dim_order(const DimOrderType* dim_order, const size_t dims) { - for (int32_t i = 0; i < dims; ++i) { - if (dim_order[i] >= dims) { + for (size_t i = 0; i < dims; ++i) { + if (dim_order[i] >= static_cast(dims)) { return false; } } @@ -43,8 +43,8 @@ template inline bool is_contiguous_dim_order( const DimOrderType* dim_order, const size_t dims) { - for (int i = 0; i < dims; ++i) { - if (dim_order[i] != i) { + for (size_t i = 0; i < dims; ++i) { + if (dim_order[i] != static_cast(i)) { return false; } } @@ -66,7 +66,7 @@ bool is_channels_last_dim_order( return false; } // 4-dim tensor is interpreted as NCHW, 5-dim tensor is interpreted as NCHWD - size_t channels_dim = 1; + DimOrderType channels_dim = 1; // Last value in the dim order should be the channels dim if (dim_order[dims - 1] != channels_dim) { return false; @@ -75,8 +75,8 @@ bool is_channels_last_dim_order( if (dim_order[0] != 0) { return false; } - int d = 1; - while (d < dims - 1) { + DimOrderType d = 1; + while (d < static_cast(dims) - 1) { if (dim_order[d] != d + 1) { return false; } @@ -163,8 +163,8 @@ struct StrideDimOrder { StridesType stride; DimOrderType dim_order; - StrideDimOrder(StridesType stride, DimOrderType dim_order) - : 
stride(stride), dim_order(dim_order) {} + StrideDimOrder(StridesType stride_, DimOrderType dim_order_) + : stride(stride_), dim_order(dim_order_) {} StrideDimOrder() = default; bool operator>(const StrideDimOrder& other) const { // descending order diff --git a/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp b/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp index cfd416285c5..02155a4d9b4 100644 --- a/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp +++ b/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp @@ -30,7 +30,9 @@ std::array tensor_shape_to_c_string_impl( } *p++ = '('; for (const auto elem : shape) { - if (elem < 0 || elem > internal::kMaximumPrintableTensorShapeElement) { + if (elem < 0 || + static_cast(elem) > + internal::kMaximumPrintableTensorShapeElement) { static_assert( internal::kMaximumPrintableTensorShapeElement > 99999, "must have room for error string!"); diff --git a/runtime/core/exec_aten/util/tensor_util.h b/runtime/core/exec_aten/util/tensor_util.h index eb5ce10b6f3..fcc08ebf98d 100644 --- a/runtime/core/exec_aten/util/tensor_util.h +++ b/runtime/core/exec_aten/util/tensor_util.h @@ -584,7 +584,7 @@ inline bool tensors_have_same_dtype( inline bool tensor_is_rank(executorch::aten::Tensor t, size_t rank) { ET_CHECK_OR_RETURN_FALSE( - t.dim() == rank, + static_cast(t.dim()) == rank, "Expected tensor.dim() to be %zu, but got %zu", static_cast(rank), static_cast(t.dim())); @@ -596,7 +596,7 @@ inline bool tensor_has_rank_greater_or_equal_to( executorch::aten::Tensor t, size_t rank) { ET_CHECK_OR_RETURN_FALSE( - t.dim() >= rank, + static_cast(t.dim()) >= rank, "Expected tensor.dim() to be >= %zu, but got %zu", static_cast(rank), static_cast(t.dim())); @@ -608,7 +608,7 @@ inline bool tensor_has_rank_smaller_or_equal_to( executorch::aten::Tensor t, size_t rank) { ET_CHECK_OR_RETURN_FALSE( - t.dim() <= rank, + static_cast(t.dim()) <= rank, "Expected tensor.dim() to be <= %zu, but got %zu", static_cast(rank), static_cast(t.dim())); @@ -665,12 +665,12 @@ inline bool tensors_have_same_size_at_dims( executorch::aten::Tensor b, size_t dim_b) { ET_CHECK_OR_RETURN_FALSE( - dim_a < a.dim(), + dim_a < static_cast(a.dim()), "Cannot retrieve dim %zu from tensor with dim %zu", static_cast(dim_a), static_cast(a.dim())); ET_CHECK_OR_RETURN_FALSE( - dim_b < b.dim(), + dim_b < static_cast(b.dim()), "Cannot retrieve dim %zu from tensor with dim %zu", static_cast(dim_b), static_cast(b.dim())); @@ -702,7 +702,9 @@ inline bool tensors_have_same_shape( static_cast(b.numel()), static_cast(a.dim()), static_cast(b.dim())); - for (size_t d = 0; d < ET_MIN2(a.dim(), b.dim()); ++d) { + // Using [[maybe_unused]] as ET_LOG may not trigger based on verbosity + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN2(a.dim(), b.dim()))) { ET_LOG( Error, " size(%zu): (%zu, %zu)", @@ -739,7 +741,8 @@ inline bool tensors_have_same_shape( static_cast(a.dim()), static_cast(b.dim()), static_cast(c.dim())); - for (size_t d = 0; d < ET_MIN3(a.dim(), b.dim(), c.dim()); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN3(a.dim(), b.dim(), c.dim()))) { ET_LOG( Error, " size(%zu): (%zu, %zu, %zu)", @@ -779,7 +782,8 @@ inline bool tensor_has_expected_size( static_cast(expected_sizes.size())); size_t a_dim = static_cast(a.dim()); size_t expected_dim = static_cast(expected_sizes.size()); - for (size_t d = 0; d < ET_MIN2(a_dim, expected_dim); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN2(a_dim, expected_dim))) { ET_LOG( Error, " size(%zu): (%zu, 
%zu)", @@ -802,7 +806,8 @@ inline bool tensors_have_same_strides( ET_TENSOR_CHECK_PREFIX__ ": dim=(%zu, %zu)", static_cast(a.dim()), static_cast(b.dim())); - for (size_t d = 0; d < ET_MIN2(a.dim(), b.dim()); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN2(a.dim(), b.dim()))) { ET_LOG( Error, " stride(%zu): (%zu, %zu)", @@ -827,7 +832,8 @@ inline bool tensors_have_same_strides( static_cast(a.dim()), static_cast(b.dim()), static_cast(c.dim())); - for (size_t d = 0; d < ET_MIN3(a.dim(), b.dim(), c.dim()); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN3(a.dim(), b.dim(), c.dim()))) { ET_LOG( Error, " stride(%zu): (%zu, %zu, %zu)", @@ -894,7 +900,7 @@ inline size_t getLeadingDims( dim, ssize_t(tensor.dim())); size_t dims = 1; - for (size_t i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { dims *= static_cast(tensor.size(i)); } return dims; @@ -911,7 +917,7 @@ inline size_t getTrailingDims( dim, ssize_t(tensor.dim())); size_t dims = 1; - for (size_t i = dim + 1; i < tensor.dim(); ++i) { + for (size_t i = dim + 1; i < static_cast(tensor.dim()); ++i) { dims *= static_cast(tensor.size(i)); } return dims; @@ -984,7 +990,7 @@ inline void indexToCoordinate( const executorch::aten::Tensor& tensor, size_t index, size_t* coordinate) { - ET_CHECK(index < tensor.numel()); + ET_CHECK(index < static_cast(tensor.numel())); for (auto i = 0; i < tensor.dim(); ++i) { auto dim = tensor.dim() - 1 - i; size_t dim_size = tensor.size(dim); diff --git a/runtime/core/exec_aten/util/tensor_util_portable.cpp b/runtime/core/exec_aten/util/tensor_util_portable.cpp index c1cbcfb6064..e4aa875aed4 100644 --- a/runtime/core/exec_aten/util/tensor_util_portable.cpp +++ b/runtime/core/exec_aten/util/tensor_util_portable.cpp @@ -8,6 +8,7 @@ #include +#include #include #include @@ -41,11 +42,11 @@ Error get_dim_order( bool tensor_has_valid_dim_order(torch::executor::Tensor t) { if (!validate_dim_order(t.dim_order().data(), t.dim_order().size())) { ET_LOG(Error, "Tensor dim order is not valid:"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } return false; @@ -62,11 +63,11 @@ bool tensor_is_default_or_channels_last_dim_order(torch::executor::Tensor t) { ET_LOG( Error, "Expected tensor to have default or channels last dim order, but got"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } } @@ -79,11 +80,11 @@ bool tensor_is_default_dim_order(torch::executor::Tensor t) { if (!ret_val) { ET_LOG(Error, "Expected tensor to have default dim order, but got"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } } @@ -96,11 +97,11 @@ bool tensor_is_channels_last_dim_order(torch::executor::Tensor t) { if (!ret_val) { ET_LOG(Error, "Expected tensor to have channels last dim order, but got"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } } diff --git a/runtime/core/portable_type/c10/c10/util/irange.h b/runtime/core/portable_type/c10/c10/util/irange.h index 3249bdfa5cf..81104d9568f 100644 --- 
a/runtime/core/portable_type/c10/c10/util/irange.h +++ b/runtime/core/portable_type/c10/c10/util/irange.h @@ -24,7 +24,7 @@ struct integer_iterator { using pointer = I*; using reference = I&; - explicit constexpr integer_iterator(I value) : value(value) {} + explicit constexpr integer_iterator(I value_) : value(value_) {} constexpr I operator*() const { return value; diff --git a/runtime/core/portable_type/tensor_impl.cpp b/runtime/core/portable_type/tensor_impl.cpp index 6366a8eac28..ede5a3d4101 100644 --- a/runtime/core/portable_type/tensor_impl.cpp +++ b/runtime/core/portable_type/tensor_impl.cpp @@ -35,8 +35,8 @@ ssize_t compute_numel(const TensorImpl::SizesType* sizes, ssize_t dim) { for (const auto i : c10::irange(dim)) { ET_CHECK_MSG( sizes[i] >= 0, - "Size must be non-negative, got %d at dimension %zd", - sizes[i], + "Size must be non-negative, got %zd at dimension %zd", + static_cast(sizes[i]), i); numel *= sizes[i]; } @@ -76,7 +76,7 @@ ssize_t TensorImpl::element_size() const { Error TensorImpl::internal_resize_contiguous(ArrayRef new_sizes) { ET_CHECK_OR_RETURN_ERROR( - new_sizes.size() == dim_, + static_cast(new_sizes.size()) == dim_, NotSupported, "Attempted to change the tensor rank which is immutable: old=%zu, new=%zu", dim_, @@ -120,7 +120,7 @@ Error TensorImpl::internal_resize_contiguous(ArrayRef new_sizes) { const auto new_numel = compute_numel(new_sizes.data(), dim_); ET_CHECK_OR_RETURN_ERROR( - new_numel <= numel_bound_, + static_cast(new_numel) <= numel_bound_, NotSupported, "Attempted to resize a bounded tensor with a maximum capacity of %zu elements to %zu elements.", numel_bound_, diff --git a/runtime/core/tensor_layout.cpp b/runtime/core/tensor_layout.cpp index f0fac442e20..2b862e6dc14 100644 --- a/runtime/core/tensor_layout.cpp +++ b/runtime/core/tensor_layout.cpp @@ -20,7 +20,7 @@ Result calculate_nbytes( const Span& sizes, const executorch::aten::ScalarType& scalar_type) { ssize_t n = 1; - for (ssize_t i = 0; i < sizes.size(); i++) { + for (const auto i : c10::irange(sizes.size())) { if (sizes[i] < 0) { return Error::InvalidArgument; } diff --git a/runtime/executor/method.cpp b/runtime/executor/method.cpp index 0857bc1c976..7da7bafd3e5 100644 --- a/runtime/executor/method.cpp +++ b/runtime/executor/method.cpp @@ -8,6 +8,7 @@ #include +#include #include #include // @donotremove #include @@ -239,10 +240,10 @@ Result gen_instruction_arguments( for (size_t i = 0; i < num_args; ++i) { int32_t arg_idx = arg_idxs[i]; ET_CHECK_OR_RETURN_ERROR( - arg_idx < num_values, + static_cast(arg_idx) < num_values, InvalidProgram, - "Arg index %d >= %" ET_PRIsize_t, - arg_idx, + "Arg index %zd >= %" ET_PRIsize_t, + static_cast(arg_idx), num_values); arg_list[i] = &values[arg_idx]; } @@ -270,7 +271,7 @@ Result parse_cond_value(const EValue& cond_value) { static_cast(cond_val.scalar_type())); const bool* cond_data = cond_val.const_data_ptr(); - for (size_t i = 0; i < cond_val.numel(); i++) { + for (size_t i = 0; i < static_cast(cond_val.numel()); i++) { if (!cond_data[i]) { return false; } @@ -481,7 +482,7 @@ Error Method::parse_values(const NamedDataMap* named_data_map) { for (size_t j = 0; j < items->size(); j++) { auto value_index = items->Get(j); ET_CHECK_OR_RETURN_ERROR( - value_index >= 0 && value_index < n_value, + value_index >= 0 && static_cast(value_index) < n_value, InvalidProgram, "Invalid value index %" PRId64 " for IntList %" ET_PRIsize_t " index %" ET_PRIsize_t, @@ -644,7 +645,7 @@ Error populate_operator_name( has_overload ? 
op->overload()->c_str() : ""); ET_CHECK_OR_RETURN_ERROR(cx >= 0, Internal, "snprintf failed: %d", cx); ET_CHECK_OR_RETURN_ERROR( - cx < operator_name_size, + static_cast(cx) < operator_name_size, Internal, "Operator name %s%s%s with length %d " "truncated to %" ET_PRIsize_t " due to internal buffer limit.", @@ -672,7 +673,8 @@ Error Method::resolve_operator( char operator_name[kTempBufferSizeForName]; const auto ops = serialization_plan_->operators(); ET_CHECK_OR_RETURN_ERROR( - ops != nullptr && op_index < ops->size(), + ops != nullptr && + static_cast(op_index) < ops->size(), InvalidProgram, "Op index %" PRIu32 " out of range", op_index); @@ -721,7 +723,11 @@ Error Method::resolve_operator( Result op_function = get_op_function_from_registry(operator_name, {meta, count}); if (!op_function.ok()) { - ET_LOG(Error, "Missing operator: [%d] %s", op_index, operator_name); + ET_LOG( + Error, + "Missing operator: [%zd] %s", + static_cast(op_index), + operator_name); return op_function.error(); } kernels[kernel_index] = op_function.get(); @@ -923,10 +929,10 @@ Error Method::init( instr_args) ->cond_value_index(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < n_value_, + index >= 0 && static_cast(index) < n_value_, InvalidProgram, - "Index %d negative or >= %" ET_PRIsize_t, - index, + "Index %zd negative or >= %" ET_PRIsize_t, + static_cast(index), n_value_); chain_instruction_arg_lists[instr_idx] = InstructionArgs(); } break; @@ -944,9 +950,9 @@ Error Method::init( ET_CHECK_OR_RETURN_ERROR( num_instructions_missing_op == 0, OperatorMissing, - "There are %d instructions don't have corresponding operator registered. " + "There are %zu instructions don't have corresponding operator registered. " "See logs for details", - num_instructions_missing_op); + static_cast(num_instructions_missing_op)); if (delayed_error != Error::Ok) { return delayed_error; } @@ -1315,7 +1321,7 @@ Error Method::execute_instruction() { auto delegate_idx = instruction->instr_args_as_DelegateCall()->delegate_index(); ET_CHECK_OR_RETURN_ERROR( - delegate_idx < n_delegate_, + static_cast(delegate_idx) < n_delegate_, Internal, "DELEGATE_CALL index %" PRIu32 " >= num delegates %" ET_PRIsize_t " at instruction %" ET_PRIsize_t, @@ -1609,18 +1615,18 @@ Method::~Method() { // Destroy the values. It's necessary in ATen mode, where the refcount of // Tensors needs to be decremented properly. if (values_ != nullptr) { - for (int i = 0; i < n_value_; ++i) { + for (size_t i = 0; i < n_value_; ++i) { values_[i].~EValue(); } } // Free any resources associated with delegate backends. if (delegates_ != nullptr) { - for (int i = 0; i < n_delegate_; i++) { + for (size_t i = 0; i < n_delegate_; i++) { delegates_[i].~BackendDelegate(); } } // Free resources associated with external constants. - for (int i = 0; i < n_external_constants_; i++) { + for (const auto i : c10::irange(n_external_constants_)) { external_constants_[i].buffer.~FreeableBuffer(); } // All other fields are trivially destructible. diff --git a/runtime/executor/method_meta.cpp b/runtime/executor/method_meta.cpp index bcc2390d2bd..651a815c335 100644 --- a/runtime/executor/method_meta.cpp +++ b/runtime/executor/method_meta.cpp @@ -56,7 +56,7 @@ size_t calculate_nbytes( Span sizes, executorch::aten::ScalarType scalar_type) { ssize_t n = 1; - for (ssize_t i = 0; i < sizes.size(); i++) { + for (size_t i = 0; i < sizes.size(); i++) { n *= sizes[i]; } // Use the full namespace to disambiguate from c10::elementSize. 
@@ -110,7 +110,7 @@ size_t MethodMeta::num_inputs() const { Result MethodMeta::input_tag(size_t index) const { auto num_inputs = this->num_inputs(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < num_inputs, + index < num_inputs, InvalidArgument, "index %zu out of range. num_inputs: %zu", index, @@ -118,10 +118,10 @@ Result MethodMeta::input_tag(size_t index) const { auto input_index = s_plan_->inputs()->Get(index); size_t num_values = s_plan_->values()->size(); ET_CHECK_OR_RETURN_ERROR( - input_index >= 0 && input_index < num_values, + input_index >= 0 && static_cast(input_index) < num_values, InvalidProgram, - "internal value index %d out of range [0,%zu) for input %zu", - input_index, + "internal value index %zd out of range [0,%zu) for input %zu", + static_cast(input_index), num_values, index); auto serialization_value = s_plan_->values()->Get(input_index); @@ -160,7 +160,7 @@ size_t MethodMeta::num_outputs() const { Result MethodMeta::output_tag(size_t index) const { auto num_outputs = this->num_outputs(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < num_outputs, + index < num_outputs, InvalidArgument, "index %zu out of range. num_outputs: %zu", index, @@ -168,10 +168,10 @@ Result MethodMeta::output_tag(size_t index) const { auto output_index = s_plan_->outputs()->Get(index); size_t num_values = s_plan_->values()->size(); ET_CHECK_OR_RETURN_ERROR( - output_index >= 0 && output_index < num_values, + output_index >= 0 && static_cast(output_index) < num_values, InvalidProgram, - "internal value index %d out of range [0,%zu) for output %zu", - output_index, + "internal value index %zd out of range [0,%zu) for output %zu", + static_cast(output_index), num_values, index); auto serialization_value = s_plan_->values()->Get(output_index); @@ -218,7 +218,7 @@ size_t MethodMeta::num_memory_planned_buffers() const { Result MethodMeta::memory_planned_buffer_size(size_t index) const { auto num_buffers = this->num_memory_planned_buffers(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < num_buffers, + index < num_buffers, InvalidArgument, "index %zu out of range. num_buffers: %zu", index, diff --git a/runtime/executor/program.cpp b/runtime/executor/program.cpp index 964b8c8bdac..67f1edd4df3 100644 --- a/runtime/executor/program.cpp +++ b/runtime/executor/program.cpp @@ -163,10 +163,10 @@ Result get_execution_plan( ET_CHECK_OR_RETURN_ERROR( constant_buffer == nullptr || constant_buffer->size() == 0, InvalidProgram, - "constant_buffer contains %u items, " - "constant_segment.offsets contains %u items. Only one should be used.", - constant_buffer->size(), - constant_segment->offsets()->size()); + "constant_buffer contains %zu items, " + "constant_segment.offsets contains %zu items. 
Only one should be used.", + static_cast(constant_buffer->size()), + static_cast(constant_segment->offsets()->size())); const auto* segments = flatbuffer_program->segments(); ET_CHECK_OR_RETURN_ERROR( segments != nullptr, InvalidProgram, "No segments in program"); @@ -176,9 +176,9 @@ Result get_execution_plan( ET_CHECK_OR_RETURN_ERROR( constant_segment->segment_index() < segments->size(), InvalidProgram, - "Constant segment index %d invalid for program segments range %d", - constant_segment->segment_index(), - segments->size()); + "Constant segment index %zu invalid for program segments range %zu", + static_cast(constant_segment->segment_index()), + static_cast(segments->size())); const executorch_flatbuffer::DataSegment* data_segment = segments->Get(constant_segment->segment_index()); @@ -347,8 +347,8 @@ Result Program::get_constant_buffer_data( ET_CHECK_OR_RETURN_ERROR( storage_size <= nbytes, InvalidArgument, - "Constant buffer size %u larger than allocated nbytes %zu", - storage_size, + "Constant buffer size %zu larger than allocated nbytes %zu", + static_cast(constant_buffer[buffer_index]->storage()->size()), nbytes); return storage->data(); @@ -479,8 +479,8 @@ Error Program::load_mutable_subsegment_into( if (segment_offsets->segment_index() >= num_segments) { ET_LOG( Error, - "Segment index %u out of range (>= %zu)", - segment_offsets->segment_index(), + "Segment index %zu out of range (>= %zu)", + static_cast(segment_offsets->segment_index()), num_segments); return Error::NotFound; } diff --git a/runtime/executor/targets.bzl b/runtime/executor/targets.bzl index c5d07448a06..8993c5dc473 100644 --- a/runtime/executor/targets.bzl +++ b/runtime/executor/targets.bzl @@ -74,6 +74,10 @@ def define_common_targets(): "program.h", "tensor_parser.h", ], + compiler_flags = select({ + "ovr_config//os:windows": [], + "DEFAULT" :["-Wno-error=deprecated-declarations"] + }), preprocessor_flags = _program_preprocessor_flags(), exported_deps = [ ":memory_manager", diff --git a/runtime/executor/tensor_parser.h b/runtime/executor/tensor_parser.h index cfd711713ac..362f0b11e20 100644 --- a/runtime/executor/tensor_parser.h +++ b/runtime/executor/tensor_parser.h @@ -91,7 +91,7 @@ parseListOptionalType( evalp_list[output_idx] = nullptr; } else { ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < values_len, + index >= 0 && static_cast(index) < values_len, InvalidProgram, "Invalid value index %" PRId32 " for ListOptional", index); diff --git a/runtime/executor/tensor_parser_exec_aten.cpp b/runtime/executor/tensor_parser_exec_aten.cpp index de809ee09cc..002c7366be6 100644 --- a/runtime/executor/tensor_parser_exec_aten.cpp +++ b/runtime/executor/tensor_parser_exec_aten.cpp @@ -64,7 +64,8 @@ ET_NODISCARD Result getMemPlannedPtr( "size_t cannot hold memory offset 0x%08" PRIx32 ".%08" PRIx32, memory_offset_high, memory_offset_low); - memory_offset |= static_cast(memory_offset_high) << 32; + memory_offset |= static_cast(memory_offset_high) + << (sizeof(size_t) - sizeof(uint32_t)); } return allocator->get_offset_address(memory_id, memory_offset, nbytes); } @@ -94,7 +95,7 @@ ET_NODISCARD Result> parseTensorList( size_t output_idx = 0; for (int32_t tensor_index : *tensor_indices) { ET_CHECK_OR_RETURN_ERROR( - tensor_index >= 0 && tensor_index < values_len, + tensor_index >= 0 && static_cast(tensor_index) < values_len, InvalidProgram, "Invalid value index %" PRId32 " for TensorList", tensor_index); @@ -123,7 +124,9 @@ ET_NODISCARD Error validateTensorLayout( static_cast(expected_layout.scalar_type())); int dim = 
s_tensor->sizes()->size(); ET_CHECK_OR_RETURN_ERROR( - dim == expected_layout.sizes().size(), + dim >= 0, InvalidExternalData, "Dim is negative: %d", dim) + ET_CHECK_OR_RETURN_ERROR( + static_cast(dim) == expected_layout.sizes().size(), InvalidExternalData, "Dim mismatch. Expected %d, got %zu.", dim, @@ -150,7 +153,7 @@ ET_NODISCARD Error validateTensorLayout( // Check if key exists in entries. If it does, return a pointer to the entry // otherwise return a nullptr. NamedData* get_data_by_key(const char* key, Span entries) { - for (int i = 0; i < entries.size(); i++) { + for (const auto i : c10::irange(entries.size())) { if (strcmp(key, entries[i].key) == 0) { return &entries[i]; } diff --git a/runtime/executor/tensor_parser_portable.cpp b/runtime/executor/tensor_parser_portable.cpp index b72fedc5eee..4b424b29f5c 100644 --- a/runtime/executor/tensor_parser_portable.cpp +++ b/runtime/executor/tensor_parser_portable.cpp @@ -107,12 +107,12 @@ Result parseTensor( // detect bad positive values, but we can reject negative values, which would // otherwise panic in the TensorImpl ctor. dim_order_to_stride() will validate // dim_order. - for (int i = 0; i < dim; i++) { + for (flatbuffers::uoffset_t i = 0; i < dim; i++) { ET_CHECK_OR_RETURN_ERROR( sizes[i] >= 0, InvalidProgram, - "Negative size[%d] %" PRId32, - i, + "Negative size[%zu] %" PRId32, + static_cast(i), sizes[i]); } diff --git a/runtime/kernel/operator_registry.cpp b/runtime/kernel/operator_registry.cpp index b51c2567f0a..85705e5b3fd 100644 --- a/runtime/kernel/operator_registry.cpp +++ b/runtime/kernel/operator_registry.cpp @@ -79,7 +79,7 @@ Error register_kernels_internal(const Span kernels) { for (const auto& kernel : kernels) { // Linear search. This is fine if the number of kernels is small. - for (int32_t i = 0; i < num_registered_kernels; i++) { + for (size_t i = 0; i < num_registered_kernels; i++) { Kernel k = registered_kernels[i]; if (strcmp(kernel.name_, k.name_) == 0 && kernel.kernel_key_ == k.kernel_key_) { @@ -188,7 +188,7 @@ Error make_kernel_key_string( buf_size -= 1; // Add dim order. - for (int j = 0; j < meta.dim_order_.size(); j++) { + for (size_t j = 0; j < meta.dim_order_.size(); j++) { n = copy_char_as_number_to_buf((int)meta.dim_order_[j], buf, buf_size); if (n < 0) { return Error::InvalidArgument; diff --git a/runtime/kernel/operator_registry.h b/runtime/kernel/operator_registry.h index 82815852e6f..8e1eaca9981 100644 --- a/runtime/kernel/operator_registry.h +++ b/runtime/kernel/operator_registry.h @@ -33,7 +33,7 @@ #define ET_LOG_TENSOR_META(meta_list) \ for (const auto& meta : meta_list) { \ ET_LOG(Error, "dtype: %d | dim order: [", int(meta.dtype_)); \ - for (int i = 0; i < meta.dim_order_.size(); i++) { \ + for (size_t i = 0; i < meta.dim_order_.size(); i++) { \ ET_LOG(Error, "%d,", static_cast(meta.dim_order_[i])); \ } \ ET_LOG(Error, "]"); \ @@ -74,7 +74,7 @@ struct TensorMeta { if (dim_order_.size() != other.dim_order_.size()) { return false; } - for (int i = 0; i < dim_order_.size(); i++) { + for (size_t i = 0; i < dim_order_.size(); i++) { if (dim_order_[i] != other.dim_order_[i]) { return false; } diff --git a/runtime/platform/log.cpp b/runtime/platform/log.cpp index c1ad6ddcc0d..6529c73b238 100644 --- a/runtime/platform/log.cpp +++ b/runtime/platform/log.cpp @@ -92,8 +92,7 @@ void vlogf( } buf[kMaxLogMessageLength - 1] = 0; - et_pal_log_level_t pal_level = - (int(level) >= 0 && level < LogLevel::NumLevels) + et_pal_log_level_t pal_level = (level < LogLevel::NumLevels) ? 
kLevelToPal[size_t(level)] : et_pal_log_level_t::kUnknown; diff --git a/runtime/platform/log.h b/runtime/platform/log.h index 9ad234b2520..72ea8528442 100644 --- a/runtime/platform/log.h +++ b/runtime/platform/log.h @@ -33,6 +33,15 @@ #define ET_LOG_ENABLED 1 #endif // !defined(ET_LOG_ENABLED) +// Even though it is supposed to be "portable" some toolchains +// do not define, so providing a definition here +#ifndef PRIu64 +#define PRIu64 "llu" +#endif +#ifndef PRId64 +#define PRId64 "lld" +#endif + namespace executorch { namespace runtime { diff --git a/runtime/platform/profiler.cpp b/runtime/platform/profiler.cpp index 2f514286aa1..21f68963c78 100644 --- a/runtime/platform/profiler.cpp +++ b/runtime/platform/profiler.cpp @@ -129,7 +129,8 @@ void track_allocation(int32_t id, uint32_t size) { uint32_t track_allocator(const char* name) { ET_CHECK_MSG( prof_header->allocator_entries < MEM_PROFILE_MAX_ALLOCATORS, - "Out of allocator tracking space, %d is needed. Increase MEM_PROFILE_MAX_ALLOCATORS and re-compile", + "Out of allocator tracking space, %" PRIu32 + " is needed. Increase MEM_PROFILE_MAX_ALLOCATORS and re-compile", prof_header->allocator_entries); size_t str_len = strlen(name); size_t num_allocators = prof_header->allocator_entries; @@ -151,7 +152,8 @@ void profiling_create_block(const char* name) { num_blocks += 1; ET_CHECK_MSG( num_blocks <= MAX_PROFILE_BLOCKS, - "Only %d blocks are supported and they've all been used up but %d is used. Increment MAX_PROFILE_BLOCKS and re-run", + "Only %d blocks are supported and they've all been used up but %" PRIu32 + " is used. Increment MAX_PROFILE_BLOCKS and re-run", MAX_PROFILE_BLOCKS, num_blocks); } diff --git a/schema/extended_header.cpp b/schema/extended_header.cpp index fdc463207ba..3236b040c49 100644 --- a/schema/extended_header.cpp +++ b/schema/extended_header.cpp @@ -14,8 +14,6 @@ #include #include -#pragma clang diagnostic ignored "-Wdeprecated" - namespace executorch { namespace runtime { diff --git a/test/build_size_test.sh b/test/build_size_test.sh index 823b399fe34..09c0188ff9b 100644 --- a/test/build_size_test.sh +++ b/test/build_size_test.sh @@ -11,9 +11,8 @@ set -e # shellcheck source=/dev/null source "$(dirname "${BASH_SOURCE[0]}")/../.ci/scripts/utils.sh" -# TODO(#8149): Remove -Wno-sign-compare # TODO(#8357): Remove -Wno-int-in-bool-context -COMMON_CXXFLAGS="-fno-exceptions -fno-rtti -Wall -Werror -Wno-sign-compare -Wno-unknown-pragmas -Wno-int-in-bool-context" +COMMON_CXXFLAGS="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context" cmake_install_executorch_lib() { echo "Installing libexecutorch.a"
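
The two patterns this patch applies throughout the kernels and runtime are (1) casting signed tensor metadata such as dim() and numel() before comparing it against size_t counters, and (2) replacing hand-written index loops with c10::irange so the counter type follows the loop bound. The following sketch is not part of the patch; it only illustrates the idea under stated assumptions. The <c10/util/irange.h> header is the one vendored in this repo, and TensorLike is a hypothetical stand-in for executorch::aten::Tensor used purely to keep the example self-contained.

#include <c10/util/irange.h>

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the Tensor API: dim()/size() return signed values,
// which is what triggers -Werror=sign-compare against size_t loop counters.
struct TensorLike {
  std::vector<int64_t> sizes;
  int64_t dim() const {
    return static_cast<int64_t>(sizes.size());
  }
  int64_t size(int64_t d) const {
    return sizes[static_cast<size_t>(d)];
  }
};

// Pattern 1: keep the size_t counter but cast the signed bound explicitly,
// mirroring the `static_cast<...>(t.dim())` edits in this diff.
size_t numel_with_cast(const TensorLike& t) {
  size_t n = 1;
  for (size_t d = 0; d < static_cast<size_t>(t.dim()); ++d) {
    n *= static_cast<size_t>(t.size(static_cast<int64_t>(d)));
  }
  return n;
}

// Pattern 2: let c10::irange deduce the counter type from the bound, so no
// cast is needed at the comparison and the loop reads like the patched code.
size_t numel_with_irange(const TensorLike& t) {
  size_t n = 1;
  for (const auto d : c10::irange(t.dim())) {
    n *= static_cast<size_t>(t.size(d));
  }
  return n;
}

Either form compiles cleanly with -Wall -Werror and no -Wno-sign-compare, which is what allows the build_size_test.sh change above to drop that suppression flag.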