diff --git a/.Package.swift/kernels_torchao/dummy.swift b/.Package.swift/kernels_torchao/dummy.swift
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/.Package.swift/kernels_torchao_debug/dummy.swift b/.Package.swift/kernels_torchao_debug/dummy.swift
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/.ci/docker/ci_commit_pins/optimum-executorch.txt b/.ci/docker/ci_commit_pins/optimum-executorch.txt
index 9b3126b4093..9c1dac7fa91 100644
--- a/.ci/docker/ci_commit_pins/optimum-executorch.txt
+++ b/.ci/docker/ci_commit_pins/optimum-executorch.txt
@@ -1 +1 @@
-eea657ddbdeb1118943a92fb73c289985c3ee1ba
+36e3dd54effb3f6d13d792029609292fdd5502bb
diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index 6305196d2ad..1082cb4d2d1 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-6fc0ad22f0a07b6f38d138861c56a765d5a9bb02
+e7152ff8a6a929a0db7f3f4a72a5b6d471769cd3
diff --git a/.ci/scripts/build-mediatek-sdk.sh b/.ci/scripts/build-mediatek-sdk.sh
index 81e64b241ce..e01e10d6009 100755
--- a/.ci/scripts/build-mediatek-sdk.sh
+++ b/.ci/scripts/build-mediatek-sdk.sh
@@ -14,9 +14,9 @@ build_neuron_backend() {
export NEURON_BUFFER_ALLOCATOR_LIB=${MEDIATEK_SDK_ROOT}/libneuron_buffer_allocator.so
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
-
cd ${EXECUTORCH_ROOT}
./backends/mediatek/scripts/mtk_build.sh
+ ./examples/mediatek/mtk_build_examples.sh
}
build_neuron_backend
diff --git a/.ci/scripts/setup-conda.sh b/.ci/scripts/setup-conda.sh
index 5466cc0d60d..a725c90dd82 100755
--- a/.ci/scripts/setup-conda.sh
+++ b/.ci/scripts/setup-conda.sh
@@ -9,7 +9,7 @@ set -ex
install_conda() {
pushd .ci/docker || return
- ${CONDA_INSTALL} -y --file conda-env-ci.txt
+ ${CONDA_INSTALL} -c conda-forge -y --file conda-env-ci.txt
popd || return
}
diff --git a/.ci/scripts/setup-linux.sh b/.ci/scripts/setup-linux.sh
index a090571ab49..feb8a128b17 100755
--- a/.ci/scripts/setup-linux.sh
+++ b/.ci/scripts/setup-linux.sh
@@ -11,6 +11,7 @@ set -exu
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
read -r BUILD_TOOL BUILD_MODE EDITABLE < <(parse_args "$@")
+echo "Build tool: $BUILD_TOOL, Mode: $BUILD_MODE"
# As Linux job is running inside a Docker container, all of its dependencies
# have already been installed, so we use PyTorch build from source here instead
diff --git a/.ci/scripts/setup-vulkan-linux-deps.sh b/.ci/scripts/setup-vulkan-linux-deps.sh
index c0b2596f20e..1266bce38a6 100755
--- a/.ci/scripts/setup-vulkan-linux-deps.sh
+++ b/.ci/scripts/setup-vulkan-linux-deps.sh
@@ -23,6 +23,7 @@ install_swiftshader() {
export VK_ICD_FILENAMES="${_swiftshader_dir}/swiftshader/build/Linux/vk_swiftshader_icd.json"
export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/"
+ export ETVK_USING_SWIFTSHADER=1
}
install_vulkan_sdk() {
diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh
new file mode 100755
index 00000000000..254d974160a
--- /dev/null
+++ b/.ci/scripts/test_backend_linux.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+set -eux
+
+SUITE=$1
+FLOW=$2
+ARTIFACT_DIR=$3
+
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+
+echo "Running backend test job for suite $SUITE, flow $FLOW."
+echo "Saving job artifacts to $ARTIFACT_DIR."
+
+# The generic Linux job chooses to use base env, not the one set up by the image
+eval "$(conda shell.bash hook)"
+CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+conda activate "${CONDA_ENV}"
+
+export PYTHON_EXECUTABLE=python
+
+# CMake options to use, in addition to the defaults.
+EXTRA_BUILD_ARGS=""
+
+if [[ "$FLOW" == *qnn* ]]; then
+ # Setup QNN sdk and deps - note that this is a bit hacky due to the nature of the
+ # Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
+ # cleaned up.
+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+ PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+  QNN_X86_LIB_DIR=$(realpath build-x86/lib/)
+  QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
+  export LD_LIBRARY_PATH="$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
+
+ # TODO Get SDK root from install scripts
+ EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
+fi
+
+if [[ "$FLOW" == *vulkan* ]]; then
+ # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+ source .ci/scripts/setup-vulkan-linux-deps.sh
+
+ EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
+fi
+
+# We need the runner to test the built library.
+PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
+
+EXIT_CODE=0
+python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$REPORT_FILE" || EXIT_CODE=$?
+
+# Generate markdown summary.
+python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" --exit-code $EXIT_CODE > "${GITHUB_STEP_SUMMARY:-step_summary.md}"
diff --git a/.ci/scripts/test_backend_macos.sh b/.ci/scripts/test_backend_macos.sh
new file mode 100755
index 00000000000..c31fd504b03
--- /dev/null
+++ b/.ci/scripts/test_backend_macos.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+set -eux
+
+SUITE=$1
+FLOW=$2
+ARTIFACT_DIR=$3
+
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+
+echo "Running backend test job for suite $SUITE, flow $FLOW."
+echo "Saving job artifacts to $ARTIFACT_DIR."
+
+${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
+
+bash .ci/scripts/setup-conda.sh
+eval "$(conda shell.bash hook)"
+
+export PYTHON_EXECUTABLE=python
+${CONDA_RUN} --no-capture-output .ci/scripts/setup-macos.sh --build-tool cmake --build-mode Release
+
+EXIT_CODE=0
+${CONDA_RUN} --no-capture-output python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$REPORT_FILE" || EXIT_CODE=$?
+
+# Generate markdown summary.
+${CONDA_RUN} --no-capture-output python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" --exit-code $EXIT_CODE > "${GITHUB_STEP_SUMMARY:-step_summary.md}"
diff --git a/.ci/scripts/test_huggingface_optimum_model.py b/.ci/scripts/test_huggingface_optimum_model.py
new file mode 100644
index 00000000000..05b25299522
--- /dev/null
+++ b/.ci/scripts/test_huggingface_optimum_model.py
@@ -0,0 +1,403 @@
+import argparse
+import gc
+import logging
+import math
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import List
+
+import torch
+from datasets import load_dataset
+
+from optimum.executorch import (
+ ExecuTorchModelForCausalLM,
+ ExecuTorchModelForImageClassification,
+ ExecuTorchModelForMaskedLM,
+ ExecuTorchModelForSeq2SeqLM,
+ ExecuTorchModelForSpeechSeq2Seq,
+)
+from transformers import (
+ AutoConfig,
+ AutoModelForCausalLM,
+ AutoModelForImageClassification,
+ AutoProcessor,
+ AutoTokenizer,
+)
+
+
+def cli_export(command, model_dir):
+ p = Path(model_dir)
+ if p.exists():
+ if not p.is_dir():
+ raise Exception(f"Path {model_dir} already exists and is not a directory.")
+ if any(p.iterdir()):
+ raise Exception(
+ f"Existing directory {model_dir} is non-empty. Please remove it first."
+ )
+ try:
+ subprocess.run(command, check=True)
+ print("Export completed successfully.")
+ except subprocess.CalledProcessError as e:
+ print(f"Export failed with error: {e}")
+
+
+def check_causal_lm_output_quality(
+ model_id: str, generated_tokens: List[int], max_perplexity_threshold: float = 100.0
+):
+ """
+ Evaluates the quality of text generated by a causal language model by calculating its perplexity.
+
+ Args:
+ model_id: HuggingFace model identifier (e.g., "google/gemma2-2b")
+ generated_tokens: The tokens generated by the exported model to evaluate
+ max_perplexity_threshold: Maximum acceptable perplexity (lower is better)
+
+ Returns:
+        bool: True if the perplexity is within the threshold, False otherwise.
+ """
+ logging.info(f"Starting perplexity check with model '{model_id}' ...")
+ # Load model
+ model = AutoModelForCausalLM.from_pretrained(
+ model_id,
+ low_cpu_mem_usage=True,
+ use_cache=False,
+ torch_dtype=torch.bfloat16,
+ )
+
+ with torch.no_grad():
+ outputs = model(input_ids=generated_tokens, labels=generated_tokens)
+
+ # Get the loss (negative log-likelihood)
+ loss = outputs.loss.item()
+
+ # Calculate perplexity (exp of the average negative log-likelihood)
+ perplexity = math.exp(loss)
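+    # For example, an average negative log-likelihood of 3.0 yields a perplexity of
+    # exp(3.0) ≈ 20.1; lower perplexity means the model finds the text more predictable.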
+
+ is_quality_ok = perplexity <= max_perplexity_threshold
+ if is_quality_ok:
+ logging.info(
+ f"✓ Perplexity check passed: {perplexity:.2f} <= {max_perplexity_threshold}"
+ )
+ else:
+ logging.warning(
+ f"✗ Perplexity check failed: {perplexity:.2f} > {max_perplexity_threshold}"
+ )
+
+ # Clean up immediately
+ del model
+ del outputs
+ gc.collect()
+
+ return is_quality_ok
+
+
+def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only=False):
+ command = [
+ "optimum-cli",
+ "export",
+ "executorch",
+ "--model",
+ model_id,
+ "--task",
+ "text-generation",
+ "--recipe",
+ recipe,
+ "--output_dir",
+ model_dir,
+ ]
+ if "xnnpack" in recipe:
+ command += [
+ "--use_custom_sdpa",
+ "--use_custom_kv_cache",
+ ]
+ if quantize:
+ command += [
+ "--qlinear",
+ "8da4w",
+ "--qembedding",
+ "8w",
+ ]
+ elif "coreml" in recipe:
+ command += [
+ "--disable_dynamic_shapes",
+ ]
+ if quantize:
+ command += [
+ "--qlinear",
+ "4w",
+ "--qembedding",
+ "8w",
+ ]
+ else:
+ assert (
+ not quantize
+ ), "Quantization is only supported for XnnPack and CoreML recipes at the moment."
+
+ if not run_only:
+ cli_export(command, model_dir)
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ tokenizer.save_pretrained(model_dir)
+ model = ExecuTorchModelForCausalLM.from_pretrained(model_dir)
+ generated_text = model.text_generation(
+ tokenizer=tokenizer,
+ prompt="Simply put, the theory of relativity states that",
+ max_seq_len=64,
+ )
+ print(f"\nGenerated text:\n\t{generated_text}")
+ generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids
+
+ # Free memory before loading eager for quality check
+ del model
+ del tokenizer
+ gc.collect()
+
+ assert check_causal_lm_output_quality(model_id, generated_tokens) is True
+
+
+def test_fill_mask(model_id, model_dir, recipe, *, quantize=True, run_only=False):
+ command = [
+ "optimum-cli",
+ "export",
+ "executorch",
+ "--model",
+ model_id,
+ "--task",
+ "fill-mask",
+ "--recipe",
+ recipe,
+ "--output_dir",
+ model_dir,
+ ]
+ if "coreml" in recipe and quantize:
+ command += [
+ "--qlinear",
+ "4w",
+ "--qembedding",
+ "8w",
+ ]
+ else:
+ assert not quantize, "Quantization is not supported for non-CoreML recipes yet"
+
+ if not run_only:
+ cli_export(command, model_dir)
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = ExecuTorchModelForMaskedLM.from_pretrained(model_dir)
+ input_text = f"Paris is the {tokenizer.mask_token} of France."
+ inputs = tokenizer(
+ input_text,
+ return_tensors="pt",
+ padding="max_length",
+ max_length=10,
+ )
+
+ # Test inference using ExecuTorch model
+ exported_outputs = model.forward(inputs["input_ids"], inputs["attention_mask"])
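+    # Note: this assumes the mask token lands at sequence index 4 of the padded input
+    # for the tokenizers under test.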
+ predicted_masks = tokenizer.decode(exported_outputs[0, 4].topk(5).indices)
+ print(f"\nInput text:\n\t{input_text}\nPredicted masks:\n\t{predicted_masks}")
+
+
+def test_t5(model_id, model_dir, recipe, *, quantize=False, run_only=False):
+ assert not quantize, "Quantization is not supported for T5 model yet"
+
+ assert model_id == "google-t5/t5-small"
+ command = [
+ "optimum-cli",
+ "export",
+ "executorch",
+ "--model",
+ model_id,
+ "--task",
+ "text2text-generation",
+ "--recipe",
+ recipe,
+ "--output_dir",
+ model_dir,
+ ]
+ if not run_only:
+ cli_export(command, model_dir)
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = ExecuTorchModelForSeq2SeqLM.from_pretrained(model_dir)
+ article = (
+ " New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A"
+ " year later, she got married again in Westchester County, but to a different man and without divorcing"
+ " her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos"
+ ' declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married'
+ " once more, this time in the Bronx. In an application for a marriage license, she stated it was her"
+ ' "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false'
+ ' instrument for filing in the first degree," referring to her false statements on the 2010 marriage'
+ " license application, according to court documents. Prosecutors said the marriages were part of an"
+ " immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to"
+ " her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was"
+ " arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New"
+ " York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total,"
+ " Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All"
+ " occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be"
+ " married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors"
+ " said the immigration scam involved some of her husbands, who filed for permanent residence status"
+ " shortly after the marriages. Any divorces happened only after such filings were approved. It was"
+ " unclear whether any of the men will be prosecuted. The case was referred to the Bronx District"
+ " Attorney's Office by Immigration and Customs Enforcement and the Department of Homeland Security's"
+ ' Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt,'
+ " Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his"
+ " native Pakistan after an investigation by the Joint Terrorism Task Force."
+ )
+ article = "summarize: " + article.strip()
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ generated_text = model.text_generation(
+ tokenizer=tokenizer,
+ prompt=article,
+ )
+ expected_text = 'a year later, she got married again in westchester county, new york. she was married to a different man, but only 18 days after that marriage. she is facing two criminal counts of "offering a false instrument"'
+ print(f"Generated text:\n\t{generated_text}")
+ print(f"Expected text:\n\t{expected_text}")
+
+
+def test_whisper(model_id, model_dir, recipe, *, quantize=False, run_only=False):
+ assert not quantize, "Quantization is not supported for whisper model yet"
+
+ assert model_id == "openai/whisper-tiny"
+ command = [
+ "optimum-cli",
+ "export",
+ "executorch",
+ "--model",
+ model_id,
+ "--task",
+ "automatic-speech-recognition",
+ "--recipe",
+ recipe,
+ "--output_dir",
+ model_dir,
+ ]
+ if not run_only:
+ cli_export(command, model_dir)
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = ExecuTorchModelForSpeechSeq2Seq.from_pretrained(model_dir)
+ processor = AutoProcessor.from_pretrained(model_id)
+ dataset = load_dataset(
+ "distil-whisper/librispeech_long", "clean", split="validation"
+ )
+ sample = dataset[0]["audio"]
+
+ input_features = processor(
+ sample["array"],
+ return_tensors="pt",
+ truncation=False,
+ sampling_rate=sample["sampling_rate"],
+ ).input_features
+
+    # The current implementation of the transcribe method accepts up to 30 seconds of audio, so trim the input here.
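+    # (30 seconds corresponds to 3000 log-mel frames at Whisper's 10 ms hop length.)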
+ input_features_trimmed = input_features[:, :, :3000].contiguous()
+
+ generated_transcription = model.transcribe(tokenizer, input_features_trimmed)
+ expected_text = " Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel. Nor is Mr. Quilter's manner less interesting than his matter. He tells us that at this festive season of the year, with Christmas and roast beef looming before us, similarly drawn from eating and its results occur most readily to the mind. He has grave doubts whether Sir Frederick Latins work is really Greek after all, and can discover that."
+ print(f"Generated transcription: {generated_transcription}")
+ print(f"Expected transcription: {expected_text}")
+
+
+def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
+ assert not quantize, "Quantization is not supported for ViT models yet."
+
+ assert model_id == "google/vit-base-patch16-224"
+ command = [
+ "optimum-cli",
+ "export",
+ "executorch",
+ "--model",
+ model_id,
+ "--task",
+ "image-classification",
+ "--recipe",
+ recipe,
+ "--output_dir",
+ model_dir,
+ ]
+ if not run_only:
+ cli_export(command, model_dir)
+
+ config = AutoConfig.from_pretrained(model_id)
+ batch_size = 1
+ num_channels = config.num_channels
+ height = config.image_size
+ width = config.image_size
+ pixel_values = torch.rand(batch_size, num_channels, height, width)
+
+ # Test fetching and lowering the model to ExecuTorch
+ et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_dir)
+ eager_model = (
+ AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu")
+ )
+ with torch.no_grad():
+ eager_output = eager_model(pixel_values)
+ et_output = et_model.forward(pixel_values)
+
+ assert torch.allclose(
+ eager_output.logits, et_output, atol=1e-02, rtol=1e-02
+ ), "Model output does not match eager"
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", type=str, required=True)
+ parser.add_argument("--recipe", type=str, required=True)
+ parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+ parser.add_argument(
+ "--model_dir",
+ type=str,
+ required=False,
+ help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+ )
+ args = parser.parse_args()
+
+ _text_generation_mapping = {
+ "llama3.2-1b": ("NousResearch/Llama-3.2-1B", test_text_generation),
+ "qwen3-0.6b": ("Qwen/Qwen3-0.6B", test_text_generation),
+ "qwen3-1.7b": ("Qwen/Qwen3-1.7B", test_text_generation),
+ "gemma3-1b": (
+ "unsloth/gemma-3-1b-it",
+ test_text_generation,
+ ), # does not export for CoreML
+ "phi4-mini": (
+ "microsoft/Phi-4-mini-instruct",
+ test_text_generation,
+ ), # fails to lower for CoreML
+ "smollm2-135m": ("HuggingFaceTB/SmolLM2-135M", test_text_generation),
+ "smollm3-3b": ("HuggingFaceTB/SmolLM3-3B", test_text_generation),
+ "olmo-1b": ("allenai/OLMo-1B-hf", test_text_generation),
+ }
+
+ _mask_fill_mapping = {
+ "bert": ("google-bert/bert-base-uncased", test_fill_mask),
+        "roberta": ("FacebookAI/xlm-roberta-base", test_fill_mask),
+ "distilbert": ("distilbert/distilbert-base-uncased", test_fill_mask),
+ }
+
+ _misc_model_mapping = {
+ "whisper": ("openai/whisper-tiny", test_whisper),
+        "t5": ("google-t5/t5-small", test_t5),  # CoreML runtime failure
+ "vit": ("google/vit-base-patch16-224", test_vit),
+ }
+
+ model_to_model_id_and_test_function = (
+ _text_generation_mapping | _mask_fill_mapping | _misc_model_mapping
+ )
+
+ if args.model not in model_to_model_id_and_test_function:
+ raise ValueError(
+ f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
+ )
+
+ model_id, test_fn = model_to_model_id_and_test_function[args.model]
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ test_fn(
+ model_id=model_id,
+ model_dir=tmp_dir if args.model_dir is None else args.model_dir,
+ recipe=args.recipe,
+ quantize=args.quantize,
+ )
diff --git a/.ci/scripts/test_ios_ci.sh b/.ci/scripts/test_ios_ci.sh
index 6908d61483c..a89c2cc5809 100755
--- a/.ci/scripts/test_ios_ci.sh
+++ b/.ci/scripts/test_ios_ci.sh
@@ -36,7 +36,7 @@ say() {
say "Cloning the Demo App"
-git clone --depth 1 https://github.com/pytorch-labs/executorch-examples.git
+git clone --depth 1 https://github.com/meta-pytorch/executorch-examples.git
say "Installing CoreML Backend Requirements"
diff --git a/.ci/scripts/test_llama_lora.sh b/.ci/scripts/test_llama_lora.sh
new file mode 100644
index 00000000000..6337bbf76a2
--- /dev/null
+++ b/.ci/scripts/test_llama_lora.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+cmake_install_executorch_libraries() {
+ echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+ rm -rf cmake-out
+ retry cmake --preset llm \
+ -DCMAKE_INSTALL_PREFIX=cmake-out \
+ -DCMAKE_BUILD_TYPE=Release
+ cmake --build cmake-out -j9 --target install --config Release
+}
+
+cmake_build_llama_runner() {
+ echo "Building llama runner"
+ pushd extension/llm/tokenizers
+ echo "Updating tokenizers submodule"
+ git submodule update --init
+ popd
+ dir="examples/models/llama"
+ retry cmake \
+ -DBUILD_TESTING=OFF \
+ -DCMAKE_INSTALL_PREFIX=cmake-out \
+ -DCMAKE_BUILD_TYPE=Release \
+ -Bcmake-out/${dir} \
+ ${dir}
+ cmake --build cmake-out/${dir} -j9 --config Release
+}
+
+cleanup_files() {
+ echo "Deleting downloaded and generated files"
+ rm -rf "${DOWNLOADED_PATH}/"
+  rm -f result.txt result2.txt
+}
+
+# Download model artifacts from HF Hub.
+# Hosting in personal repo for now.
+HF_MODEL_REPO="lucylq/llama3_1B_lora"
+DOWNLOADED_PATH=$(
+ bash "$(dirname "${BASH_SOURCE[0]}")/download_hf_hub.sh" \
+ --model_id "${HF_MODEL_REPO}" \
+ --files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
+)
+# Build llama runner.
+cmake_install_executorch_libraries
+cmake_build_llama_runner
+
+# Constants.
+RUNTIME_ARGS="--tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
+PROMPT="What happens if you eat watermelon seeds?"
+EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"
+
+# Export LoRA PTE file.
+MODEL_NAME="llama_3_2_1B_lora"
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+ base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+ base.params="${DOWNLOADED_PATH}/params.json" \
+ base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+ base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+ base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+ model.use_kv_cache=true \
+ model.use_sdpa_with_kv_cache=true \
+ model.dtype_override="fp32" \
+ backend.xnnpack.enabled=true \
+ backend.xnnpack.extended_ops=true \
+ export.output_name="${MODEL_NAME}.pte"
+
+# Run llama runner
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_NAME}.pte --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT=$(cat result.txt)
+if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
+ echo "Expected result prefix: ${EXPECTED_PREFIX}"
+ echo "Actual result: ${RESULT}"
+ # Do not clean up files if test passes, as they're re-used in the next test.
+ echo "Success"
+else
+ echo "Expected result prefix: ${EXPECTED_PREFIX}"
+ echo "Actual result: ${RESULT}"
+ echo "Failure; results not the same"
+ cleanup_files
+ exit 1
+fi
+
+# Export LoRA PTE, PTD file.
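+# The program and adapter weights go into the .pte; the shared foundation weights are
+# split out into a .ptd file that the runner loads via --data_path.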
+MODEL_SEPARATE="${MODEL_NAME}_separate"
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+ base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+ base.params="${DOWNLOADED_PATH}/params.json" \
+ base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+ base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+ base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+ model.use_kv_cache=true \
+ model.use_sdpa_with_kv_cache=true \
+ model.dtype_override="fp32" \
+ backend.xnnpack.enabled=true \
+ backend.xnnpack.extended_ops=true \
+ export.output_name="${MODEL_SEPARATE}.pte" \
+ export.foundation_weights_file="${MODEL_SEPARATE}.ptd"
+
+# Run llama runner.
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT2=$(cat result2.txt)
+if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
+ echo "Expected result prefix: ${EXPECTED_PREFIX}"
+ echo "Actual result: ${RESULT2}"
+ echo "Success"
+ cleanup_files
+else
+ echo "Expected result prefix: ${EXPECTED_PREFIX}"
+ echo "Actual result: ${RESULT2}"
+ echo "Failure; results not the same"
+ cleanup_files
+ exit 1
+fi
diff --git a/.ci/scripts/test_llama_torchao_lowbit.sh b/.ci/scripts/test_llama_torchao_lowbit.sh
index ae8f74a5df5..5f472fad63b 100644
--- a/.ci/scripts/test_llama_torchao_lowbit.sh
+++ b/.ci/scripts/test_llama_torchao_lowbit.sh
@@ -29,27 +29,22 @@ cmake -DPYTHON_EXECUTABLE=python \
-DEXECUTORCH_ENABLE_LOGGING=1 \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
- -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
- -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
- -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_XNNPACK=OFF \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+ -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \
+ -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+ -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-out .
-cmake --build cmake-out -j16 --target install --config Release
+cmake --build cmake-out -j16 --config Release --target install
# Install llama runner with torchao
cmake -DPYTHON_EXECUTABLE=python \
- -DBUILD_TESTING=OFF \
-DCMAKE_BUILD_TYPE=Release \
- -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
- -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
- -DEXECUTORCH_BUILD_XNNPACK=OFF \
- -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
- -DEXECUTORCH_BUILD_TORCHAO=ON \
-Bcmake-out/examples/models/llama \
examples/models/llama
cmake --build cmake-out/examples/models/llama -j16 --config Release
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index a71fe85352d..035d30f6adb 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -166,34 +166,51 @@ test_model_with_qnn() {
export PYTHONPATH=$EXECUTORCH_ROOT/..
EXTRA_FLAGS=""
+ # Ordered by the folder name, then alphabetically by the model name
+ # Following models are inside examples/qualcomm/scripts folder
if [[ "${MODEL_NAME}" == "dl3" ]]; then
EXPORT_SCRIPT=deeplab_v3
- elif [[ "${MODEL_NAME}" == "mv3" ]]; then
- EXPORT_SCRIPT=mobilenet_v3
- elif [[ "${MODEL_NAME}" == "mv2" ]]; then
- EXPORT_SCRIPT=mobilenet_v2
- elif [[ "${MODEL_NAME}" == "ic4" ]]; then
- EXPORT_SCRIPT=inception_v4
+ elif [[ "${MODEL_NAME}" == "edsr" ]]; then
+ EXPORT_SCRIPT=edsr
+ # Additional deps for edsr
+ pip install piq
elif [[ "${MODEL_NAME}" == "ic3" ]]; then
EXPORT_SCRIPT=inception_v3
- elif [[ "${MODEL_NAME}" == "vit" ]]; then
- EXPORT_SCRIPT=torchvision_vit
+ elif [[ "${MODEL_NAME}" == "ic4" ]]; then
+ EXPORT_SCRIPT=inception_v4
elif [[ "${MODEL_NAME}" == "mb" ]]; then
EXPORT_SCRIPT=mobilebert_fine_tune
EXTRA_FLAGS="--num_epochs 1"
pip install scikit-learn
+ elif [[ "${MODEL_NAME}" == "mv2" ]]; then
+ EXPORT_SCRIPT=mobilenet_v2
+ elif [[ "${MODEL_NAME}" == "mv3" ]]; then
+ EXPORT_SCRIPT=mobilenet_v3
+ elif [[ "${MODEL_NAME}" == "vit" ]]; then
+ EXPORT_SCRIPT=torchvision_vit
elif [[ "${MODEL_NAME}" == "w2l" ]]; then
EXPORT_SCRIPT=wav2letter
elif [[ "${MODEL_NAME}" == "edsr" ]]; then
EXPORT_SCRIPT=edsr
# Additional deps for edsr
pip install piq
+ # Following models are inside examples/qualcomm/oss_scripts folder
+ elif [[ "${MODEL_NAME}" == "albert" ]]; then
+ EXPORT_SCRIPT=albert
+ elif [[ "${MODEL_NAME}" == "bert" ]]; then
+ EXPORT_SCRIPT=bert
+ elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
+ EXPORT_SCRIPT=conv_former
elif [[ "${MODEL_NAME}" == "cvt" ]]; then
EXPORT_SCRIPT=cvt
+ elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
+ EXPORT_SCRIPT=distilbert
elif [[ "${MODEL_NAME}" == "dit" ]]; then
EXPORT_SCRIPT=dit
elif [[ "${MODEL_NAME}" == "efficientnet" ]]; then
EXPORT_SCRIPT=efficientnet
+ elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
+ EXPORT_SCRIPT=eurobert
elif [[ "${MODEL_NAME}" == "focalnet" ]]; then
EXPORT_SCRIPT=focalnet
elif [[ "${MODEL_NAME}" == "mobilevit_v1" ]]; then
@@ -202,18 +219,10 @@ test_model_with_qnn() {
EXPORT_SCRIPT=mobilevit_v2
elif [[ "${MODEL_NAME}" == "pvt" ]]; then
EXPORT_SCRIPT=pvt
- elif [[ "${MODEL_NAME}" == "swin" ]]; then
- EXPORT_SCRIPT=swin_transformer
- elif [[ "${MODEL_NAME}" == "albert" ]]; then
- EXPORT_SCRIPT=albert
- elif [[ "${MODEL_NAME}" == "bert" ]]; then
- EXPORT_SCRIPT=bert
- elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
- EXPORT_SCRIPT=distilbert
- elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
- EXPORT_SCRIPT=eurobert
elif [[ "${MODEL_NAME}" == "roberta" ]]; then
EXPORT_SCRIPT=roberta
+ elif [[ "${MODEL_NAME}" == "swin" ]]; then
+ EXPORT_SCRIPT=swin_transformer
else
echo "Unsupported model $MODEL_NAME"
exit 1
@@ -231,7 +240,7 @@ test_model_with_qnn() {
"cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
SCRIPT_FOLDER=oss_scripts
;;
- "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
+ "albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
pip install evaluate
SCRIPT_FOLDER=oss_scripts
# 16bit models will encounter op validation fail on some operations,
diff --git a/.ci/scripts/test_qnn_static_llama.sh b/.ci/scripts/test_qnn_static_llama.sh
index a5f194ba0b9..d70eca81b69 100644
--- a/.ci/scripts/test_qnn_static_llama.sh
+++ b/.ci/scripts/test_qnn_static_llama.sh
@@ -33,12 +33,12 @@ echo "Creating tokenizer.bin"
$PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
set +e
-# Compile only as weight sharing is not applicable on x86
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --llama_artifacts . --compile_only
+# Compile only as weight sharing is not applicable on x86.
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
exit_code1=$?
# Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --llama_artifacts . --enable_x86_64
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
exit_code2=$?
# Check BC
diff --git a/.ci/scripts/unittest-buck2.sh b/.ci/scripts/unittest-buck2.sh
index f56db8924be..f748be62ac1 100755
--- a/.ci/scripts/unittest-buck2.sh
+++ b/.ci/scripts/unittest-buck2.sh
@@ -11,9 +11,10 @@ set -eux
# TODO: can't query //kernels/prim_ops because of non-buckified stuff in OSS.
buck2 query "//backends/apple/... + //backends/example/... + \
//backends/mediatek/... + //backends/transforms/... + \
-//backends/xnnpack/... + //configurations/... + //kernels/aten/... + \
-//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
-//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
+//backends/xnnpack/... + //configurations/... + //extension/flat_tensor: + \
+//extension/llm/runner: + //kernels/aten/... + //kernels/optimized/... + \
+//kernels/portable/... + //kernels/quantized/... + //kernels/test/... + \
+//runtime/... + //schema/... + //test/... + //util/..."
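+# Note: a trailing ":" (e.g. //extension/flat_tensor:) matches only the targets in that
+# package, while "/..." also recurses into subpackages.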
# TODO: optimized ops are unbuildable because they now use ATen; put
# them back after we can use PyTorch in OSS buck.
diff --git a/.ci/scripts/unittest-macos-cmake.sh b/.ci/scripts/unittest-macos-cmake.sh
index cdb40c40244..1a6cd2a15f2 100755
--- a/.ci/scripts/unittest-macos-cmake.sh
+++ b/.ci/scripts/unittest-macos-cmake.sh
@@ -11,3 +11,4 @@ ${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
# Run gtest
LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \
${CONDA_RUN} test/run_oss_cpp_tests.sh
+${CONDA_RUN} test/check_for_installed_private_headers_in_cmake_out.sh
diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
index 6902cc3dec1..f6f6ece786b 100644
--- a/.ci/scripts/utils.sh
+++ b/.ci/scripts/utils.sh
@@ -131,8 +131,6 @@ build_executorch_runner_cmake() {
else
CXXFLAGS=""
fi
- # This command uses buck2 to gather source files and buck2 could crash flakily
- # on MacOS
CXXFLAGS="$CXXFLAGS" retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
popd || return
diff --git a/.ci/scripts/zephyr-utils.sh b/.ci/scripts/zephyr-utils.sh
index 2b36c6b0427..28dca2c1dfb 100644
--- a/.ci/scripts/zephyr-utils.sh
+++ b/.ci/scripts/zephyr-utils.sh
@@ -6,9 +6,9 @@
# LICENSE file in the root directory of this source tree.
download_arm_zephyr_sdk () {
- wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.16.0/zephyr-sdk-0.16.0_linux-x86_64.tar.xz
- tar -xf zephyr-sdk-0.16.0_linux-x86_64.tar.xz
- rm -f zephyr-sdk-0.16.0_linux-x86_64.tar.xz
+ wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.2/zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+ tar -xf zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+ rm -f zephyr-sdk-0.17.2_linux-x86_64.tar.xz
}
setup_zephyr_et_module () {
diff --git a/.github/workflows/add-unanswered-to-project.yml b/.github/workflows/add-unanswered-to-project.yml
new file mode 100644
index 00000000000..ba2bc6c8436
--- /dev/null
+++ b/.github/workflows/add-unanswered-to-project.yml
@@ -0,0 +1,93 @@
+name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
+
+on:
+ workflow_dispatch:
+ pull_request:
+ paths:
+      - .github/workflows/add-unanswered-to-project.yml
+jobs:
+ add_to_project:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const projectId = "PVT_kwDOAUB9vs4A_PUL"; // PyTorch org project 136
+ const owner = 'pytorch';
+ const repo = 'executorch';
+
+ // List of authors to exclude
+ const excludedAuthors = new Set([
+ "nil-is-all", "cbilgin", "KimishPatel", "psiddh", "digantdesai", "SS-JIA", "ahmtox", "mcr229", "shoumikhin",
+ "manuelcandales", "metascroy", "cccclai", "rohansjoshi", "kirklandsign", "abhinaykukkadapu", "JacobSzwejbka",
+ "Conarnar", "lucylq", "larryliu0820", "BujSet", "Gasoonjia", "Juntian777", "guangy10", "jackzhxng",
+ "GregoryComer", "leafs1", "swolchok", "mergennachin", "tarun292", "byjlw", "jathu", "Jack-Khuu", "georgehong",
+ "zhenyan-zhang-meta", "silverguo", "dbort", "jorgep31415", "huydhn", "mcremon-meta", "trivedivivek", "angelayi",
+ "helunwencser", "hsharma35", "zhxchen17", "iseeyuan", "svekars", "nathanaelsee", "dulinriley", "jerryzh168",
+ "cmodi-meta", "bigfootjon", "sxu", "ydwu4", "Riandy", "tugsbayasgalan", "bsoyluoglu", "yangw-dev", "YIWENX14",
+ "namanahuja", "yushangdi", "limintang", "pianpwk", "viveknayakatmeta", "andreanicastro", "JakeStevens",
+ "gmagogsfm", "zonglinpeng", "eigen-k", "derekxu", "salilsdesai", "skrtskrtfb", "pssrawat", "r-barnes", "pytorchbot",
+ "pytorchmergebot", "pytorchupdatebot", "facebook-github-bot", "Erik-Lundell", "zingo", "AdrianLundell",
+ "oscarandersson8218", "per", "Sebastian-Larsson", "SaoirseARM", "robell", "mansnils", "martinlsm", "freddan80",
+ "YufengShi-dudu", "tom-arm", "perheld", "Jerry-Ge", "gggekov", "fumchin", "wwwind", "haowhsu-quic", "shewu-quic",
+ "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "cymbalrush", "DenisVieriu97", "billmguo",
+ "StrycekSimon", "jirioc", "robert-kalmar", "skywall", "neuropilot-captain"
+ ]);
+
+ async function addItem(contentId, type, number) {
+ try {
+ await github.graphql(`
+ mutation {
+ addProjectV2ItemById(input: {projectId: "${projectId}", contentId: "${contentId}"}) {
+ item { id }
+ }
+ }
+ `);
+ console.log(`Added ${type} #${number} to project`);
+ } catch (error) {
+ if (error.message && error.message.includes("A project item already exists for this content")) {
+ // Ignore if already exists
+ console.log(`${type} #${number} already in project`);
+ } else {
+ console.log(`Error adding ${type} #${number}: ${error.message}`);
+ }
+ }
+ }
+
+ try {
+ // Add open issues (not PRs) and exclude by author
+ const issues = await github.paginate(
+ github.rest.issues.listForRepo,
+ {
+ owner,
+ repo,
+ state: 'open',
+ filter: 'all'
+ }
+ );
+ for (const issue of issues) {
+ if (!issue.pull_request && !excludedAuthors.has(issue.user.login)) {
+ await addItem(issue.node_id, 'issue', issue.number);
+ }
+ }
+
+ // Add open, non-draft PRs (regardless of review state), exclude by author
+ const prs = await github.paginate(
+ github.rest.pulls.list,
+ {
+ owner,
+ repo,
+ state: 'open',
+ draft: false,
+ }
+ );
+ for (const pr of prs) {
+ if (!excludedAuthors.has(pr.user.login)) {
+ await addItem(pr.node_id, 'pr', pr.number);
+ }
+ }
+ } catch (error) {
+ core.setFailed(`Workflow failed: ${error.message}`);
+ }
diff --git a/.github/workflows/android-release-artifacts.yml b/.github/workflows/android-release-artifacts.yml
index 9ef8d046b8b..278e5abcc5f 100644
--- a/.github/workflows/android-release-artifacts.yml
+++ b/.github/workflows/android-release-artifacts.yml
@@ -90,7 +90,7 @@ jobs:
fi
FLAVOR="${{ inputs.flavor }}"
- if [[ "$FLAVOR" == "vulkan+xnnpack" ]]; then
+ if [[ "$FLAVOR" == "vulkan+xnnpack" || -z "$FLAVOR" ]]; then
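+            # An empty FLAVOR selects the default flavor, which also includes the Vulkan backend.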
export EXECUTORCH_BUILD_VULKAN=ON
fi
diff --git a/.github/workflows/build-presets.yml b/.github/workflows/build-presets.yml
index 404e0d0e71e..6f983ba58b6 100644
--- a/.github/workflows/build-presets.yml
+++ b/.github/workflows/build-presets.yml
@@ -6,8 +6,6 @@ on:
branches:
- main
- release/*
- paths:
- - .github/workflows/build-presets.yml
workflow_dispatch:
concurrency:
@@ -20,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- preset: [macos, ios, ios-simulator, pybind, llm]
+ preset: [macos, ios, ios-simulator, pybind, profiling, llm]
with:
job-name: build
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -105,30 +103,3 @@ jobs:
./install_requirements.sh > /dev/null
cmake --preset ${{ matrix.preset }}
cmake --build cmake-out -j$(( $(nproc) - 1 ))
-
- windows:
- uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
- strategy:
- fail-fast: false
- matrix:
- preset: [pybind]
- with:
- job-name: build
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- submodules: recursive
- timeout: 90
- script: |
- set -eux
- conda init powershell
- powershell -Command "& {
- \$ErrorActionPreference = 'Stop'
- Set-PSDebug -Trace 1
-
- conda create --yes --quiet -n et python=3.12
- conda activate et
-
- python install_requirements.py
- cmake --preset ${{ matrix.preset }}
- \$numCores = [System.Environment]::GetEnvironmentVariable('NUMBER_OF_PROCESSORS') - 1
- cmake --build cmake-out -j \$numCores
- }"
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 4658fdc0d26..c220b371c0a 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -36,3 +36,51 @@ jobs:
uses: ./.github/workflows/_link_check.yml
with:
ref: ${{ github.sha }}
+
+ backend-test-linux:
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ strategy:
+ fail-fast: false
+ matrix:
+ flow: [
+ qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w,
+ vulkan, vulkan_static_int8_per_channel,
+ xnnpack, xnnpack_dynamic_int8_per_channel, xnnpack_static_int8_per_channel, xnnpack_static_int8_per_tensor
+ ]
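+        # (Flow names encode the quantization scheme, e.g. qnn_16a4w = 16-bit activations / 4-bit weights.)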
+ suite: [models, operators]
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ runner: linux.4xlarge.memory
+ docker-image: ci-image:executorch-ubuntu-22.04-clang12
+ submodules: recursive
+ timeout: 120
+ upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
+ script: |
+ set -eux
+
+ source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
+
+ backend-test-macos:
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ permissions:
+ id-token: write
+ contents: read
+ strategy:
+ fail-fast: false
+ matrix:
+ flow: [coreml, coreml_static_int8]
+ suite: [models, operators]
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ runner: macos-m1-stable
+ python-version: 3.12
+ submodules: recursive
+ timeout: 120
+ upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
+ script: |
+ set -eux
+
+ # This is needed to get the prebuilt PyTorch wheel from S3
+ ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
+
+ source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml
index 89e1692df97..01bff087124 100644
--- a/.github/workflows/periodic.yml
+++ b/.github/workflows/periodic.yml
@@ -11,6 +11,8 @@ on:
branches:
- release/*
workflow_dispatch:
+ pull_request:
+ types: [opened, synchronize, reopened, labeled, unlabeled]
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}
@@ -32,10 +34,11 @@ jobs:
python-version: '3.10'
- name: Extract the list of models to test
id: gather-models
+ env:
+ EFFECTIVE_EVENT: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'ciflow/periodic') && 'schedule' || github.event_name }}
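+          # PRs labeled "ciflow/periodic" are treated as scheduled runs so that
+          # gather_test_models.py selects the same model set as the cron-triggered job.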
run: |
set -eux
-
- PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "${GITHUB_EVENT_NAME}"
+ PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "${EFFECTIVE_EVENT}"
test-models-linux:
name: test-models-linux
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index b697b4166e0..aa7be5dfb68 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -315,7 +315,7 @@ jobs:
bash examples/models/moshi/mimi/install_requirements.sh
# reinstall executorch
- bash ./install_executorch.sh
+ bash ./install_executorch.sh --minimal
# run python unittest
python -m unittest examples.models.moshi.mimi.test_mimi
@@ -406,7 +406,7 @@ jobs:
output=$(ls -la cmake-out/test/size_test)
arr=($output)
size=${arr[4]}
- threshold="51744"
+ threshold="51752"
if [[ "$size" -le "$threshold" ]]; then
echo "Success $size <= $threshold"
else
@@ -687,6 +687,36 @@ jobs:
# run llama runner in eager mode
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
+ test-llama-lora-linux:
+ name: test-llama-lora-linux
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ permissions:
+ id-token: write
+ contents: read
+ strategy:
+ fail-fast: false
+ with:
+ runner: linux.24xlarge
+ docker-image: ci-image:executorch-ubuntu-22.04-clang12
+ submodules: 'recursive'
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ timeout: 90
+ script: |
+      # The generic Linux job chooses to use base env, not the one set up by the image
+ CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+ conda activate "${CONDA_ENV}"
+
+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+ # Install llama requirements
+ bash examples/models/llama/install_requirements.sh
+
+ # install a recent version of torchtune.
+ PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+      # Export the LoRA model and run it with the llama runner.
+ PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh
+
test-mediatek-models-linux:
name: test-mediatek-models-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -771,6 +801,8 @@ jobs:
id-token: write
contents: read
strategy:
+ matrix:
+ enable-etdump: ['', '--enable-etdump']
fail-fast: false
with:
runner: linux.2xlarge
@@ -790,7 +822,7 @@ jobs:
source .ci/scripts/setup-emscripten.sh
# Test selective build
- bash scripts/build_wasm_tests.sh
+ bash scripts/build_wasm_tests.sh ${{ matrix.enable-etdump }}
# Install Jest
cd cmake-out-wasm/extension/wasm/test
@@ -828,8 +860,46 @@ jobs:
# Run pytest
PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
- # Run aot example:
- PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh
+ # Run aot examples:
+ PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh cifar10
+ PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2
+
+
+ test-vulkan-models-linux:
+ name: test-vulkan-models-linux
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ permissions:
+ id-token: write
+ contents: read
+ with:
+ runner: linux.2xlarge
+ docker-image: ci-image:executorch-ubuntu-22.04-clang12
+ submodules: 'recursive'
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ timeout: 90
+ script: |
+ set -eux
+
+      # The generic Linux job chooses to use base env, not the one set up by the image
+ CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+ conda activate "${CONDA_ENV}"
+
+ # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+ source .ci/scripts/setup-vulkan-linux-deps.sh
+
+ # Setup python
+ PYTHON_EXECUTABLE=python \
+ CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
+ .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+ PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh --build
+
+ # Test models serially
+ models="mv2 mv3 edsr resnet18 resnet50 dl3"
+ for model in $models; do
+ python -m examples.vulkan.export --model_name=$model --test
+ done
+
nxp-build-test:
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 00000000000..ae7cbe6857b
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,149 @@
+# The behavior is:
+# - If a PR is not labeled stale, after 60 days of inactivity label the PR as stale and comment about it.
+# - If a PR is labeled stale, after 30 days of inactivity close the PR.
+# - `high priority` and `no-stale` PRs are exempt.
+
+name: Close stale pull requests
+
+on:
+ schedule:
+ # Run daily at 00:30 UTC.
+ - cron: '30 0 * * *'
+ workflow_dispatch:
+
+jobs:
+ stale:
+ if: ${{ github.repository == 'pytorch/executorch' }}
+ runs-on: linux.large
+ permissions:
+ contents: read
+ pull-requests: write
+
+ steps:
+ - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+ with:
+ script: |
+ // Do some dumb retries on requests.
+ const retries = 7;
+ const baseBackoff = 100;
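+          // Exponential backoff: after failed attempt k, wait baseBackoff * 2^k ms
+          // (200 ms, 400 ms, ..., 6.4 s before the final attempt).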
+ const sleep = timeout => new Promise(resolve => setTimeout(resolve, timeout));
+ github.hook.wrap('request', async (request, options) => {
+ for (let attempt = 1; attempt <= retries; attempt++) {
+ try {
+ return await request(options);
+ } catch (err) {
+ if (attempt < retries) {
+ core.warning(`Request getting retried. Attempt: ${attempt}`);
+ await sleep(baseBackoff * Math.pow(2, attempt));
+ continue;
+ }
+ throw err;
+ }
+ }
+ });
+
+ const MAX_API_REQUESTS = 100;
+
+          // If a PR is not labeled stale, label it stale after 60 days without updates.
+ const STALE_LABEL_THRESHOLD_MS = 1000 * 60 * 60 * 24 * 60;
+          // For PRs already labeled stale, close them after 30 days without updates.
+ const STALE_CLOSE_THRESHOLD_MS = 1000 * 60 * 60 * 24 * 30;
+
+          const STALE_MESSAGE =
+            "Looks like this PR hasn't been updated in a while so we're going to go ahead and mark this as `Stale`. <br>" +
+            "Feel free to remove the `Stale` label if you feel this was a mistake. <br>" +
+            "If you are unable to remove the `Stale` label please contact a maintainer in order to do so. <br>" +
+            "If you want the bot to never mark this PR stale again, add the `no-stale` label. <br>" +
+            "`Stale` pull requests will automatically be closed after 30 days of inactivity.";
+
+ let numAPIRequests = 0;
+ let numProcessed = 0;
+
+ async function processPull(pull) {
+ core.info(`[${pull.number}] URL: ${pull.html_url}`);
+ numProcessed += 1;
+ const labels = pull.labels.map((label) => label.name);
+
+ // Skip if certain labels are present.
+ if (labels.includes("no-stale") || labels.includes("high priority")) {
+ core.info(`[${pull.number}] Skipping because PR has an exempting label.`);
+ return false;
+ }
+
+ // Check if the PR is stale, according to our configured thresholds.
+ let staleThresholdMillis;
+ if (labels.includes("Stale")) {
+ core.info(`[${pull.number}] PR is labeled stale, checking whether we should close it.`);
+ staleThresholdMillis = STALE_CLOSE_THRESHOLD_MS;
+ } else {
+ core.info(`[${pull.number}] Checking whether to label PR as stale.`);
+ staleThresholdMillis = STALE_LABEL_THRESHOLD_MS;
+ }
+
+ const millisSinceLastUpdated =
+ new Date().getTime() - new Date(pull.updated_at).getTime();
+
+ if (millisSinceLastUpdated < staleThresholdMillis) {
+ core.info(`[${pull.number}] Skipping because PR was updated recently`);
+ return false;
+ }
+
+ // At this point, we know we should do something.
+ // For PRs already labeled stale, close them.
+ if (labels.includes("Stale")) {
+ core.info(`[${pull.number}] Closing PR.`);
+ numAPIRequests += 1;
+ await github.rest.issues.update({
+ owner: "pytorch",
+ repo: "executorch",
+ issue_number: pull.number,
+ state: "closed",
+ });
+ } else {
+ // For PRs not labeled stale, label them stale.
+ core.info(`[${pull.number}] Labeling PR as stale.`);
+
+ numAPIRequests += 1;
+ await github.rest.issues.createComment({
+ owner: "pytorch",
+ repo: "executorch",
+ issue_number: pull.number,
+ body: STALE_MESSAGE,
+ });
+
+ numAPIRequests += 1;
+ await github.rest.issues.addLabels({
+ owner: "pytorch",
+ repo: "executorch",
+ issue_number: pull.number,
+ labels: ["Stale"],
+ });
+ }
+ }
+
+ for await (const response of github.paginate.iterator(
+ github.rest.pulls.list,
+ {
+ owner: "pytorch",
+ repo: "executorch",
+ state: "open",
+ sort: "created",
+ direction: "asc",
+ per_page: 100,
+ }
+ )) {
+ numAPIRequests += 1;
+ const pulls = response.data;
+ // Awaiting in a loop is intentional here. We want to serialize execution so
+            // that log groups are printed correctly.
+ for (const pull of pulls) {
+ if (numAPIRequests > MAX_API_REQUESTS) {
+ core.warning("Max API requests exceeded, exiting.");
+ process.exit(0);
+ }
+ await core.group(`Processing PR #${pull.number}`, async () => {
+ await processPull(pull);
+ });
+ }
+ }
+ core.info(`Processed ${numProcessed} PRs total.`);
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 7cfd0ac5fc6..7162049ac02 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -55,48 +55,102 @@ jobs:
# Build and test executorch
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
- test-models-arm-zephyr:
- name: test-models-arm-zephyr
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
- strategy:
- matrix:
- model: [add]
- fail-fast: false
- with:
- runner: linux.2xlarge
- docker-image: ci-image:executorch-ubuntu-22.04-zephyr-sdk
- submodules: 'recursive'
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- timeout: 120
- script: |
- MODEL_NAME=${{ matrix.model }}
- CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
- conda activate "${CONDA_ENV}"
-
- source .ci/scripts/utils.sh
- source .ci/scripts/zephyr-utils.sh
- mkdir -p zephyr_scratch/
- cd zephyr_scratch
- export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
-
- download_arm_zephyr_sdk
- ./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
-
- cd $ZEPHYR_PROJ_ROOT
- setup_zephyr_et_module
-
- cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
- install_executorch "--use-pt-pinned-commit"
- .ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
- source examples/arm/ethos-u-scratch/setup_path.sh
- source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh
- cd $ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm/hello_world
- west build -p always -b mps3/corstone300/fvp
- FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 -C mps3_board.uart0.out_file='sim.out' -C cpu0.CFGITCMSZ=15 -C cpu0.CFGDTCMSZ=15 --simlimit 120
-
- grep -qF "Output[0][0]: (float) 2.000000" sim.out
- exit_status=$? #store 0 if found (success), 1 if not (failure)
- exit $exit_status
+# test-models-arm-zephyr:
+# name: test-models-arm-zephyr
+# uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+# strategy:
+# matrix:
+# model: [add, softmax, mv2]
+# fail-fast: false
+# with:
+# runner: linux.2xlarge
+# docker-image: ci-image:executorch-ubuntu-22.04-zephyr-sdk
+# submodules: 'recursive'
+# ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+# timeout: 120
+# script: |
+# MODEL_NAME=${{ matrix.model }}
+# CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+# conda activate "${CONDA_ENV}"
+# if [[ ${{ matrix.model}} == "add" ]]; then
+# SIM_LIMIT_SEC=60
+# elif [[ ${{ matrix.model}} == "softmax" ]]; then
+# SIM_LIMIT_SEC=60
+# elif [[ ${{ matrix.model}} == "mv2" ]]; then
+# SIM_LIMIT_SEC=5000
+# else
+#              echo "Fail: unsupported model selection ${{ matrix.model }}"
+# exit 1
+# fi
+#
+# source .ci/scripts/utils.sh
+# source .ci/scripts/zephyr-utils.sh
+# mkdir -p zephyr_scratch/
+# cd zephyr_scratch
+# export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
+# export ARM_FVP_TUTORIALS_ROOT=$ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm-fvp-tutorials
+#
+# # TODO @Bujji: Should see if this can be moved into the docker image itself
+# download_arm_zephyr_sdk
+# ./zephyr-sdk-0.17.2/setup.sh -c -t arm-zephyr-eabi
+# cd $ZEPHYR_PROJ_ROOT
+# setup_zephyr_et_module
+#
+# # Run setup scripts for Arm FVP and Arm AOT Compilation
+# cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
+# install_executorch
+# .ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
+# source examples/arm/ethos-u-scratch/setup_path.sh
+# source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh
+#
+# # Get the model as PTE
+# python -m examples.arm.aot_arm_compiler \
+# --model_name="${MODEL_NAME}" \
+# --output="${MODEL_NAME}.pte"
+#
+# # Generate the C-style header
+# cd $ARM_FVP_TUTORIALS_ROOT
+# python build_model.py \
+# --executorch-root $ZEPHYR_PROJ_ROOT/modules/lib/executorch \
+# --pte-file $ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte \
+# --output-path $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/src/
+#
+# cd $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/
+#
+# # Build the zephyr elf
+# west build -p always -b mps3/corstone300/fvp -- \
+# -DET_PTE_FILE_PATH_FOR_SELECTIVE_BUILD=$ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte
+#
+# # Run the simulation
+# FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf \
+# -C mps3_board.visualisation.disable-visualisation=1 \
+# -C mps3_board.telnetterminal0.start_telnet=0 \
+# -C mps3_board.uart0.out_file='sim.out' \
+# -C cpu0.CFGITCMSZ=15 \
+# -C cpu0.CFGDTCMSZ=15 \
+# --simlimit ${SIM_LIMIT_SEC}
+#
+# # Disable exit on error
+# set +e
+#      # Report failure if any of the output verification checks fail
+# grep -qF "ERROR" sim.out
+# exit_status=$? #store 0 if found (failure), 1 if not (success)
+# if [[ "$exit_status" -eq "0" ]]; then
+# cat sim.out
+# set -e
+# exit 1
+# fi
+#
+# # Report fail if simulation does not complete successfully
+# grep -qF "SUCCESS: Program complete, exiting." sim.out
+# exit_status=$? #store 0 if found (success), 1 if not (failure)
+# if [[ "$exit_status" -eq "1" ]]; then
+# cat sim.out
+# set -e
+# exit 1
+# fi
+# # Re-enable exit on error
+# set -e
test-models-linux-aarch64:
name: test-models-linux-aarch64
@@ -234,6 +288,7 @@ jobs:
- test_arm_baremetal: test_models_tosa
- test_arm_baremetal: test_models_ethos-u55
- test_arm_baremetal: test_models_ethos-u85
+ - test_arm_baremetal: test_smaller_stories_llama
fail-fast: false
with:
runner: linux.2xlarge.memory
@@ -285,12 +340,12 @@ jobs:
setup_script_args=""
if [[ ${{ matrix.os}} == "bare_metal" ]]; then
toolchain_prefix=arm-none-eabi-
- threshold="109000"
+ threshold="110592" # 108 KiB
toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
setup_script_args="--target-toolchain zephyr"
toolchain_prefix=arm-zephyr-eabi-
- threshold="135000"
+ threshold="135168" # 132 KiB
toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
else
echo "Fail unsupport OS selection ${{ matrix.os }}"
@@ -430,7 +485,7 @@ jobs:
eval "$(conda shell.bash hook)"
# Install requirements
- ${CONDA_RUN} EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py
+ ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py
${CONDA_RUN} sh examples/models/llama/install_requirements.sh
# Run test
@@ -568,7 +623,7 @@ jobs:
strategy:
matrix:
dtype: [fp32]
- model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
+ model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
fail-fast: false
with:
runner: linux.2xlarge
@@ -677,10 +732,10 @@ jobs:
echo "::endgroup::"
done
- test-huggingface-transformers:
+ test-huggingface-transformers-xnnpack:
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
if: ${{ !github.event.pull_request.head.repo.fork }}
- name: test-huggingface-transformers
+ name: test-huggingface-transformers-xnnpack
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
@@ -688,12 +743,15 @@ jobs:
secrets: inherit
strategy:
matrix:
- hf_model_id: [
- google/gemma-3-1b-it,
- Qwen/Qwen3-0.6B,
- HuggingFaceTB/SmolLM2-135M,
- meta-llama/Llama-3.2-1B,
- allenai/OLMo-1B-hf,
+ config: [
+ # XNNPack.
+ llama3.2-1b|xnnpack|--quantize,
+ qwen3-0.6b|xnnpack|--quantize,
+ qwen3-1.7b|xnnpack|--quantize,
+ gemma3-1b|xnnpack|--quantize,
+ phi4-mini|xnnpack|--quantize,
+ smollm2-135m|xnnpack|--quantize,
+ smollm3-3b|xnnpack|--quantize
]
fail-fast: false
with:
@@ -705,6 +763,12 @@ jobs:
timeout: 90
upload-artifact: profiling-artifacts-${{ strategy.job-index }}
script: |
+ set -eux
+ IFS='|' read -r MODEL RECIPE QUANTIZE <<< "${{ matrix.config }}"
+ echo "Model: $MODEL"
+ echo "Recipe: $RECIPE"
+ echo "Quantize: $QUANTIZE"
+
echo "::group::Set up ExecuTorch"
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -742,63 +806,91 @@ jobs:
pip list
echo "::endgroup::"
- echo "::group::Export to ExecuTorch"
- # Pass matrix variable as environment variable
- export MODEL_ID="${{ matrix.hf_model_id }}"
- export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w"
- pushd optimum-executorch
-
- ARGS=(
- "--model" "${MODEL_ID}"
- "--task" "text-generation"
- "--recipe" "xnnpack"
- "--use_custom_sdpa"
- "--use_custom_kv_cache"
- "--qlinear" "8da4w"
- "--qembedding" "8w"
- "--output_dir" "${OUTPUT_DIR}"
- )
-
- optimum-cli export executorch "${ARGS[@]}"
-
- ls -FlAGhp ${OUTPUT_DIR}
- popd
+ echo "::group::Run tests"
+ export OUTPUT_DIR="$(pwd)/${MODEL}_${RECIPE}_${QUANTIZE}"
+ python .ci/scripts/test_huggingface_optimum_model.py --model ${MODEL} --recipe ${RECIPE} ${QUANTIZE} --model_dir ${OUTPUT_DIR}
echo "::endgroup::"
- echo "::group::Inference using python API"
- pushd optimum-executorch
- python -c "
- import os
- from optimum.executorch import ExecuTorchModelForCausalLM
- from transformers import AutoTokenizer
-
- model_id = os.getenv('MODEL_ID')
- pte_dir = os.getenv('OUTPUT_DIR')
- print(f'Loading model {model_id} from {pte_dir}.')
- model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
- generated_text = model.text_generation(
- tokenizer=AutoTokenizer.from_pretrained(model_id),
- prompt='Simply put, the theory of relativity states that',
- max_seq_len=64
- )
- print(generated_text)
- "
- popd
- echo "::endgroup::"
-
- echo "::group::Inference using executor_runner with ETDump"
+ echo "::group::Generate artifacts for performance profiling"
./cmake-out/executor_runner \
--model_path ${OUTPUT_DIR}/model.pte \
--etdump_path ${OUTPUT_DIR}/etdump.etdp
- export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+ export TSV_PATH=artifacts-to-be-uploaded/${MODEL}_op_prof.tsv
mkdir -p $(dirname "$TSV_PATH")
python3 -m devtools.inspector.inspector_cli \
--etdump_path ${OUTPUT_DIR}/etdump.etdp \
--tsv_path ${TSV_PATH}
+ echo "::endgroup::"
+
+ test-huggingface-transformers-coreml:
+ # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+ if: ${{ !github.event.pull_request.head.repo.fork }}
+ name: test-huggingface-transformers-coreml
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ permissions:
+ id-token: write
+ contents: read
+ secrets: inherit
+ # Models below selected based on https://huggingface.co/models?pipeline_tag=text-generation&num_parameters=min:0,max:3B&sort=trending.
+ strategy:
+ matrix:
+ config: [
+ # # XNNPack. (Skipping for now due to intermittent segmentation faults, see https://github.com/huggingface/optimum-executorch/issues/122.)
+ # llama3.2-1b|xnnpack|--quantize,
+ # qwen3-0.6b|xnnpack|--quantize,
+ # qwen3-1.7b|xnnpack|--quantize,
+ # gemma3-1b|xnnpack|--quantize,
+ # phi4-mini|xnnpack|--quantize,
+ # smollm2-135m|xnnpack|--quantize,
+ # smollm3-3b|xnnpack|--quantize,
+ # CoreML.
+ llama3.2-1b|coreml_fp32_gpu|--quantize,
+ qwen3-0.6b|coreml_fp32_gpu|--quantize,
+ qwen3-1.7b|xnnpack|--quantize,
+ smollm2-135m|coreml_fp32_gpu|--quantize,
+ olmo-1b|coreml_fp32_gpu|--quantize,
+ bert|coreml_fp32_gpu|--quantize,
+ distilbert|coreml_fp32_gpu|--quantize
+ ]
+ fail-fast: false
+ with:
+ secrets-env: EXECUTORCH_HF_TOKEN
+ runner: macos-15-xlarge
+ python-version: '3.11'
+ submodules: 'recursive'
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ timeout: 90
+ script: |
+ set -eux
+ IFS='|' read -r MODEL RECIPE QUANTIZE <<< "${{ matrix.config }}"
+ echo "Model: $MODEL"
+ echo "Recipe: $RECIPE"
+ echo "Quantize: $QUANTIZE"
+ echo "::group::Set up ExecuTorch"
+ bash .ci/scripts/setup-conda.sh
+ eval "$(conda shell.bash hook)"
+
+ # Install requirements
+ ${CONDA_RUN} python install_executorch.py
echo "::endgroup::"
+ echo "::group::Set up Hugging Face"
+ pip install -U "huggingface_hub[cli]"
+ huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+ OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+ git clone https://github.com/huggingface/optimum-executorch
+ pushd optimum-executorch
+ # There is no release yet, for CI stability, always test from the same commit on main
+ git checkout $OPTIMUM_ET_COMMIT
+ ${CONDA_RUN} python install_dev.py --skip_override_torch
+ popd
+ ${CONDA_RUN} pip list
+ echo "::endgroup::"
+
+ # Run test
+ ${CONDA_RUN} python .ci/scripts/test_huggingface_optimum_model.py --model ${MODEL} --recipe ${RECIPE} ${QUANTIZE}
test-llama-runner-qnn-linux:
name: test-llama-runner-qnn-linux
diff --git a/.gitignore b/.gitignore
index 08d14e13582..38029ba8458 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,10 +20,12 @@ dist/
ethos-u-scratch/
executorch.egg-info
pip-out/
+build-profiling/
# Any exported models and profiling outputs
*.bin
*.model
+*.etdump
tokenizer.json
*.pte
*.ptd
@@ -58,6 +60,8 @@ xcuserdata/
/include/
/share/
/version.py
+*.csv
+*_etdump
# Android
*.aar
diff --git a/.gitmodules b/.gitmodules
index 945ae5ed51e..5f4c5fca1d1 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,6 @@
[submodule "backends/arm/third-party/ethos-u-core-driver"]
path = backends/arm/third-party/ethos-u-core-driver
url = https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-core-driver.git
-[submodule "backends/arm/third-party/serialization_lib"]
- path = backends/arm/third-party/serialization_lib
- url = https://git.gitlab.arm.com/tosa/tosa-serialization.git
[submodule "backends/vulkan/third-party/Vulkan-Headers"]
path = backends/vulkan/third-party/Vulkan-Headers
url = https://github.com/KhronosGroup/Vulkan-Headers
diff --git a/.lintrunner.toml b/.lintrunner.toml
index 07227998c2c..c060836cb72 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -136,6 +136,36 @@ init_command = [
'--requirement=requirements-lintrunner.txt',
]
+[[linter]]
+code = 'CMAKEFORMAT'
+include_patterns = [
+ "**/*.cmake",
+ "**/*.cmake.in",
+ "**/CMakeLists.txt",
+]
+exclude_patterns = [
+ 'third-party/**',
+ '**/third-party/**',
+]
+command = [
+ 'python',
+ '-m',
+ 'lintrunner_adapters',
+ 'run',
+ 'cmake_format_linter',
+ '--',
+ '@{{PATHSFILE}}',
+]
+init_command = [
+ 'python',
+ '-m',
+ 'lintrunner_adapters',
+ 'run',
+ 'pip_init',
+ '--dry-run={{DRYRUN}}',
+ '--requirement=requirements-lintrunner.txt',
+]
+
[[linter]]
code = 'ETCAPITAL'
include_patterns = [
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb4c196668a..cbfea45b3c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,10 @@
cmake_minimum_required(VERSION 3.29)
project(executorch)
+set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
+
include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
+include(${PROJECT_SOURCE_DIR}/tools/cmake/Codegen.cmake)
include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
include(CMakeDependentOption)
include(ExternalProject)
@@ -76,11 +79,6 @@ if(NOT PYTHON_EXECUTABLE)
endif()
announce_configured_options(PYTHON_EXECUTABLE)
-if(NOT BUCK2)
- resolve_buck2()
-endif()
-announce_configured_options(BUCK2)
-
announce_configured_options(CMAKE_CXX_COMPILER_ID)
announce_configured_options(CMAKE_TOOLCHAIN_FILE)
announce_configured_options(BUILD_TESTING)
@@ -123,8 +121,6 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
# Instead please use `find_package(executorch REQUIRED)` in the example
# directory and add a new executable in the example `CMakeLists.txt`.
-set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
-
if(NOT EXECUTORCH_ENABLE_LOGGING)
# Avoid pulling in the logging strings, which can be large. Note that this
# will set the compiler flag for all targets in this directory, and for all
@@ -278,6 +274,11 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
)
endif()
+if(EXECUTORCH_BUILD_TESTS)
+ set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
+ include(CTest)
+endif()
+
# TODO(dbort): Fix these warnings and remove this flag.
set(_common_compile_options -Wno-deprecated-declarations -fPIC)
@@ -303,23 +304,15 @@ set(_common_include_directories
)
#
-# The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
+# The `__srcs` lists are defined by executorch_load_build_variables.
#
-
-if(NOT EXECUTORCH_SRCS_FILE)
- # A file wasn't provided. Run a script to extract the source lists from the
- # buck2 build system and write them to a file we can include.
- #
- # NOTE: This will only happen once during cmake setup, so it will not re-run
- # if the buck2 targets change.
- message(STATUS "executorch: Generating source lists")
- set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/executorch_srcs.cmake")
- extract_sources(${EXECUTORCH_SRCS_FILE})
+if(EXECUTORCH_SRCS_FILE)
+ message(
+ WARNING
+ "EXECUTORCH_SRCS_FILE is no longer necessary and will not affect the build."
+ )
endif()
-
-# This file defines the `___srcs` variables used below.
-message(STATUS "executorch: Using sources file ${EXECUTORCH_SRCS_FILE}")
-include(${EXECUTORCH_SRCS_FILE})
+executorch_load_build_variables()
# Detect if an iOS toolchain is set.
if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
@@ -416,6 +409,12 @@ if(MAX_KERNEL_NUM)
)
endif()
+# Build devtools first if needed - some backends depend on protobuf from
+# devtools
+if(EXECUTORCH_BUILD_DEVTOOLS)
+ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools)
+endif()
+
if(EXECUTORCH_BUILD_PYBIND AND APPLE)
# shared version
add_library(executorch_core_shared SHARED ${_executorch_core__srcs})
@@ -486,24 +485,29 @@ install(
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/core
FILES_MATCHING
PATTERN "*.h"
+ PATTERN "testing_util" EXCLUDE
)
install(
DIRECTORY runtime/executor/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/executor
FILES_MATCHING
PATTERN "*.h"
+ PATTERN "test" EXCLUDE
+ PATTERN "platform_memory_allocator.h" EXCLUDE
)
install(
DIRECTORY runtime/kernel/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/kernel
FILES_MATCHING
PATTERN "*.h"
+ PATTERN "test" EXCLUDE
)
install(
DIRECTORY runtime/platform/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/platform
FILES_MATCHING
PATTERN "*.h"
+ PATTERN "test" EXCLUDE
)
install(
DIRECTORY extension/kernel_util/
@@ -581,10 +585,6 @@ if(EXECUTORCH_BUILD_CORTEX_M)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cortex_m)
endif()
-if(EXECUTORCH_BUILD_DEVTOOLS)
- add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools)
-endif()
-
if(EXECUTORCH_BUILD_EXTENSION_APPLE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
list(APPEND _executorch_extensions apple_extension)
@@ -592,11 +592,15 @@ endif()
if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/data_loader)
+ if(NOT WIN32)
+ set(data_loader_exclude_pattern "*mman_windows.h")
+ endif()
install(
DIRECTORY extension/data_loader/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/extension/data_loader
FILES_MATCHING
PATTERN "*.h"
+ PATTERN ${data_loader_exclude_pattern} EXCLUDE
)
list(APPEND _executorch_extensions extension_data_loader)
endif()
@@ -677,6 +681,65 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
endif()
+if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
+ if(NOT TARGET cpuinfo)
+ message(
+ FATAL_ERROR
+ "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_CPUINFO be set ON"
+ )
+ endif()
+ if(NOT TARGET pthreadpool)
+ message(
+ FATAL_ERROR
+ "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_PTHREADPOOL be set ON"
+ )
+ endif()
+
+ # Configure TorchAO kernels
+ set(TORCHAO_BUILD_ATEN_OPS OFF)
+ set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
+ set(TORCHAO_BUILD_CPU_AARCH64 ON)
+ set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
+ set(TORCHAO_BUILD_KLEIDIAI ON)
+
+ # TorchAO kernels look for EXECUTORCH_INCLUDE_DIRS
+ if(DEFINED EXECUTORCH_INCLUDE_DIRS)
+ message(FATAL_ERROR "EXECUTORCH_INCLUDE_DIRS is already defined")
+ endif()
+ set(EXECUTORCH_INCLUDE_DIRS
+ ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
+ ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
+ )
+ add_subdirectory(
+ ${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
+ )
+ unset(EXECUTORCH_INCLUDE_DIRS)
+
+ executorch_target_link_options_shared_lib(torchao_ops_executorch)
+ list(APPEND _executorch_kernels torchao_ops_executorch)
+
+ install(
+ TARGETS torchao_ops_executorch torchao_kernels_aarch64
+ EXPORT ExecuTorchTargets
+ DESTINATION lib
+ INCLUDES
+ DESTINATION ${_common_include_directories}
+ )
+ # If using KleidiAI and XNNPACK has not installed it already, install it
+ if(TORCHAO_BUILD_KLEIDIAI AND NOT (EXECUTORCH_BUILD_XNNPACK
+ AND EXECUTORCH_XNNPACK_ENABLE_KLEIDI)
+ )
+ install(
+ TARGETS kleidiai
+ EXPORT ExecuTorchTargets
+ DESTINATION lib
+ INCLUDES
+ DESTINATION ${_common_include_directories}
+ )
+ endif()
+
+endif()
+
if(EXECUTORCH_BUILD_PYBIND)
# Add codegen tools subdirectory for selective_build pybind module
@@ -690,6 +753,30 @@ if(EXECUTORCH_BUILD_PYBIND)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools)
endif()
+ # Create bundled_module target only for pybindings when bundled_program exists
+ # This target has hard dependencies on devtools generated headers
+ if(TARGET bundled_program)
+ add_library(
+ bundled_module STATIC
+ ${CMAKE_CURRENT_SOURCE_DIR}/extension/module/bundled_module.cpp
+ )
+
+ # Ensure bundled_module waits for bundled_program's generated headers
+ add_dependencies(bundled_module bundled_program)
+
+ target_link_libraries(bundled_module PRIVATE extension_data_loader)
+ target_link_libraries(
+ bundled_module PUBLIC extension_module_static bundled_program
+ )
+
+ target_include_directories(
+ bundled_module PUBLIC ${_common_include_directories}
+ )
+ target_compile_options(
+ bundled_module PUBLIC -Wno-deprecated-declarations -fPIC
+ )
+ endif()
+
# find pytorch lib, to allow pybind to take at::Tensor as input/output
find_package_torch()
find_library(
@@ -707,6 +794,16 @@ if(EXECUTORCH_BUILD_PYBIND)
torch
)
+ if(EXECUTORCH_BUILD_EXTENSION_MODULE)
+ # Always use static linking for pybindings to avoid runtime symbol
+ # resolution issues
+ list(APPEND _dep_libs extension_module_static)
+ # Add bundled_module if available
+ if(TARGET bundled_module)
+ list(APPEND _dep_libs bundled_module)
+ endif()
+ endif()
+
if(EXECUTORCH_BUILD_TESTS)
list(APPEND _dep_libs test_backend_compiler_lib)
endif()
@@ -729,12 +826,20 @@ if(EXECUTORCH_BUILD_PYBIND)
list(APPEND _dep_libs openvino_backend)
endif()
+ if(EXECUTORCH_BUILD_QNN)
+ list(APPEND _dep_libs qnn_executorch_backend)
+ endif()
+
if(EXECUTORCH_BUILD_XNNPACK)
# need to explicitly specify XNNPACK and xnnpack-microkernels-prod here
# otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
list(APPEND _dep_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
endif()
+ if(EXECUTORCH_BUILD_VULKAN)
+ list(APPEND _dep_libs vulkan_backend)
+ endif()
+
# compile options for pybind
set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
-fexceptions
@@ -775,6 +880,10 @@ if(EXECUTORCH_BUILD_WASM)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/wasm)
endif()
+if(EXECUTORCH_BUILD_TOKENIZERS_WASM)
+ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/wasm/tokenizers)
+endif()
+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/training)
list(APPEND _executorch_extensions extension_training)
@@ -825,7 +934,7 @@ if(NOT EXECUTORCH_SELECT_OPS_YAML STREQUAL ""
LIB_NAME
"executorch_selected_kernels"
OPS_SCHEMA_YAML
- "${EXECUTORCH_SELECT_OPS_LIB}"
+ "${EXECUTORCH_SELECT_OPS_YAML}"
ROOT_OPS
"${EXECUTORCH_SELECT_OPS_LIST}"
INCLUDE_ALL_OPS
diff --git a/CMakePresets.json b/CMakePresets.json
index e637c73545c..bcf3bbc8d83 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -6,6 +6,36 @@
"hidden": true,
"binaryDir": "${sourceDir}/cmake-out"
},
+ {
+ "name": "android-arm64-v8a",
+ "displayName": "Build executorch core and JNI bindings on android arm64-v8a",
+ "inherits": ["common"],
+ "binaryDir": "${sourceDir}/cmake-out-android-arm64-v8a",
+ "cacheVariables": {
+ "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+ "ANDROID_ABI": "arm64-v8a"
+ },
+ "condition": {
+ "type": "inList",
+ "string": "${hostSystemName}",
+ "list": ["Darwin", "Linux", "Windows"]
+ }
+ },
+ {
+ "name": "android-x86_64",
+ "displayName": "Build executorch core and JNI bindings on android x86_64",
+ "inherits": ["common"],
+ "binaryDir": "${sourceDir}/cmake-out-android-x86_64",
+ "cacheVariables": {
+ "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+ "ANDROID_ABI": "x86_64"
+ },
+ "condition": {
+ "type": "inList",
+ "string": "${hostSystemName}",
+ "list": ["Darwin", "Linux", "Windows"]
+ }
+ },
{
"name": "macos",
"displayName": "Build ExecuTorch for macOS",
@@ -100,6 +130,41 @@
"list": ["Darwin", "Linux", "Windows"]
}
},
+ {
+ "name": "profiling",
+ "displayName": "Build ExecuTorch with Profiling Enabled",
+ "inherits": [
+ "common"
+ ],
+ "cacheVariables": {
+ "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/profiling.cmake",
+ "CMAKE_OSX_DEPLOYMENT_TARGET": "12.0"
+ },
+ "condition": {
+ "type": "inList",
+ "string": "${hostSystemName}",
+ "list": [
+ "Darwin",
+ "Linux",
+ "Windows"
+ ]
+ }
+ },
+ {
+ "name": "windows",
+ "displayName": "Build ExecuTorch for Windows",
+ "inherits": ["common"],
+ "cacheVariables": {
+ "CMAKE_SYSTEM_NAME": "Windows",
+ "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/windows.cmake"
+ },
+ "toolset": "ClangCL",
+ "condition": {
+ "lhs": "${hostSystemName}",
+ "type": "equals",
+ "rhs": "Windows"
+ }
+ },
{
"name": "zephyr",
"displayName": "Build ExecuTorch for Zephyr RTOS",
@@ -108,6 +173,15 @@
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/zephyr.cmake",
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake"
}
+ },
+ {
+ "name": "arm-baremetal",
+ "displayName": "Build ExecuTorch for Arm baremetal",
+ "inherits": ["common"],
+ "cacheVariables": {
+ "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/arm_baremetal.cmake",
+ "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake"
+ }
}
]
}
diff --git a/Package.swift b/Package.swift
index ba61d162527..3186284f5f6 100644
--- a/Package.swift
+++ b/Package.swift
@@ -84,6 +84,11 @@ let products = deliverables([
],
],
"kernels_quantized": [:],
+ "kernels_torchao": [
+ "targets": [
+ "threadpool",
+ ],
+ ],
])
let targets = deliverables([
diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS
index 487bb2da4fa..22cb20d9065 100644
--- a/backends/apple/coreml/TARGETS
+++ b/backends/apple/coreml/TARGETS
@@ -17,6 +17,7 @@ runtime.python_library(
name = "backend",
srcs = glob([
"compiler/*.py",
+ "logging.py",
]),
visibility = [
"@EXECUTORCH_CLIENTS",
@@ -33,6 +34,7 @@ runtime.python_library(
name = "partitioner",
srcs = glob([
"partition/*.py",
+ "logging.py",
]),
visibility = [
"@EXECUTORCH_CLIENTS",
@@ -58,6 +60,26 @@ runtime.python_library(
],
)
+runtime.python_library(
+ name = "recipes",
+ srcs = glob([
+ "recipes/*.py",
+ ]),
+ visibility = [
+ "@EXECUTORCH_CLIENTS",
+ ],
+ deps = [
+ "fbsource//third-party/pypi/coremltools:coremltools",
+ ":backend",
+ "//caffe2:torch",
+ "//executorch/exir:lib",
+ "//executorch/exir/backend:compile_spec_schema",
+ "//executorch/exir/backend:partitioner",
+ "//executorch/exir/backend:utils",
+ "//executorch/export:lib",
+ ],
+)
+
runtime.cxx_python_extension(
name = "executorchcoreml",
srcs = [
@@ -98,10 +120,13 @@ runtime.python_test(
"test/*.py",
]),
deps = [
+ "fbsource//third-party/pypi/coremltools:coremltools",
"fbsource//third-party/pypi/pytest:pytest",
":partitioner",
":quantizer",
+ ":recipes",
"//caffe2:torch",
"//pytorch/vision:torchvision",
+ "fbsource//third-party/pypi/scikit-learn:scikit-learn",
],
)
diff --git a/backends/apple/coreml/compiler/coreml_preprocess.py b/backends/apple/coreml/compiler/coreml_preprocess.py
index bf390698705..edf7aa97241 100644
--- a/backends/apple/coreml/compiler/coreml_preprocess.py
+++ b/backends/apple/coreml/compiler/coreml_preprocess.py
@@ -16,8 +16,8 @@
import coremltools as ct
import coremltools.optimize as cto
-
from executorch.backends.apple.coreml import executorchcoreml
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
from executorch.exir.backend.backend_details import (
BackendDetails,
ExportedProgram,
@@ -25,11 +25,11 @@
)
from executorch.exir.backend.compile_spec_schema import CompileSpec
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.WARNING)
-
from executorch.backends.apple.coreml.compiler.torch_ops import * # noqa: F401, F403
+logger = logging.getLogger(__name__)
+logger.setLevel(get_coreml_log_level(default_level=logging.WARNING))
+
class COMPILE_SPEC_KEYS(Enum):
COMPUTE_UNITS = "compute_units"
@@ -126,15 +126,18 @@ def model_compute_precision_from_compile_specs(
@staticmethod
def generate_minimum_deployment_target_compile_spec(
- min_deployment_target: ct.target,
+ min_deployment_target: Optional[ct.target],
) -> CompileSpec:
"""
Returns the compile spec representing the minimum deployment target on which the model can run,
for additional details please refer to the documentation for ``coremltools.target``.
"""
+ value = str("").encode("utf-8")
+ if min_deployment_target is not None:
+ value = str(min_deployment_target.value).encode("utf-8")
return CompileSpec(
COMPILE_SPEC_KEYS.MIN_DEPLOYMENT_TARGET.value,
- str(min_deployment_target.value).encode("utf-8"),
+ value,
)
@staticmethod
@@ -146,10 +149,13 @@ def min_deployment_target_from_compile_specs(
"""
for compile_spec in compile_specs:
if compile_spec.key == COMPILE_SPEC_KEYS.MIN_DEPLOYMENT_TARGET.value:
- compile_spec_value: int = int(compile_spec.value.decode("utf-8"))
+ value = compile_spec.value.decode("utf-8")
+ if value == "":
+ return None
+ compile_spec_value: int = int(value)
return ct.target(compile_spec_value)
- return ct.target.iOS15
+ return None
@staticmethod
def compute_unit_from_compile_specs(
@@ -211,7 +217,7 @@ def op_linear_quantizer_config_from_compile_specs(
@staticmethod
def generate_compile_specs(
compute_unit: ct.ComputeUnit = ct.ComputeUnit.ALL,
- minimum_deployment_target: ct.target = ct.target.iOS15,
+ minimum_deployment_target: Optional[ct.target] = None,
compute_precision: ct.precision = ct.precision.FLOAT16,
model_type: MODEL_TYPE = MODEL_TYPE.MODEL,
op_linear_quantizer_config: Optional[Dict] = None,
@@ -248,6 +254,13 @@ def model_metadata_from_spec(
input_names: List[str] = [input.name for input in model_spec.description.input]
output_names = [output.name for output in model_spec.description.output]
+ if len(output_names) == 0:
+ raise ValueError("Cannot lower a model with no outputs in CoreML.")
+ if len(input_names) == 0:
+ assert (
+ model_spec.specificationVersion >= 9
+ ), "Deploying a model with no inputs in CoreML requires you set minimum_deployment_target to iOS18 or later in the CoreMLPartitioner."
+
return ModelMetadata(
inputNames=input_names, outputNames=output_names, identifier=identifier
)
@@ -352,6 +365,12 @@ def preprocess_model(
dir_path: Path = Path("tmp") / identifier
model_dir_path: Path = dir_path / "lowered_module"
model_spec: ct.proto.Model_pb2 = mlmodel.get_spec()
+ logger.warning(
+ f"The model with identifier {identifier} was exported with CoreML specification version {model_spec.specificationVersion}, and it will not run on all version of iOS/macOS."
+ " See https://apple.github.io/coremltools/mlmodel/Format/Model.html#model for information on what OS versions are compatible with this specifcation version."
+ " If you want to control the deployment target, please set the minimum_deployment_target compile spec in the CoreMLPartitioner."
+ )
+
model_metadata: ModelMetadata = CoreMLBackend.model_metadata_from_spec(
model_spec=model_spec,
identifier=identifier,
@@ -409,6 +428,7 @@ def preprocess(
edge_program: ExportedProgram,
compile_specs: List[CompileSpec],
) -> PreprocessResult:
+ logger.info(f"Edge program: {edge_program}")
model_type: CoreMLBackend.MODEL_TYPE = (
CoreMLBackend.model_type_from_compile_specs(
compile_specs,
@@ -417,7 +437,7 @@ def preprocess(
model_compute_precision: ct.precision = (
CoreMLBackend.model_compute_precision_from_compile_specs(compile_specs)
)
- minimum_deployment_target: ct.target = (
+ minimum_deployment_target: Optional[ct.target] = (
CoreMLBackend.min_deployment_target_from_compile_specs(compile_specs)
)
compute_units: ct.ComputeUnit = CoreMLBackend.compute_unit_from_compile_specs(
diff --git a/backends/apple/coreml/compiler/torch_ops.py b/backends/apple/coreml/compiler/torch_ops.py
index 479d744a97e..e53670951e0 100644
--- a/backends/apple/coreml/compiler/torch_ops.py
+++ b/backends/apple/coreml/compiler/torch_ops.py
@@ -8,22 +8,25 @@
# coremltools than is used by ExecuTorch. Each op registered here should have a link to a PR in coremltools that adds
# the op to the coremltools library.
+import numpy as np
import torch as _torch
-from coremltools import _logger as logger
+from coremltools import _logger
from coremltools.converters.mil.frontend import _utils
from coremltools.converters.mil.frontend.torch.ops import (
_get_inputs,
+ _get_kwinputs,
NUM_TO_NUMPY_DTYPE,
NUM_TO_TORCH_DTYPE,
split,
+ to,
transpose,
unbind,
)
-
from coremltools.converters.mil.frontend.torch.torch_op_registry import (
register_torch_op,
)
from coremltools.converters.mil.mil import types
+from executorch.exir.dim_order_utils import get_memory_format
# https://github.com/apple/coremltools/pull/2556
@@ -44,6 +47,50 @@ def split_copy(context, node):
split(context, node)
+def is_fbcode():
+ return not hasattr(_torch.version, "git_version")
+
+
+if not is_fbcode():
+ from coremltools.converters.mil.frontend.torch.dim_order_ops import (
+ _empty_dim_order,
+ _to_dim_order_copy,
+ )
+
+ # This is a temporary hack to register the alias "dim_order_ops._to_dim_order_copy",
+ # which was missed by coremltools
+ @register_torch_op(torch_alias=["dim_order_ops._to_dim_order_copy"], override=False)
+ def _to_dim_order_copy_TMP_EXECUTORCH_ALIAS_HACK(context, node):
+ _to_dim_order_copy(context, node)
+
+ # This is a temporary hack to register the alias "dim_order_ops._empty_dim_order",
+ # which was missed by coremltools
+ @register_torch_op(torch_alias=["dim_order_ops._empty_dim_order"], override=False)
+ def _empty_dim_order_TMP_EXECUTORCH_ALIAS_HACK(context, node):
+ _empty_dim_order(context, node)
+
+else:
+ # TODO: remove this case when fbcode updates to coremltools 9.0
+ @register_torch_op(
+ torch_alias=[
+ "dim_order_ops::_to_dim_order_copy",
+ "dim_order_ops._to_dim_order_copy",
+ ],
+ override=False,
+ )
+ def _to_dim_order_copy(context, node):
+ dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+ node.kwinputs.pop("dim_order")
+
+ # In CoreML, dim_order.val will be an ndarray, so we convert it to a list
+ dim_order = [int(d) for d in dim_order.val]
+ memory_format = get_memory_format(dim_order)
+ assert (
+ memory_format == _torch.contiguous_format
+ ), "Only contiguous memory format is supported in CoreML"
+ to(context, node)
+
+
# https://github.com/apple/coremltools/pull/2558
@register_torch_op(
torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
@@ -88,7 +135,7 @@ def dequantize_affine(context, node):
out_np_dtype = None
if len(inputs) > 7:
out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
- logger.warning(
+ _logger.warning(
f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
)
@@ -109,3 +156,43 @@ def dequantize_affine(context, node):
name=node.name,
)
context.add(output, node.name)
+
+
+@register_torch_op(
+ torch_alias=["quant::dequantize_codebook", "quant.dequantize_codebook"],
+ override=False,
+)
+def dequantize_codebook(context, node):
+ inputs = _get_inputs(context, node, expected=[4, 5])
+ codes = inputs[0].val
+ codebook = inputs[1].val
+ nbits = inputs[2].val
+
+ # information in block_size is redundant with codebook.shape
+ block_size = inputs[3].val # noqa: F841
+
+ assert len(codes.shape) == 2, "Only rank 2 inputs are supported"
+
+ # Assert codebook is as expected. codebook.dim() = codes.dim() + 2
+ assert len(codebook.shape) == 4, "Only rank 4 inputs are supported for codebook"
+ assert codebook.shape[0] == 1, "Only grouped_channel granularity is supported"
+ n_luts = codebook.shape[1]
+ assert (
+ codes.shape[1] % n_luts == 0
+ ), "codes.shape[1] must be divisible by codebook.shape[1]"
+ assert codebook.shape[2] == 2**nbits
+ assert codebook.shape[3] == 1, "Only scalar look up values are supported"
+
+ if len(inputs) > 4:
+ output_dtype = inputs[4].val
+ out_np_dtype = NUM_TO_NUMPY_DTYPE[output_dtype]
+ _logger.warning(
+ f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
+ )
+
+ output = _utils._construct_constexpr_lut_op(
+ codes.astype(np.int8),
+ codebook,
+ name=node.name,
+ )
+ context.add(output, node.name)
diff --git a/backends/apple/coreml/logging.py b/backends/apple/coreml/logging.py
new file mode 100644
index 00000000000..2921e31e092
--- /dev/null
+++ b/backends/apple/coreml/logging.py
@@ -0,0 +1,24 @@
+# Copyright © 2023 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+import logging
+import os
+from typing import Optional
+
+
+def get_coreml_log_level(default_level: int) -> Optional[str]:
+ level_str = os.environ.get("ET_COREML_LOG_LEVEL", "").upper()
+ if level_str == "":
+ return default_level
+
+ level_map = {
+ "DEBUG": logging.DEBUG,
+ "INFO": logging.INFO,
+ "WARNING": logging.WARNING,
+ "ERROR": logging.ERROR,
+ "CRITICAL": logging.CRITICAL,
+ }
+ if level_str not in level_map:
+ raise ValueError(f"Invalid ET_COREML_LOG_LEVEL: {level_str}")
+ return level_map[level_str]
diff --git a/backends/apple/coreml/partition/coreml_partitioner.py b/backends/apple/coreml/partition/coreml_partitioner.py
index 8855a745166..93506e6d985 100644
--- a/backends/apple/coreml/partition/coreml_partitioner.py
+++ b/backends/apple/coreml/partition/coreml_partitioner.py
@@ -10,6 +10,8 @@
import torch
from executorch.backends.apple.coreml.compiler import CoreMLBackend
+
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
from executorch.exir.backend.compile_spec_schema import CompileSpec
from executorch.exir.backend.partitioner import (
@@ -18,12 +20,13 @@
PartitionResult,
)
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
+from executorch.exir.dialects._ops import ops as exir_ops
from torch.export.exported_program import ExportedProgram
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
from torch.fx.passes.operator_support import OperatorSupportBase
logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+logger.setLevel(get_coreml_log_level(default_level=logging.INFO))
def _is_view_op(op: torch._ops.OpOverload) -> bool:
@@ -54,6 +57,80 @@ def log_once(self, msg: str) -> None:
logger.info(msg)
self._logged_msgs.add(msg)
+ def should_skip_op_for_delegation(self, node_target_name: str) -> bool:
+ skipped_ops = self.skip_ops_for_coreml_delegation or []
+ if node_target_name in skipped_ops:
+ assert (
+ not self.lower_full_graph
+ ), f"Cannot skip {node_target_name} because lower_full_graph is True. Please set skip_ops_for_coreml_delegation=None or lower_full_graph=False in the CoreMLPartitioner"
+ self.log_once(
+ "Skipping op for CoreML delegation because it is in skip_ops_for_coreml_delegation: "
+ + node_target_name
+ )
+ return True
+ return False
+
+ def should_override_support(self, node) -> bool:
+ # https://github.com/apple/coremltools/issues/2573
+ if (
+ node.target
+ in [
+ torch.ops.aten.sub.Tensor,
+ exir_ops.edge.aten.sub.Tensor,
+ torch.ops.aten.add.Tensor,
+ exir_ops.edge.aten.add.Tensor,
+ ]
+ and "alpha" in node.kwargs
+ and node.kwargs["alpha"] != 1
+ ):
+ self.log_once(
+ "torch.ops.aten.{sub, add}.Tensor with alpha != 1 is not supported by CoreML. Overriding support."
+ )
+ return True
+
+ # https://github.com/apple/coremltools/issues/2565
+ if node.target in [
+ torch.ops.aten.diagonal.default,
+ torch.ops.aten.diagonal_copy.default,
+ exir_ops.edge.aten.diagonal.default,
+ exir_ops.edge.aten.diagonal_copy.default,
+ ]:
+ self.log_once(
+ "torch.ops.aten.diagonal.default has a bug in CoreML. Overriding op support."
+ )
+ return True
+
+ # https://github.com/apple/coremltools/issues/2569
+ if node.target in [
+ torch.ops.aten.acosh.default,
+ exir_ops.edge.aten.acosh.default,
+ torch.ops.aten.asinh.default,
+ exir_ops.edge.aten.asinh.default,
+ ]:
+ self.log_once(
+ "torch.ops.aten.{acosh, asinh}.default is not supported by CoreML. Overriding op support."
+ )
+ return True
+
+ # TODO: enable this after bugs in ExecuTorch's partitioner are fixed
+ # # If lower_full_graph=False, do not partition nodes with symbolic args because it can result in symbolic args
+ # # in the placeholders due to partitioning, which CoreML does not support
+ # if not self.lower_full_graph and any(
+ # isinstance(arg, torch.fx.Node)
+ # and isinstance(
+ # arg.meta.get("val", None),
+ # (torch.SymInt, torch.SymBool, torch.SymFloat),
+ # )
+ # for arg in node.args
+ # ):
+ # self.log_once(
+ # "Skipping op for CoreML delegation because it contains symbolic args: "
+ # + node_target_name
+ # )
+ # return True
+
+ return False
+
def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
# get_attr node can always be supported on any backend
if node.op == "get_attr":
@@ -62,38 +139,17 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
elif node.op == "call_function":
# skip ops if specified by user
node_target_name = getattr(node.target, "__name__", "").lower()
- if node_target_name in (self.skip_ops_for_coreml_delegation or []):
- self.log_once(
- "Skipping op for CoreML delegation because it is in skip_ops_for_coreml_delegation: "
- + node_target_name
- )
- assert (
- not self.lower_full_graph
- ), "Cannot have skip_ops_for_coreml_delegation when lower_full_graph is True"
- return False
- # TODO: enable this after bugs in ExecuTorch's partitioner are fixed
- # # If lower_full_graph=False, do not partition nodes with symbolic args because it can result in symbolic args
- # # in the placeholders due to partitioning, which CoreML does not support
- # if not self.lower_full_graph and any(
- # isinstance(arg, torch.fx.Node)
- # and isinstance(
- # arg.meta.get("val", None),
- # (torch.SymInt, torch.SymBool, torch.SymFloat),
- # )
- # for arg in node.args
- # ):
- # self.log_once(
- # "Skipping op for CoreML delegation because it contains symbolic args: "
- # + node_target_name
- # )
- # assert not self.lower_full_graph
- # return False
+ if self.should_skip_op_for_delegation(node_target_name):
+ return False
# query coremltools to see if node is supported
is_supported = ct.converters.mil.frontend.torch.is_torch_fx_node_supported(
node
)
+ if self.should_override_support(node):
+ is_supported = False
+
if not is_supported:
if self.lower_full_graph:
raise NotImplementedError(
@@ -124,7 +180,6 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
class CoreMLPartitioner(Partitioner):
-
def __init__(
self,
*,
diff --git a/backends/apple/coreml/recipes/__init__.py b/backends/apple/coreml/recipes/__init__.py
new file mode 100644
index 00000000000..8bcd1c254a8
--- /dev/null
+++ b/backends/apple/coreml/recipes/__init__.py
@@ -0,0 +1,17 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+from executorch.export import recipe_registry
+
+from .coreml_recipe_provider import CoreMLRecipeProvider
+from .coreml_recipe_types import CoreMLRecipeType
+
+# Auto-register CoreML backend recipe provider
+recipe_registry.register_backend_recipe_provider(CoreMLRecipeProvider())
+
+__all__ = [
+ "CoreMLRecipeProvider",
+ "CoreMLRecipeType",
+]
diff --git a/backends/apple/coreml/recipes/coreml_recipe_provider.py b/backends/apple/coreml/recipes/coreml_recipe_provider.py
new file mode 100644
index 00000000000..90b798f9e0c
--- /dev/null
+++ b/backends/apple/coreml/recipes/coreml_recipe_provider.py
@@ -0,0 +1,392 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+from typing import Any, Optional, Sequence
+
+import coremltools as ct
+import torch
+
+from executorch.backends.apple.coreml.compiler import CoreMLBackend
+from executorch.backends.apple.coreml.partition.coreml_partitioner import (
+ CoreMLPartitioner,
+)
+from executorch.backends.apple.coreml.recipes.coreml_recipe_types import (
+ COREML_BACKEND,
+ CoreMLRecipeType,
+)
+
+from executorch.exir import EdgeCompileConfig
+from executorch.export import (
+ AOQuantizationConfig,
+ BackendRecipeProvider,
+ ExportRecipe,
+ LoweringRecipe,
+ QuantizationRecipe,
+ RecipeType,
+)
+from torchao.quantization.granularity import PerAxis, PerGroup
+from torchao.quantization.quant_api import IntxWeightOnlyConfig
+
+
+class CoreMLRecipeProvider(BackendRecipeProvider):
+ @property
+ def backend_name(self) -> str:
+ return COREML_BACKEND
+
+ def get_supported_recipes(self) -> Sequence[RecipeType]:
+ return list(CoreMLRecipeType)
+
+ def create_recipe(
+ self, recipe_type: RecipeType, **kwargs: Any
+ ) -> Optional[ExportRecipe]:
+ """Create CoreML recipe with precision and compute unit combinations"""
+
+ if recipe_type not in self.get_supported_recipes():
+ return None
+
+ if ct is None:
+ raise ImportError(
+ "coremltools is required for CoreML recipes. "
+ "Install it with: pip install coremltools"
+ )
+
+ # Validate kwargs
+ self._validate_recipe_kwargs(recipe_type, **kwargs)
+
+ if recipe_type == CoreMLRecipeType.FP32:
+ return self._build_fp_recipe(recipe_type, ct.precision.FLOAT32, **kwargs)
+ elif recipe_type == CoreMLRecipeType.FP16:
+ return self._build_fp_recipe(recipe_type, ct.precision.FLOAT16, **kwargs)
+ elif recipe_type == CoreMLRecipeType.PT2E_INT8_STATIC:
+ return self._build_pt2e_quantized_recipe(
+ recipe_type, activation_dtype=torch.quint8, **kwargs
+ )
+ elif recipe_type == CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY:
+ return self._build_pt2e_quantized_recipe(
+ recipe_type, activation_dtype=torch.float32, **kwargs
+ )
+ elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL:
+ return self._build_torchao_quantized_recipe(
+ recipe_type,
+ weight_dtype=torch.int4,
+ is_per_channel=True,
+ **kwargs,
+ )
+ elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP:
+ group_size = kwargs.pop("group_size", 32)
+ return self._build_torchao_quantized_recipe(
+ recipe_type,
+ weight_dtype=torch.int4,
+ is_per_channel=False,
+ group_size=group_size,
+ **kwargs,
+ )
+ elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL:
+ return self._build_torchao_quantized_recipe(
+ recipe_type, weight_dtype=torch.int8, is_per_channel=True, **kwargs
+ )
+ elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP:
+ group_size = kwargs.pop("group_size", 32)
+ return self._build_torchao_quantized_recipe(
+ recipe_type,
+ weight_dtype=torch.int8,
+ is_per_channel=False,
+ group_size=group_size,
+ **kwargs,
+ )
+ elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
+ bits = kwargs.pop("bits")
+ block_size = kwargs.pop("block_size")
+ return self._build_codebook_quantized_recipe(
+ recipe_type, bits=bits, block_size=block_size, **kwargs
+ )
+
+ return None
+
+ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> None:
+ """Validate kwargs for each recipe type"""
+ expected_keys = self._get_expected_keys(recipe_type)
+
+ unexpected = set(kwargs.keys()) - expected_keys
+ if unexpected:
+ raise ValueError(
+ f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
+ )
+
+ self._validate_base_parameters(kwargs)
+ self._validate_group_size_parameter(recipe_type, kwargs)
+ self._validate_codebook_parameters(recipe_type, kwargs)
+
+ def _get_expected_keys(self, recipe_type: RecipeType) -> set:
+ """Get expected parameter keys for a recipe type"""
+ common_keys = {"minimum_deployment_target", "compute_unit"}
+
+ if recipe_type in [
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
+ ]:
+ return common_keys | {"group_size", "filter_fn"}
+ elif recipe_type in [
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL,
+ ]:
+ return common_keys | {"filter_fn"}
+ elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
+ return common_keys | {"bits", "block_size", "filter_fn"}
+ else:
+ return common_keys
+
+ def _validate_base_parameters(self, kwargs: Any) -> None:
+ """Validate minimum_deployment_target and compute_unit parameters"""
+ if "minimum_deployment_target" in kwargs:
+ minimum_deployment_target = kwargs["minimum_deployment_target"]
+ if not isinstance(minimum_deployment_target, ct.target):
+ raise ValueError(
+ f"Parameter 'minimum_deployment_target' must be an enum of type ct.target, got {type(minimum_deployment_target)}"
+ )
+
+ if "compute_unit" in kwargs:
+ compute_unit = kwargs["compute_unit"]
+ if not isinstance(compute_unit, ct.ComputeUnit):
+ raise ValueError(
+ f"Parameter 'compute_unit' must be an enum of type ct.ComputeUnit, got {type(compute_unit)}"
+ )
+
+ def _validate_group_size_parameter(
+ self, recipe_type: RecipeType, kwargs: Any
+ ) -> None:
+ """Validate group_size parameter for applicable recipe types"""
+ if (
+ recipe_type
+ in [
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
+ ]
+ and "group_size" in kwargs
+ ):
+ group_size = kwargs["group_size"]
+ if not isinstance(group_size, int):
+ raise ValueError(
+ f"Parameter 'group_size' must be an integer, got {type(group_size).__name__}: {group_size}"
+ )
+ if group_size <= 0:
+ raise ValueError(
+ f"Parameter 'group_size' must be positive, got: {group_size}"
+ )
+
+ def _validate_codebook_parameters(
+ self, recipe_type: RecipeType, kwargs: Any
+ ) -> None:
+ """Validate bits and block_size parameters for codebook recipe type"""
+ if recipe_type != CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
+ return
+
+ # Both bits and block_size must be present
+ if not ("bits" in kwargs and "block_size" in kwargs):
+ raise ValueError(
+ "Parameters 'bits' and 'block_size' must be present for codebook recipes"
+ )
+
+ if "bits" in kwargs:
+ bits = kwargs["bits"]
+ if not isinstance(bits, int):
+ raise ValueError(
+ f"Parameter 'bits' must be an integer, got {type(bits).__name__}: {bits}"
+ )
+ if not (1 <= bits <= 8):
+ raise ValueError(
+ f"Parameter 'bits' must be between 1 and 8, got: {bits}"
+ )
+
+ if "block_size" in kwargs:
+ block_size = kwargs["block_size"]
+ if not isinstance(block_size, list):
+ raise ValueError(
+ f"Parameter 'block_size' must be a list, got {type(block_size).__name__}: {block_size}"
+ )
+
+ def _validate_and_set_deployment_target(
+ self, kwargs: Any, min_target: ct.target, quantization_type: str
+ ) -> None:
+ """Validate or set minimum deployment target for quantization recipes"""
+ minimum_deployment_target = kwargs.get("minimum_deployment_target", None)
+ if minimum_deployment_target and minimum_deployment_target < min_target:
+ raise ValueError(
+ f"minimum_deployment_target must be {str(min_target)} or higher for {quantization_type} quantization"
+ )
+ else:
+ # Default to the minimum target for this quantization type
+ kwargs["minimum_deployment_target"] = min_target
+
+ def _build_fp_recipe(
+ self,
+ recipe_type: RecipeType,
+ precision: ct.precision,
+ **kwargs: Any,
+ ) -> ExportRecipe:
+ """Build FP32/FP16 recipe"""
+ lowering_recipe = self._get_coreml_lowering_recipe(
+ compute_precision=precision,
+ **kwargs,
+ )
+
+ return ExportRecipe(
+ name=recipe_type.value,
+ lowering_recipe=lowering_recipe,
+ )
+
+ def _build_pt2e_quantized_recipe(
+ self,
+ recipe_type: RecipeType,
+ activation_dtype: torch.dtype,
+ **kwargs: Any,
+ ) -> ExportRecipe:
+ """Build PT2E-based quantization recipe"""
+ from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
+
+ self._validate_and_set_deployment_target(kwargs, ct.target.iOS17, "pt2e")
+
+ # Validate activation_dtype
+ assert activation_dtype in [
+ torch.quint8,
+ torch.float32,
+ ], f"activation_dtype must be torch.quint8 or torch.float32, got {activation_dtype}"
+
+ # Create quantization config
+ config = ct.optimize.torch.quantization.LinearQuantizerConfig(
+ global_config=ct.optimize.torch.quantization.ModuleLinearQuantizerConfig(
+ quantization_scheme="symmetric",
+ activation_dtype=activation_dtype,
+ weight_dtype=torch.qint8,
+ weight_per_channel=True,
+ )
+ )
+
+ quantizer = CoreMLQuantizer(config)
+ quantization_recipe = QuantizationRecipe(quantizers=[quantizer])
+
+ lowering_recipe = self._get_coreml_lowering_recipe(**kwargs)
+
+ return ExportRecipe(
+ name=recipe_type.value,
+ quantization_recipe=quantization_recipe,
+ lowering_recipe=lowering_recipe,
+ )
+
+ def _build_torchao_quantized_recipe(
+ self,
+ recipe_type: RecipeType,
+ weight_dtype: torch.dtype,
+ is_per_channel: bool,
+ group_size: int = 32,
+ **kwargs: Any,
+ ) -> ExportRecipe:
+ """Build TorchAO-based quantization recipe"""
+ if is_per_channel:
+ weight_granularity = PerAxis(axis=0)
+ else:
+ weight_granularity = PerGroup(group_size=group_size)
+
+ # Use user-provided filter_fn if provided
+ filter_fn = kwargs.get("filter_fn", None)
+ config = AOQuantizationConfig(
+ ao_base_config=IntxWeightOnlyConfig(
+ weight_dtype=weight_dtype,
+ granularity=weight_granularity,
+ ),
+ filter_fn=filter_fn,
+ )
+
+ quantization_recipe = QuantizationRecipe(
+ quantizers=None,
+ ao_quantization_configs=[config],
+ )
+
+ # override minimum_deployment_target to ios18 for torchao (GH issue #13122)
+ self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "torchao")
+ lowering_recipe = self._get_coreml_lowering_recipe(**kwargs)
+
+ return ExportRecipe(
+ name=recipe_type.value,
+ quantization_recipe=quantization_recipe,
+ lowering_recipe=lowering_recipe,
+ )
+
+ def _build_codebook_quantized_recipe(
+ self,
+ recipe_type: RecipeType,
+ bits: int,
+ block_size: list,
+ **kwargs: Any,
+ ) -> ExportRecipe:
+ """Build codebook/palettization quantization recipe"""
+ from torchao.prototype.quantization.codebook_coreml import (
+ CodebookWeightOnlyConfig,
+ )
+
+ self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "codebook")
+
+ # Get the appropriate dtype (torch.uint1 through torch.uint8)
+ dtype = getattr(torch, f"uint{bits}")
+
+ # Use user-provided filter_fn or default to Linear/Embedding layers
+ filter_fn = kwargs.get(
+ "filter_fn",
+ lambda m, fqn: (
+ isinstance(m, torch.nn.Embedding) or isinstance(m, torch.nn.Linear)
+ ),
+ )
+
+ config = AOQuantizationConfig(
+ ao_base_config=CodebookWeightOnlyConfig(
+ dtype=dtype,
+ block_size=block_size,
+ ),
+ filter_fn=filter_fn,
+ )
+
+ quantization_recipe = QuantizationRecipe(
+ quantizers=None,
+ ao_quantization_configs=[config],
+ )
+
+ lowering_recipe = self._get_coreml_lowering_recipe(**kwargs)
+
+ return ExportRecipe(
+ name=recipe_type.value,
+ quantization_recipe=quantization_recipe,
+ lowering_recipe=lowering_recipe,
+ )
+
+ def _get_coreml_lowering_recipe(
+ self,
+ compute_precision: ct.precision = ct.precision.FLOAT16,
+ **kwargs: Any,
+ ) -> LoweringRecipe:
+ """Get CoreML lowering recipe with optional precision"""
+ compile_specs = CoreMLBackend.generate_compile_specs(
+ compute_precision=compute_precision,
+ compute_unit=kwargs.get("compute_unit", ct.ComputeUnit.ALL),
+ minimum_deployment_target=kwargs.get("minimum_deployment_target", None),
+ )
+
+ minimum_deployment_target = kwargs.get("minimum_deployment_target", None)
+ take_over_mutable_buffer = True
+ if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
+ take_over_mutable_buffer = False
+
+ partitioner = CoreMLPartitioner(
+ compile_specs=compile_specs,
+ take_over_mutable_buffer=take_over_mutable_buffer,
+ )
+
+ edge_compile_config = EdgeCompileConfig(
+ _check_ir_validity=False,
+ _skip_dim_order=False,
+ )
+
+ return LoweringRecipe(
+ partitioners=[partitioner], edge_compile_config=edge_compile_config
+ )
diff --git a/backends/apple/coreml/recipes/coreml_recipe_types.py b/backends/apple/coreml/recipes/coreml_recipe_types.py
new file mode 100644
index 00000000000..fc7292c3c58
--- /dev/null
+++ b/backends/apple/coreml/recipes/coreml_recipe_types.py
@@ -0,0 +1,53 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+from executorch.export import RecipeType
+
+
+COREML_BACKEND: str = "coreml"
+
+
+class CoreMLRecipeType(RecipeType):
+ """CoreML-specific generic recipe types"""
+
+ ## All the recipes accept common kwargs
+ # 1. minimum_deployment_unit (default: None)
+ # 2. compute_unit (default: ct.ComputeUnit.ALL)
+
+ # FP32 precision recipe, defaults to values published by the CoreML backend and partitioner
+ FP32 = "coreml_fp32"
+
+ # FP16 precision recipe, defaults to values published by the CoreML backend and partitioner
+ FP16 = "coreml_fp16"
+
+ ## PT2E-based quantization recipes
+ # INT8 Static Quantization (weights + activations), requires calibration dataset
+ PT2E_INT8_STATIC = "coreml_pt2e_int8_static"
+ # INT8 Weight-only Quantization (activations remain FP32)
+ PT2E_INT8_WEIGHT_ONLY = "coreml_pt2e_int8_weight_only"
+
+ ## TorchAO-based quantization recipes
+ # All TorchAO recipes accept filter_fn kwarg to control which layers are quantized
+ # INT4 Weight-only Quantization, per-channel (axis=0)
+ # Additional kwargs: filter_fn (default: Embedding and linear layers)
+ TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL = "coreml_torchao_int4_weight_only_per_channel"
+ # INT4 Weight-only Quantization, per-group
+ # Additional kwargs: group_size (default: 32), filter_fn (default: Embedding and linear layers)
+ TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP = "coreml_torchao_int4_weight_only_per_group"
+ # INT8 Weight-only Quantization, per-channel (axis=0)
+ # Additional kwargs: filter_fn (default: Embedding and linear layers)
+ TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL = "coreml_torchao_int8_weight_only_per_channel"
+ # INT8 Weight-only Quantization, per-group
+ # Additional kwargs: group_size (default: 32), filter_fn (default: Embedding and linear layers)
+ TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP = "coreml_torchao_int8_weight_only_per_group"
+
+ ## Codebook/Palettization Quantization
+ # Additional mandatory kwargs: bits (range: 1-8), block_size (list of ints),
+ # filter_fn (default: targets Linear and Embedding layers)
+ CODEBOOK_WEIGHT_ONLY = "coreml_codebook_weight_only"
+
+ @classmethod
+ def get_backend_name(cls) -> str:
+ return COREML_BACKEND
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.h b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.h
index 11d957044e9..a9e06efa90d 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.h
@@ -99,6 +99,17 @@ NS_ASSUME_NONNULL_BEGIN
- (NSUInteger)compact:(NSUInteger)sizeInBytes error:(NSError* __autoreleasing*)error;
+/// Executes a block with a unique temporary directory.
+///
+/// A new temporary subdirectory URL is created inside the receiver’s designated
+/// base directory. The directory is passed to the block, which can use it to
+/// perform temporary file operations. After the block finishes executing,
+/// the directory and its contents are removed.
+///
+/// @param block A block to execute. The block receives a unique URL.
+- (void)withTemporaryDirectory:(void (^)(NSURL* directoryURL))block;
+
+
/// Purges the assets storage. The assets are moved to the trash directory and are asynchronously
/// deleted.
///
@@ -117,6 +128,12 @@ NS_ASSUME_NONNULL_BEGIN
/// contents are deleted asynchronously.
@property (copy, readonly, nonatomic) NSURL* trashDirectoryURL;
+
+/// The staging directory URL, used to hold assets that are being prepared or processed
+/// before they are moved into their final location. The contents of this directory
+/// are temporary and may be cleared when no longer needed.
+@property (copy, readonly, nonatomic) NSURL* stagingDirectoryURL;
+
/// The file manager.
@property (strong, readonly, nonatomic) NSFileManager* fileManager;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
index 256026e1f09..53c3d1cdc69 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
@@ -254,6 +254,29 @@ BOOL is_asset_alive(NSMapTable *assets_in_use_map,
return assets;
}
+
+NSURL * _Nullable move_to_directory(NSURL *url,
+ NSURL *directoryURL,
+ NSFileManager *fileManager,
+ NSError * __autoreleasing *error) {
+ if (!url) {
+ ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorInternalError, "Move operation failed: source URL is nil.");
+ return nil;
+ }
+
+ if (!directoryURL) {
+ ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorInternalError, "Move operation failed: destination URL is nil.");
+ return nil;
+ }
+
+ NSURL *dstURL = [directoryURL URLByAppendingPathComponent:[NSUUID UUID].UUIDString];
+ if (![fileManager moveItemAtURL:url toURL:dstURL error:error]) {
+ return nil;
+ }
+
+ return dstURL;
+}
+
} //namespace
@interface ETCoreMLAssetManager () {
@@ -299,12 +322,17 @@ - (nullable instancetype)initWithDatabase:(const std::shared_ptr&)data
if (!managedAssetsDirectoryURL) {
return nil;
}
-
+
NSURL *managedTrashDirectoryURL = ::create_directory_if_needed(trashDirectoryURL, @"models", fileManager, error);
if (!managedTrashDirectoryURL) {
return nil;
}
-
+
+ NSURL *managedStagingDirectoryURL = ::create_directory_if_needed(assetsDirectoryURL, @"staging", fileManager, error);
+ if (!managedStagingDirectoryURL) {
+ return nil;
+ }
+
// If directory is empty then purge the stores
if (::is_directory_empty(managedAssetsDirectoryURL, fileManager, nil)) {
assetsMetaStore.impl()->purge(ec);
@@ -315,6 +343,7 @@ - (nullable instancetype)initWithDatabase:(const std::shared_ptr&)data
_assetsStore = std::move(assetsStore);
_assetsMetaStore = std::move(assetsMetaStore);
_assetsDirectoryURL = managedAssetsDirectoryURL;
+ _stagingDirectoryURL = managedStagingDirectoryURL;
_trashDirectoryURL = managedTrashDirectoryURL;
_estimatedSizeInBytes = sizeInBytes.value();
_maxAssetsSizeInBytes = maxAssetsSizeInBytes;
@@ -346,15 +375,15 @@ - (nullable instancetype)initWithDatabaseURL:(NSURL *)databaseURL
error:error];
}
-- (nullable NSURL *)moveURL:(NSURL *)url
- toUniqueURLInDirectory:(NSURL *)directoryURL
- error:(NSError * __autoreleasing *)error {
- NSURL *dstURL = [directoryURL URLByAppendingPathComponent:[NSUUID UUID].UUIDString];
- if (![self.fileManager moveItemAtURL:url toURL:dstURL error:error]) {
- return nil;
+- (void)withTemporaryDirectory:(void (^)(NSURL *directoryURL))block {
+ NSURL *dstURL = [self.stagingDirectoryURL URLByAppendingPathComponent:[NSUUID UUID].UUIDString];
+ block(dstURL);
+ if (![self.fileManager fileExistsAtPath:dstURL.path]) {
+ return;
}
-
- return dstURL;
+
+ move_to_directory(dstURL, self.trashDirectoryURL, self.fileManager, nil);
+ [self cleanupTrashDirectory];
}
- (void)cleanupAssetIfNeeded:(ETCoreMLAsset *)asset {
@@ -407,9 +436,8 @@ - (nullable ETCoreMLAsset *)_storeAssetAtURL:(NSURL *)srcURL
return false;
}
- // If an asset exists move it
- [self moveURL:dstURL toUniqueURLInDirectory:self.trashDirectoryURL error:nil];
-
+ // If a file already exists at `dstURL`, move it to the trash for removal.
+ move_to_directory(dstURL, self.trashDirectoryURL, self.fileManager, nil);
// Move the asset to assets directory.
if (![self.fileManager moveItemAtURL:srcURL toURL:dstURL error:error]) {
return false;
@@ -433,16 +461,25 @@ - (nullable ETCoreMLAsset *)_storeAssetAtURL:(NSURL *)srcURL
}
- (void)triggerCompaction {
- if (self.estimatedSizeInBytes < self.maxAssetsSizeInBytes) {
- return;
+ if (self.estimatedSizeInBytes >= self.maxAssetsSizeInBytes) {
+ __weak __typeof(self) weakSelf = self;
+ dispatch_async(self.syncQueue, ^{
+ NSError *localError = nil;
+ if (![weakSelf _compact:weakSelf.maxAssetsSizeInBytes error:&localError]) {
+ ETCoreMLLogError(localError, "Failed to compact asset store.");
+ }
+ });
}
-
+
+ // Always clean the trash directory to ensure a minimal footprint.
+ // The `trashQueue` is serialized, so only one cleanup will run at a time.
+ [self cleanupTrashDirectory];
+}
+
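+// Deletes the contents of the trash directory asynchronously on the serial
+// `trashQueue`, so at most one cleanup is in flight at a time.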
+- (void)cleanupTrashDirectory {
__weak __typeof(self) weakSelf = self;
- dispatch_async(self.syncQueue, ^{
- NSError *localError = nil;
- if (![weakSelf _compact:self.maxAssetsSizeInBytes error:&localError]) {
- ETCoreMLLogError(localError, "Failed to compact asset store.");
- }
+ dispatch_async(self.trashQueue, ^{
+ [weakSelf removeFilesInTrashDirectory];
});
}
@@ -548,7 +585,7 @@ - (BOOL)_removeAssetWithIdentifier:(NSString *)identifier
NSURL *assetURL = ::get_asset_url(assetValue);
if ([self.fileManager fileExistsAtPath:assetURL.path] &&
- ![self moveURL:assetURL toUniqueURLInDirectory:self.trashDirectoryURL error:error]) {
+ !move_to_directory(assetURL, self.trashDirectoryURL, self.fileManager, error)) {
return false;
}
@@ -649,13 +686,7 @@ - (NSUInteger)_compact:(NSUInteger)sizeInBytes error:(NSError * __autoreleasing
identifier);
}
}
-
- // Trigger cleanup.
- __weak __typeof(self) weakSelf = self;
- dispatch_async(self.trashQueue, ^{
- [weakSelf removeFilesInTrashDirectory];
- });
-
+
return _estimatedSizeInBytes;
}
@@ -664,7 +695,10 @@ - (NSUInteger)compact:(NSUInteger)sizeInBytes error:(NSError * __autoreleasing *
dispatch_sync(self.syncQueue, ^{
result = [self _compact:sizeInBytes error:error];
});
-
+
+ // Always clean the trash directory to ensure a minimal footprint.
+ // The `trashQueue` is serialized, so only one cleanup will run at a time.
+ [self cleanupTrashDirectory];
return result;
}
@@ -708,7 +742,7 @@ - (BOOL)_purge:(NSError * __autoreleasing *)error {
}
// Move the whole assets directory to the trash directory.
- if (![self moveURL:self.assetsDirectoryURL toUniqueURLInDirectory:self.trashDirectoryURL error:error]) {
+ if (!move_to_directory(self.assetsDirectoryURL, self.trashDirectoryURL, self.fileManager, error)) {
return false;
}
@@ -724,13 +758,7 @@ - (BOOL)_purge:(NSError * __autoreleasing *)error {
::set_error_from_error_code(ec, error);
// Trigger cleanup
- if (status) {
- __weak __typeof(self) weakSelf = self;
- dispatch_async(self.trashQueue, ^{
- [weakSelf removeFilesInTrashDirectory];
- });
- }
-
+ [self cleanupTrashDirectory];
+ return static_cast<BOOL>(status);
}
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
index 05aa910d954..9e8ae04842e 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
@@ -62,21 +62,12 @@ + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL
if (model) {
return model;
}
-
- if (localError) {
- ETCoreMLLogError(localError,
- "Failed to load model from compiled asset with identifier = %@",
- identifier);
- }
-
- // If store failed then we will load the model from compiledURL.
- auto backingAsset = Asset::make(compiledModelURL, identifier, assetManager.fileManager, error);
- if (!backingAsset) {
- return nil;
+
+ if (error) {
+ *error = localError;
}
-
- asset = [[ETCoreMLAsset alloc] initWithBackingAsset:backingAsset.value()];
- return ::get_model_from_asset(asset, configuration, metadata, error);
+
+ return nil;
}
@end
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
index f4cfd2146ac..c27b42566dc 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
@@ -345,6 +345,10 @@ void add_compute_unit(std::string& identifier, MLComputeUnits compute_units) {
return [ETCoreMLModelDebugInfo modelDebugInfoFromData:file_data error:error];
}
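+// Identifier under which the raw (uncompiled) model asset is cached, kept
+// distinct from the compiled asset stored under the plain identifier.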
+NSString *raw_model_identifier(NSString *identifier) {
+ return [NSString stringWithFormat:@"raw_%@", identifier];
+}
+
#endif
} //namespace
@@ -408,7 +412,7 @@ - (nullable ETCoreMLAsset *)assetWithIdentifier:(NSString *)identifier {
return modelAsset;
}
- NSError *localError = nil;
+ __block NSError *localError = nil;
modelAsset = [self.assetManager assetWithIdentifier:identifier error:&localError];
if (localError) {
ETCoreMLLogError(localError,
@@ -420,8 +424,9 @@ - (nullable ETCoreMLAsset *)assetWithIdentifier:(NSString *)identifier {
}
- (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
+ modelURL:(nullable NSURL *)modelURL
inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
- assetManager:(ETCoreMLAssetManager *)assetManager
+ dstURL:(NSURL *)dstURL
error:(NSError * __autoreleasing *)error {
auto modelAssetType = get_model_asset_type(inMemoryFS);
if (!modelAssetType) {
@@ -430,78 +435,132 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
"AOT blob is missing model file.");
return nil;
}
-
- NSURL *dstURL = [self.assetManager.trashDirectoryURL URLByAppendingPathComponent:[NSUUID UUID].UUIDString];
- NSURL *modelURL = ::write_model_files(dstURL, self.fileManager, identifier, modelAssetType.value(), inMemoryFS, error);
+
+ // If modelURL is not provided, write model files to the destination directory (dstURL)
+ // and obtain a URL pointing to them. Otherwise, use the provided modelURL.
+ modelURL = (modelURL == nil) ? ::write_model_files(dstURL, self.fileManager, identifier, modelAssetType.value(), inMemoryFS, error) : modelURL;
+ if (!modelURL) {
+ // Failed to generate or locate model files, return nil.
+ return nil;
+ }
+
+ // Handle based on the type of the model asset.
switch (modelAssetType.value()) {
case ModelAssetType::CompiledModel: {
- // Model is already compiled.
+ // The model is already compiled; no further action needed.
+ // Return the existing model URL.
return modelURL;
}
-
+
case ModelAssetType::Model: {
- // Compile the model.
+ // The model is not compiled yet.
+ // Compile the model at the specified URL with a maximum wait time of 5 minutes.
NSURL *compiledModelURL = [ETCoreMLModelCompiler compileModelAtURL:modelURL
maxWaitTimeInSeconds:(5 * 60)
error:error];
-
+ // Return the URL of the compiled model or nil if compilation fails.
return compiledModelURL;
}
}
}
-#if ET_EVENT_TRACER_ENABLED
-- (nullable id<ETCoreMLModelExecutor>)modelExecutorWithMetadata:(const ModelMetadata&)metadata
- inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
- configuration:(MLModelConfiguration *)configuration
- error:(NSError * __autoreleasing *)error {
+- (nullable ETCoreMLAsset *)compiledModelAssetWithMetadata:(const ModelMetadata&)metadata
+ modelURL:(nullable NSURL *)modelURL
+ inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
+ error:(NSError * __autoreleasing *)error {
NSString *identifier = @(metadata.identifier.c_str());
- // Otherwise try to retrieve the compiled asset.
- ETCoreMLAsset *compiledModelAsset = [self assetWithIdentifier:identifier];
+ __block ETCoreMLAsset *compiledModelAsset = [self assetWithIdentifier:identifier];
if (compiledModelAsset) {
- ETCoreMLLogInfo("Cache Hit: Successfully retrieved model with identifier=%@ from the models cache.", identifier);
+ ETCoreMLLogInfo("Cache Hit: Successfully retrieved compiled model with identifier=%@ from the models cache.", identifier);
} else {
- ETCoreMLLogInfo("Cache Miss: Model with identifier=%@ was not found in the models cache.", identifier);
+ ETCoreMLLogInfo("Cache Miss: Compiled Model with identifier=%@ was not found in the models cache.", identifier);
}
-
- // Create a unique directory for writing model files.
- NSURL *dstURL = [self.assetManager.trashDirectoryURL URLByAppendingPathComponent:[NSUUID UUID].UUIDString];
- auto modelAssetType = get_model_asset_type(inMemoryFS);
- ETCoreMLAsset *modelAsset = nil;
- // Write the model files.
- if (modelAssetType == ModelAssetType::Model) {
- NSURL *modelURL = ::write_model_files(dstURL, self.fileManager, identifier, modelAssetType.value(), inMemoryFS, error);
- if (modelURL) {
- modelAsset = make_asset(modelURL,
- identifier,
- self.fileManager,
- error);
+
+ [self.assetManager withTemporaryDirectory:^(NSURL * _Nonnull directoryURL) {
+ if (compiledModelAsset) {
+ return;
}
- }
-
- if (!compiledModelAsset) {
- // Compile the model.
+
+ // The directory specified by `directoryURL` is unique and will be automatically cleaned up
+ // once the enclosing block completes.
NSURL *compiledModelURL = [self compiledModelURLWithIdentifier:identifier
+ modelURL:modelURL
inMemoryFS:inMemoryFS
- assetManager:self.assetManager
+ dstURL:directoryURL
error:error];
- compiledModelAsset = make_asset(compiledModelURL,
- identifier,
- self.fileManager,
- error);
- }
-
- if (!compiledModelAsset) {
- return nil;
+ if (compiledModelURL) {
+ // Move the compiled model to the asset manager to transfer ownership.
+ compiledModelAsset = [self.assetManager storeAssetAtURL:compiledModelURL withIdentifier:identifier error:error];
+ }
+ }];
+
+ return compiledModelAsset;
+}
+
+#if ET_EVENT_TRACER_ENABLED
+- (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadata
+ inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
+ error:(NSError * __autoreleasing *)error {
+ NSString *identifier = @(metadata.identifier.c_str());
+ NSString *rawIdentifier = raw_model_identifier(identifier);
+ __block ETCoreMLAsset *modelAsset = [self assetWithIdentifier:rawIdentifier];
+ if (modelAsset) {
+ ETCoreMLLogInfo("Cache Hit: Successfully retrieved model with identifier=%@ from the models cache.", identifier);
+ } else {
+ ETCoreMLLogInfo("Cache Miss: Model with identifier=%@ was not found in the models cache.", identifier);
}
-
+
+ [self.assetManager withTemporaryDirectory:^(NSURL * _Nonnull directoryURL) {
+ if (modelAsset) {
+ return;
+ }
+
+ auto modelAssetType = get_model_asset_type(inMemoryFS);
+ if (modelAssetType != ModelAssetType::Model) {
+ return;
+ }
+
+ // The directory specified by `directoryURL` is unique and will be automatically cleaned up
+ // once the enclosing block completes.
+ NSURL *modelURL = ::write_model_files(directoryURL,
+ self.fileManager,
+ identifier,
+ modelAssetType.value(),
+ inMemoryFS,
+ error);
+ if (modelURL) {
+ // Move the model to the asset manager to transfer ownership.
+ modelAsset = [self.assetManager storeAssetAtURL:modelURL withIdentifier:rawIdentifier error:error];
+ }
+ }];
+
+ return modelAsset;
+}
+
+- (nullable id<ETCoreMLModelExecutor>)modelExecutorWithMetadata:(const ModelMetadata&)metadata
+ inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
+ configuration:(MLModelConfiguration *)configuration
+ error:(NSError * __autoreleasing *)error {
NSError *localError = nil;
- ETCoreMLModelDebugInfo *debug_info = get_model_debug_info(inMemoryFS, &localError);
+ ETCoreMLAsset *modelAsset = [self modelAssetWithMetadata:metadata inMemoryFS:inMemoryFS error:&localError];
if (localError) {
- ETCoreMLLogError(localError, "Failed to parse debug info file");
+ if (error) {
+ *error = localError;
+ }
+
+ return nil;
+ }
+
+ ETCoreMLAsset *compiledModelAsset = [self compiledModelAssetWithMetadata:metadata
+ modelURL:modelAsset.contentURL
+ inMemoryFS:inMemoryFS
+ error:error];
+ if (!compiledModelAsset) {
+ return nil;
}
-
+ ETCoreMLModelDebugInfo *debug_info = get_model_debug_info(inMemoryFS, error);
+ // The analyzer requires both the raw (uncompiled) asset and the compiled model asset to perform analysis.
return [[ETCoreMLModelAnalyzer alloc] initWithCompiledModelAsset:compiledModelAsset
modelAsset:modelAsset
modelDebugInfo:debug_info
@@ -510,41 +569,33 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
assetManager:self.assetManager
error:error];
}
-
#else
- (nullable id<ETCoreMLModelExecutor>)modelExecutorWithMetadata:(const ModelMetadata&)metadata
inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
configuration:(MLModelConfiguration *)configuration
error:(NSError * __autoreleasing *)error {
- NSString *identifier = @(metadata.identifier.c_str());
- // Otherwise try to retrieve the compiled asset.
- ETCoreMLAsset *asset = [self assetWithIdentifier:identifier];
- ETCoreMLModel *model = asset ? get_model_from_asset(asset, configuration, metadata, error) : nil;
- if (model) {
- ETCoreMLLogInfo("Cache Hit: Successfully retrieved model with identifier=%@ from the models cache.", identifier);
- return [[ETCoreMLDefaultModelExecutor alloc] initWithModel:model];
+ ETCoreMLAsset *compiledModelAsset = [self compiledModelAssetWithMetadata:metadata
+ modelURL:nil
+ inMemoryFS:inMemoryFS
+ error:error];
+ if (!compiledModelAsset) {
+ return nil;
}
-
- ETCoreMLLogInfo("Cache Miss: Model with identifier=%@ was not found in the models cache.", identifier);
- // Compile the model.
- NSURL *compiledModelURL = [self compiledModelURLWithIdentifier:identifier
- inMemoryFS:inMemoryFS
- assetManager:self.assetManager
- error:error];
- if (!compiledModelURL) {
+
+ ETCoreMLModel *model = [ETCoreMLModelLoader loadModelWithContentsOfURL:compiledModelAsset.contentURL
+ configuration:configuration
+ metadata:metadata
+ assetManager:self.assetManager
+ error:error];
+ if (!model) {
return nil;
}
-
- model = [ETCoreMLModelLoader loadModelWithContentsOfURL:compiledModelURL
- configuration:configuration
- metadata:metadata
- assetManager:self.assetManager
- error:error];
-
+
return [[ETCoreMLDefaultModelExecutor alloc] initWithModel:model];
}
#endif
+
- (nullable id<ETCoreMLModelExecutor>)_modelExecutorWithAOTData:(NSData *)data
configuration:(MLModelConfiguration *)configuration
error:(NSError * __autoreleasing *)error {
@@ -729,6 +780,7 @@ - (BOOL)executeModelWithHandle:(ModelHandle *)handle
args.count);
return result;
}
+
NSError *localError = nil;
@autoreleasepool {
NSArray *inputs = [args subarrayWithRange:NSMakeRange(0, model.orderedInputNames.count)];
@@ -748,11 +800,11 @@ - (BOOL)executeModelWithHandle:(ModelHandle *)handle
result = YES;
}
}
- if (!result) {
- if (error) {
- *error = localError;
- }
+
+ if (localError && error) {
+ *error = localError;
}
+
return result;
}
diff --git a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
index 9a0b4facc89..04a95e8a5a3 100644
--- a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
+++ b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
@@ -46,6 +46,7 @@
using executorch::runtime::get_backend_class;
using executorch::runtime::Result;
using executorch::aten::SizesType;
+using executorch::runtime::Span;
using executorch::aten::Tensor;
using executorch::runtime::kTensorDimensionLimit;
@@ -88,17 +89,17 @@
ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type());
return std::nullopt;
}
-
+
std::vector<ssize_t> strides(tensor.strides().begin(), tensor.strides().end());
std::vector<size_t> shape(tensor.sizes().begin(), tensor.sizes().end());
-
+
// If tensor is rank 0, wrap in rank 1
// See https://github.com/apple/coremltools/blob/8.2/coremltools/converters/mil/frontend/torch/exir_utils.py#L73
if (shape.size() == 0) {
shape.push_back(1);
strides.push_back(1);
}
-
+
MultiArray::MemoryLayout layout(dataType.value(), std::move(shape), std::move(strides));
switch (argType) {
case ArgType::Input: {
@@ -197,7 +198,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
Error CoreMLBackendDelegate::execute(BackendExecutionContext& context,
DelegateHandle* handle,
- EValue** args) const {
+ Span<EValue*> args) const {
const auto& nArgs = impl_->get_num_arguments(handle);
std::vector<EValue*> delegate_args;
size_t nInputs = nArgs.first;
@@ -281,9 +282,11 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
}
namespace {
-auto cls = CoreMLBackendDelegate();
-Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, &cls};
-static auto success_with_compiler = register_backend(backend);
+ #ifndef LAZY_LOAD_IOS_PYTORCH_INITIALIZER
+ auto cls = CoreMLBackendDelegate();
+ Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, &cls};
+ static auto success_with_compiler = register_backend(backend);
+ #endif
}
} // namespace coreml
diff --git a/backends/apple/coreml/runtime/delegate/executorch_operations.h b/backends/apple/coreml/runtime/delegate/executorch_operations.h
new file mode 100644
index 00000000000..4853c7645be
--- /dev/null
+++ b/backends/apple/coreml/runtime/delegate/executorch_operations.h
@@ -0,0 +1,5 @@
+#pragma once
+
+namespace executorch::core_ml_backend_delegate {
+void register_backend_coreml();
+} // namespace executorch::core_ml_backend_delegate
diff --git a/backends/apple/coreml/runtime/delegate/executorch_operations.mm b/backends/apple/coreml/runtime/delegate/executorch_operations.mm
new file mode 100644
index 00000000000..1206710d0a6
--- /dev/null
+++ b/backends/apple/coreml/runtime/delegate/executorch_operations.mm
@@ -0,0 +1,29 @@
+#include "executorch_operations.h"
+#import
+#import "ETCoreMLStrings.h"
+#import "backend_delegate.h"
+
+#import
+#import
+#import
+
+#include
+#import
+
+namespace executorch::core_ml_backend_delegate {
+ using executorch::runtime::get_backend_class;
+
+static std::unique_ptr<executorch::backends::coreml::CoreMLBackendDelegate> backendInterfaceLazy_;
+
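+// Registers the CoreML backend if no backend is registered under the delegate
+// identifier yet; safe to call multiple times.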
+void register_backend_coreml() {
+ auto backendInterface = executorch::runtime::get_backend_class(ETCoreMLStrings.delegateIdentifier.UTF8String);
+ if (backendInterface == nullptr) {
+ backendInterfaceLazy_ = std::make_unique<executorch::backends::coreml::CoreMLBackendDelegate>();
+ executorch::runtime::Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, backendInterfaceLazy_.get()};
+ std::ignore = register_backend(backend);
+ }
+ }
+
+} // namespace executorch::core_ml_backend_delegate
diff --git a/backends/apple/coreml/runtime/delegate/model_metadata.h b/backends/apple/coreml/runtime/delegate/model_metadata.h
index 8d0c1f0914d..6b0f0807f9c 100644
--- a/backends/apple/coreml/runtime/delegate/model_metadata.h
+++ b/backends/apple/coreml/runtime/delegate/model_metadata.h
@@ -29,9 +29,7 @@ struct ModelMetadata {
inline ModelMetadata() noexcept { }
/// Returns `true` if the metadata is valid otherwise `false`.
- inline bool is_valid() const noexcept {
- return !identifier.empty() && !input_names.empty() && !output_names.empty();
- }
+ inline bool is_valid() const noexcept { return !identifier.empty() && !output_names.empty(); }
inline std::string to_json_string() const noexcept { return executorchcoreml::serde::json::to_json_string(*this); }
diff --git a/backends/apple/coreml/runtime/delegate/multiarray.mm b/backends/apple/coreml/runtime/delegate/multiarray.mm
index d38ac377799..447765bbd8d 100644
--- a/backends/apple/coreml/runtime/delegate/multiarray.mm
+++ b/backends/apple/coreml/runtime/delegate/multiarray.mm
@@ -123,6 +123,12 @@ bool init_bnns_descriptor(BNNSNDArrayDescriptor& bnns_descriptor, const MultiArr
}
bool copy_using_bnns(const MultiArray& src, MultiArray& dst) {
+ if (src.layout().dataType() != dst.layout().dataType()) {
+ // Copying from FP16 to FP32 is supported and this is a common use case
+ if (!(src.layout().dataType() == MultiArray::DataType::Float16 && dst.layout().dataType() == MultiArray::DataType::Float32)) {
+ return false;
+ }
+ }
if (dst.layout().num_bytes() < src.layout().num_bytes()) {
return false;
}
diff --git a/backends/apple/coreml/runtime/include/coreml_backend/delegate.h b/backends/apple/coreml/runtime/include/coreml_backend/delegate.h
index ec402e81717..39075e97a75 100644
--- a/backends/apple/coreml/runtime/include/coreml_backend/delegate.h
+++ b/backends/apple/coreml/runtime/include/coreml_backend/delegate.h
@@ -48,7 +48,7 @@ class CoreMLBackendDelegate final : public ::executorch::runtime::BackendInterfa
/// @retval On success, `Error::Ok` otherwise any other `Error` case.
executorch::runtime::Error execute(executorch::runtime::BackendExecutionContext& context,
executorch::runtime::DelegateHandle* handle,
- executorch::runtime::EValue** args) const override;
+ executorch::runtime::Span<executorch::runtime::EValue*> args) const override;
/// Returns `true` if the delegate is available otherwise `false`.
bool is_available() const override;
diff --git a/backends/apple/coreml/scripts/install_requirements.sh b/backends/apple/coreml/scripts/install_requirements.sh
index e9f73105bcd..5ec1ea6a1de 100755
--- a/backends/apple/coreml/scripts/install_requirements.sh
+++ b/backends/apple/coreml/scripts/install_requirements.sh
@@ -12,7 +12,7 @@ SCRIPT_DIR_PATH="$(
# TODO(jathu): remove the need to fetch coremltools to build deps for coreml_executor_runner.
# Keep this version in sync with: pyproject.toml
-COREMLTOOLS_VERSION="8.3"
+COREMLTOOLS_VERSION="9.0b1"
red=`tput setaf 1`
green=`tput setaf 2`
diff --git a/backends/apple/coreml/test/test_coreml_recipes.py b/backends/apple/coreml/test/test_coreml_recipes.py
new file mode 100644
index 00000000000..7a78836b2bc
--- /dev/null
+++ b/backends/apple/coreml/test/test_coreml_recipes.py
@@ -0,0 +1,574 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+import unittest
+
+import coremltools as ct
+import torch
+
+from executorch.backends.apple.coreml.recipes import (
+ CoreMLRecipeProvider,
+ CoreMLRecipeType,
+)
+
+from executorch.backends.apple.coreml.test.test_coreml_utils import (
+ IS_VALID_TEST_RUNTIME,
+)
+from executorch.exir.schema import DelegateCall
+from executorch.export import export, ExportRecipe, recipe_registry, StageType
+
+from torch import nn
+from torch.testing._internal.common_quantization import TestHelperModules
+from torchao.quantization.utils import compute_error
+
+
+class TestCoreMLRecipes(unittest.TestCase):
+ """Test suite for CoreML recipes focusing on quantization functionality"""
+
+ def setUp(self):
+ torch._dynamo.reset()
+ super().setUp()
+ self.provider = CoreMLRecipeProvider()
+ # Register the provider for recipe registry tests
+ recipe_registry.register_backend_recipe_provider(CoreMLRecipeProvider())
+
+ def tearDown(self):
+ super().tearDown()
+
+ def check_fully_delegated(self, session) -> None:
+ """Helper to verify a program is fully delegated to CoreML"""
+ session.print_delegation_info()
+ program = session.get_executorch_program()
+ instructions = program.execution_plan[0].chains[0].instructions
+ assert instructions is not None
+ self.assertEqual(len(instructions), 1)
+ self.assertIsInstance(instructions[0].instr_args, DelegateCall)
+
+ def _compare_eager_quantized_model_outputs(self, session, example_inputs, atol):
+ """Utility to compare eager quantized model output with session output after coreml lowering"""
+ if IS_VALID_TEST_RUNTIME:
+ source_transform_output = session.get_stage_artifacts()[
+ StageType.SOURCE_TRANSFORM
+ ]
+ eager_quantized_model = source_transform_output.data["forward"]
+ output = session.run_method("forward", example_inputs[0])[0]
+ expected = eager_quantized_model(*example_inputs[0])
+ self.assertTrue(torch.allclose(output, expected, atol=atol))
+
+ def _compare_eager_unquantized_model_outputs(
+ self, session, eager_unquantized_model, example_inputs, sqnr_threshold=20
+ ):
+ """Utility to compare eager unquantized model output with session output using SQNR"""
+ if IS_VALID_TEST_RUNTIME:
+ quantized_output = session.run_method("forward", example_inputs[0])[0]
+ original_output = eager_unquantized_model(*example_inputs[0])
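+ # compute_error returns the SQNR in dB (20 * log10(||ref|| / ||ref - quant||)),
+ # so larger values mean the quantized output tracks the original more closely.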
+ error = compute_error(original_output, quantized_output)
+ print(f"SQNR: {error} dB")
+ self.assertTrue(error > sqnr_threshold)
+
+ def test_fp32_recipe(self):
+ """Test FP32 recipe functionality"""
+ model = TestHelperModules.TwoLinearModule().eval()
+ example_inputs = [(torch.randn(9, 8),)]
+
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(CoreMLRecipeType.FP32),
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3)
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
+
+ def test_fp16_recipe(self):
+ """Test FP16 recipe functionality"""
+ model = TestHelperModules.TwoLinearModule().eval()
+ example_inputs = [(torch.randn(9, 8),)]
+
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(CoreMLRecipeType.FP16),
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3)
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
+
+ def test_fp_recipes_with_custom_parameters(self):
+ """Test FP recipes with custom deployment target and compute unit"""
+ test_cases = [
+ (CoreMLRecipeType.FP32, {"minimum_deployment_target": ct.target.iOS16}),
+ (CoreMLRecipeType.FP16, {"compute_unit": ct.ComputeUnit.CPU_ONLY}),
+ ]
+
+ model = TestHelperModules.TwoLinearModule().eval()
+ example_inputs = [(torch.randn(9, 8),)]
+
+ for recipe_type, kwargs in test_cases:
+ with self.subTest(recipe=recipe_type.value, kwargs=kwargs):
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(recipe_type, **kwargs),
+ )
+ self.check_fully_delegated(session)
+
+ def test_int4_weight_only_per_channel(self):
+ """Test INT4 weight-only per-channel quantization"""
+ model = TestHelperModules.TwoLinearModule().eval()
+ example_inputs = [(torch.randn(9, 8),)]
+
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL
+ ),
+ )
+ self.check_fully_delegated(session)
+ self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-02)
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
+
+ def test_int4_weight_only_per_group(self):
+ """Test INT4 weight-only per-group quantization with different group sizes"""
+
+ class CustomTwoLinearModel(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.layer1 = nn.Linear(32, 32)
+ self.layer2 = nn.Linear(32, 8)
+
+ def forward(self, x):
+ x = torch.relu(self.layer1(x))
+ x = self.layer2(x)
+ return x
+
+ model = CustomTwoLinearModel().eval()
+ example_inputs = [(torch.randn(1, 32),)]
+ # Test with different group sizes
+ for group_size in [8, 16, 32]:
+ with self.subTest(group_size=group_size):
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
+ group_size=group_size,
+ ),
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(
+ session, example_inputs, atol=1e-3
+ )
+ self._compare_eager_unquantized_model_outputs(
+ session, model, example_inputs
+ )
+
+ def test_int4_weight_only_per_group_validation(self):
+ """Test INT4 per-group parameter validation"""
+ # Test invalid group size type
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP, group_size="32"
+ )
+ self.assertIn("must be an integer", str(cm.exception))
+
+ # Test negative group size
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP, group_size=-1
+ )
+ self.assertIn("must be positive", str(cm.exception))
+
+ # Test unexpected parameter
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
+ group_size=32, # group_size not valid for per-channel
+ )
+ self.assertIn("unexpected parameters", str(cm.exception))
+
+ def test_int8_weight_only_per_channel(self):
+ """Test INT8 weight-only per-channel quantization"""
+ model = TestHelperModules.TwoLinearModule().eval()
+ example_inputs = [(torch.randn(9, 8),)]
+
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL
+ ),
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
+
+ def test_int8_weight_only_per_group(self):
+ """Test INT8 weight-only per-group quantization with different group sizes"""
+
+ class SimpleLinearModel(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.layer = nn.Linear(64, 2)
+
+ def forward(self, x):
+ return self.layer(x)
+
+ model = SimpleLinearModel().eval()
+ example_inputs = [(torch.randn(1, 64),)]
+
+ # Test with different group sizes
+ for group_size in [16, 32, 64]:
+ with self.subTest(group_size=group_size):
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
+ group_size=group_size,
+ ),
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(
+ session, example_inputs, atol=1e-2
+ )
+ self._compare_eager_unquantized_model_outputs(
+ session, model, example_inputs
+ )
+
+ def test_codebook_weight_only_recipe(self):
+ """Test codebook quantization recipe"""
+
+ class SimpleLinearModel(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.layer = nn.Linear(32, 2)
+
+ def forward(self, x):
+ return self.layer(x)
+
+ model = SimpleLinearModel().eval()
+ example_inputs = [(torch.randn(1, 32),)]
+
+ # Test different block sizes
+ test_cases = [
+ {"bits": 3, "block_size": [-1, 8]},
+ ]
+
+ for kwargs in test_cases:
+ with self.subTest(kwargs=kwargs):
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, **kwargs
+ ),
+ )
+ self.check_fully_delegated(session)
+
+ def test_codebook_parameter_validation(self):
+ """Test codebook parameter validation"""
+ # Test invalid bits type
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits="3", block_size=[-1, 8]
+ )
+ self.assertIn("must be an integer", str(cm.exception))
+
+ # Test bits out of range
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits=0, block_size=[-1, 8]
+ )
+ self.assertIn("must be between 1 and 8", str(cm.exception))
+
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits=9, block_size=[-1, 8]
+ )
+ self.assertIn("must be between 1 and 8", str(cm.exception))
+
+ # Test invalid block_size type
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits=3, block_size="[-1, 16]"
+ )
+ self.assertIn("must be a list", str(cm.exception))
+
+ def test_int8_static_quantization(self):
+ """Test INT8 static quantization (weights + activations)"""
+
+ class SimpleLinearModel(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.layer1 = nn.Linear(32, 16)
+ self.layer2 = nn.Linear(16, 2)
+
+ def forward(self, x):
+ x = torch.relu(self.layer1(x))
+ x = self.layer2(x)
+ return x
+
+ model = SimpleLinearModel().eval()
+ example_inputs = [(torch.randn(1, 32),)]
+
+ recipe = ExportRecipe.get_recipe(
+ CoreMLRecipeType.PT2E_INT8_STATIC, minimum_deployment_target=ct.target.iOS17
+ )
+
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=recipe,
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3)
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
+
+ def test_int8_weight_only_pt2e(self):
+ """Test PT2E-based INT8 weight-only quantization"""
+ model = TestHelperModules.TwoLinearModule().eval()
+ example_inputs = [(torch.randn(9, 8),)]
+
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(
+ CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY
+ ),
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
+
+ def test_int8_weight_only_pt2e_with_conv(self):
+ """Test PT2E-based INT8 weight-only quantization with convolution layers"""
+
+ class ConvModel(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
+ self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
+ self.pool = nn.AdaptiveAvgPool2d((1, 1))
+ self.fc = nn.Linear(32, 10)
+
+ def forward(self, x):
+ x = torch.relu(self.conv1(x))
+ x = torch.relu(self.conv2(x))
+ x = self.pool(x)
+ x = x.view(x.size(0), -1)
+ x = self.fc(x)
+ return x
+
+ model = ConvModel().eval()
+ example_inputs = [(torch.randn(1, 3, 32, 32),)]
+
+ session = export(
+ model=model,
+ example_inputs=example_inputs,
+ export_recipe=ExportRecipe.get_recipe(
+ CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY
+ ),
+ )
+ self.check_fully_delegated(session)
+
+ self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
+
+ def test_pt2e_recipes_parameter_rejection(self):
+ """Test that PT2E recipes reject TorchAO-specific parameters"""
+ # PT2E recipes should reject TorchAO-specific parameters
+ pt2e_recipes = [
+ CoreMLRecipeType.PT2E_INT8_STATIC,
+ CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
+ ]
+ torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
+
+ for recipe_type in pt2e_recipes:
+ for param in torchao_params:
+ with self.subTest(recipe=recipe_type.value, param=param):
+ kwargs = {param: "dummy_value"}
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(recipe_type, **kwargs)
+ self.assertIn("unexpected parameters", str(cm.exception).lower())
+
+ def test_filter_fn_comprehensive(self):
+ """Comprehensive test for filter_fn parameter functionality"""
+
+ def custom_filter(module, fqn):
+ return isinstance(module, nn.Linear) and "target" in fqn
+
+ # Test 1: TorchAO recipes accept filter_fn and default to None
+ torchao_recipes = [
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
+ ]
+
+ for recipe_type in torchao_recipes:
+ with self.subTest(f"{recipe_type.value}_default"):
+ # Test default behavior (None)
+ recipe = self.provider.create_recipe(recipe_type)
+ config = recipe.quantization_recipe.ao_quantization_configs[0]
+ self.assertIsNone(config.filter_fn)
+
+ with self.subTest(f"{recipe_type.value}_custom"):
+ # Test custom filter_fn
+ recipe = self.provider.create_recipe(
+ recipe_type, filter_fn=custom_filter
+ )
+ config = recipe.quantization_recipe.ao_quantization_configs[0]
+ self.assertEqual(config.filter_fn, custom_filter)
+
+ # Test 2: Codebook recipe accepts filter_fn and has sensible default
+ with self.subTest("codebook_default"):
+ recipe = self.provider.create_recipe(
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits=3, block_size=[-1, 16]
+ )
+ config = recipe.quantization_recipe.ao_quantization_configs[0]
+ self.assertIsNotNone(config.filter_fn)
+
+ # Test default filter targets Linear and Embedding layers
+ linear_module = nn.Linear(10, 5)
+ embedding_module = nn.Embedding(100, 10)
+ conv_module = nn.Conv2d(3, 16, 3)
+
+ self.assertTrue(config.filter_fn(linear_module, "linear"))
+ self.assertTrue(config.filter_fn(embedding_module, "embedding"))
+ self.assertFalse(config.filter_fn(conv_module, "conv"))
+
+ with self.subTest("codebook_custom"):
+ recipe = self.provider.create_recipe(
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY,
+ filter_fn=custom_filter,
+ bits=3,
+ block_size=[-1, 16],
+ )
+ config = recipe.quantization_recipe.ao_quantization_configs[0]
+ self.assertEqual(config.filter_fn, custom_filter)
+
+ def test_quantization_recipe_structure(self):
+ """Test that quantization recipes have proper structure"""
+ quantization_recipes = [
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY,
+ ]
+
+ for recipe_type in quantization_recipes:
+ with self.subTest(recipe=recipe_type.value):
+ kwargs = (
+ {"bits": 3, "block_size": [-1, 16]}
+ if recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY
+ else {}
+ )
+ recipe = self.provider.create_recipe(recipe_type, **kwargs)
+ self.assertIsNotNone(recipe)
+
+ # Should have quantization recipe with ao_quantization_configs
+ self.assertIsNotNone(recipe.quantization_recipe)
+ self.assertIsNotNone(recipe.quantization_recipe.ao_quantization_configs)
+ self.assertEqual(
+ len(recipe.quantization_recipe.ao_quantization_configs), 1
+ )
+
+ # Should have lowering recipe
+ self.assertIsNotNone(recipe.lowering_recipe)
+ self.assertIsNotNone(recipe.lowering_recipe.partitioners)
+
+ def test_recipe_creation_with_defaults(self):
+ """Test that recipes work with default parameters"""
+ # Test that all recipes can be created without explicit parameters
+ all_recipes = [
+ CoreMLRecipeType.FP32,
+ CoreMLRecipeType.FP16,
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP, # should use default group_size=32
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL,
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP, # should use default group_size=32
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, # should use default bits=3, block_size=[-1,16]
+ ]
+
+ for recipe_type in all_recipes:
+ with self.subTest(recipe=recipe_type.value):
+ kwargs = (
+ {"bits": 3, "block_size": [-1, 16]}
+ if recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY
+ else {}
+ )
+ recipe = self.provider.create_recipe(recipe_type, **kwargs)
+ self.assertIsNotNone(recipe)
+ self.assertEqual(recipe.name, recipe_type.value)
+
+ def test_minimum_deployment_target_validation(self):
+ """Test that minimum_deployment_target validation works correctly for quantization recipes"""
+ test_cases = [
+ (CoreMLRecipeType.PT2E_INT8_STATIC, ct.target.iOS17, {}),
+ (CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY, ct.target.iOS17, {}),
+ (
+ CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
+ ct.target.iOS18,
+ {},
+ ),
+ (CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP, ct.target.iOS18, {}),
+ (
+ CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL,
+ ct.target.iOS18,
+ {},
+ ),
+ (CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP, ct.target.iOS18, {}),
+ (
+ CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY,
+ ct.target.iOS18,
+ {"bits": 3, "block_size": [-1, 16]},
+ ),
+ ]
+
+ for recipe_type, min_target, kwargs in test_cases:
+ with self.subTest(recipe=recipe_type.value):
+
+ # Test 1: Providing deployment target below minimum should raise ValueError
+ too_low_target = ct.target.iOS15
+ with self.assertRaises(ValueError) as cm:
+ self.provider.create_recipe(
+ recipe_type, minimum_deployment_target=too_low_target, **kwargs
+ )
+ error_msg = str(cm.exception)
+ self.assertIn(
+ f"minimum_deployment_target must be {str(min_target)} or higher",
+ error_msg,
+ )
+
+ # Test 2: Providing valid deployment target should work
+ valid_recipe = self.provider.create_recipe(
+ recipe_type, minimum_deployment_target=min_target, **kwargs
+ )
+ self.assertIsNotNone(valid_recipe)
+
+ # Test 3: Not providing deployment target should default to minimum
+ default_recipe = self.provider.create_recipe(recipe_type, **kwargs)
+ self.assertIsNotNone(default_recipe)
+
+ # Test 4: Providing deployment target higher than minimum should work
+ higher_target = ct.target.iOS18  # >= every minimum in test_cases
+ higher_recipe = self.provider.create_recipe(
+ recipe_type, minimum_deployment_target=higher_target, **kwargs
+ )
+ self.assertIsNotNone(higher_recipe)
diff --git a/backends/apple/coreml/test/test_coreml_utils.py b/backends/apple/coreml/test/test_coreml_utils.py
new file mode 100644
index 00000000000..7d9ac7ba5a5
--- /dev/null
+++ b/backends/apple/coreml/test/test_coreml_utils.py
@@ -0,0 +1,19 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+import platform
+import sys
+
+import torch
+
+
+def is_fbcode():
+ return not hasattr(torch.version, "git_version")
+
+
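+# Running compiled programs for output comparison requires macOS 15+ outside
+# of fbcode; tests use this flag to skip runtime checks on other hosts.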
+IS_VALID_TEST_RUNTIME: bool = (
+ (sys.platform == "darwin")
+ and not is_fbcode()
+ and tuple(map(int, platform.mac_ver()[0].split("."))) >= (15, 0)
+)
diff --git a/backends/apple/coreml/test/test_torch_ops.py b/backends/apple/coreml/test/test_torch_ops.py
index 323f76afd1b..0d6b581ee72 100644
--- a/backends/apple/coreml/test/test_torch_ops.py
+++ b/backends/apple/coreml/test/test_torch_ops.py
@@ -2,8 +2,6 @@
#
# Please refer to the license found in the LICENSE file in the root directory of the source tree.
-import platform
-import sys
import unittest
import coremltools as ct
@@ -14,19 +12,15 @@
from executorch.backends.apple.coreml.compiler import CoreMLBackend
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
-from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_
-
-
-def is_fbcode():
- return not hasattr(torch.version, "git_version")
+from executorch.backends.apple.coreml.test.test_coreml_utils import (
+ IS_VALID_TEST_RUNTIME,
+)
+from executorch.exir.backend.utils import format_delegated_graph
+from torchao.prototype.quantization.codebook_coreml import CodebookWeightOnlyConfig
+from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_
-_TEST_RUNTIME = (
- (sys.platform == "darwin")
- and not is_fbcode()
- and tuple(map(int, platform.mac_ver()[0].split("."))) >= (15, 0)
-)
-if _TEST_RUNTIME:
+if IS_VALID_TEST_RUNTIME:
from executorch.runtime import Runtime
@@ -47,7 +41,7 @@ def _get_test_model(self):
return model, example_inputs
def _compare_outputs(self, executorch_program, eager_program, example_inputs):
- if not _TEST_RUNTIME:
+ if not IS_VALID_TEST_RUNTIME:
return
runtime = Runtime.get()
program = runtime.load_program(executorch_program.buffer)
@@ -164,6 +158,69 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
et_prog = delegated_program.to_executorch()
self._compare_outputs(et_prog, model, example_inputs)
+ @unittest.skipIf(
+ not hasattr(torch.version, "git_version"),
+ "Enable in fbcode once D79658061 lands",
+ )
+ def test_dequantize_codebook_linear(self):
+ model, example_inputs = self._get_test_model()
+ quantize_(
+ model,
+ CodebookWeightOnlyConfig(dtype=torch.uint2, block_size=[-1, 16]),
+ )
+ ep = torch.export.export(model, example_inputs)
+ assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code
+ delegated_program = executorch.exir.to_edge_transform_and_lower(
+ ep,
+ partitioner=[self._coreml_partitioner()],
+ )
+ for node in delegated_program.exported_program().graph.nodes:
+ if node.op == "call_function":
+ assert node.target.__name__ in [
+ "executorch_call_delegate",
+ "getitem",
+ ], f"Got unexpected node target after delegation: {node.target.__name__}"
+
+ assert (
+ "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
+ in format_delegated_graph(delegated_program.exported_program().graph_module)
+ )
+
+ et_prog = delegated_program.to_executorch()
+ self._compare_outputs(et_prog, model, example_inputs)
+
+ @unittest.skipIf(
+ not hasattr(torch.version, "git_version"),
+ "Enable in fbcode once D79658061 lands",
+ )
+ def test_dequantize_codebook_embedding(self):
+ model, example_inputs = self._get_test_model()
+ quantize_(
+ model,
+ CodebookWeightOnlyConfig(dtype=torch.uint3, block_size=[-1, 16]),
+ lambda m, fqn: isinstance(m, torch.nn.Embedding),
+ )
+ ep = torch.export.export(model, example_inputs)
+ assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code
+ delegated_program = executorch.exir.to_edge_transform_and_lower(
+ ep,
+ partitioner=[self._coreml_partitioner()],
+ )
+ for node in delegated_program.exported_program().graph.nodes:
+ if node.op == "call_function":
+ assert node.target.__name__ in [
+ "executorch_call_delegate",
+ "getitem",
+ ], f"Got unexpected node target after delegation: {node.target.__name__}"
+
+ assert (
+ "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
+ in format_delegated_graph(delegated_program.exported_program().graph_module)
+ )
+
+ et_prog = delegated_program.to_executorch()
+ self._compare_outputs(et_prog, model, example_inputs)
+
if __name__ == "__main__":
test_runner = TestTorchOps()
@@ -172,3 +229,5 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
test_runner.test_dequantize_affine_c4w_embedding()
test_runner.test_dequantize_affine_c4w_linear()
test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+ test_runner.test_dequantize_codebook_linear()
+ test_runner.test_dequantize_codebook_embedding()
diff --git a/backends/apple/mps/runtime/MPSBackend.mm b/backends/apple/mps/runtime/MPSBackend.mm
index 261332436d4..3c136e536ec 100644
--- a/backends/apple/mps/runtime/MPSBackend.mm
+++ b/backends/apple/mps/runtime/MPSBackend.mm
@@ -30,6 +30,7 @@
using executorch::runtime::Error;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::Result;
+using executorch::runtime::Span;
class MPSBackend final : public ::executorch::runtime::BackendInterface {
public:
@@ -72,7 +73,7 @@ bool is_available() const override {
Error execute(
ET_UNUSED BackendExecutionContext& context,
DelegateHandle* handle,
- EValue** args) const override {
+ Span<EValue*> args) const override {
auto executor = static_cast(handle);
std::vector input_pointers;
std::vector output_pointers;
diff --git a/backends/apple/mps/setup.md b/backends/apple/mps/setup.md
index 0ecb4151e61..f4819c104a5 100644
--- a/backends/apple/mps/setup.md
+++ b/backends/apple/mps/setup.md
@@ -15,7 +15,7 @@ The MPS backend device maps machine learning computational graphs and primitives
* [Introduction to ExecuTorch](../../../docs/source/intro-how-it-works.md)
* [Setting up ExecuTorch](../../../docs/source/getting-started-setup.rst)
* [Building ExecuTorch with CMake](../../../docs/source/using-executorch-cpp.md#building-with-cmake)
-* [ExecuTorch iOS Demo App](https://github.com/pytorch-labs/executorch-examples/tree/main/mv3/apple/ExecuTorchDemo)
+* [ExecuTorch iOS Demo App](https://github.com/meta-pytorch/executorch-examples/tree/main/mv3/apple/ExecuTorchDemo)
* [ExecuTorch iOS LLaMA Demo App](../../../docs/source/llm/llama-demo-ios.md)
:::
::::
diff --git a/backends/apple/mps/targets.bzl b/backends/apple/mps/targets.bzl
index 74d79448362..99c97d2b318 100644
--- a/backends/apple/mps/targets.bzl
+++ b/backends/apple/mps/targets.bzl
@@ -3,6 +3,7 @@
# Provided subject to the LICENSE file in the top level directory.
#
+load("@fbsource//xplat/executorch/build:build_variables.bzl", "MPS_BACKEND_BUCK_SRCS")
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
def define_common_targets(is_xplat = False, platforms = []):
@@ -37,10 +38,7 @@ def define_common_targets(is_xplat = False, platforms = []):
"runtime/*.h",
"runtime/operations/*.h",
]),
- "srcs": native.glob([
- "runtime/*.mm",
- "runtime/operations/*.mm",
- ]),
+ "srcs": MPS_BACKEND_BUCK_SRCS,
"visibility": [
"//executorch/backends/apple/...",
"//executorch/examples/...",
diff --git a/backends/arm/CMakeLists.txt b/backends/arm/CMakeLists.txt
index 11f61c0dfee..cdde13a85a4 100644
--- a/backends/arm/CMakeLists.txt
+++ b/backends/arm/CMakeLists.txt
@@ -14,73 +14,76 @@ endif()
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
-set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+ ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
-
# bare metal backend builds
if(EXECUTORCH_BUILD_ARM_BAREMETAL)
-add_compile_options("-Wall" "-Werror")
+ add_compile_options("-Wall" "-Werror")
-# Third-party folder and Ethos-U driver inclued
-set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")
-set(DRIVER_ETHOSU_INCLUDE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include")
-include_directories(${DRIVER_ETHOSU_INCLUDE_DIR})
+ # Third-party folder and Ethos-U driver included
+ set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")
+ set(DRIVER_ETHOSU_INCLUDE_DIR
+ "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include"
+ )
+ include_directories(${DRIVER_ETHOSU_INCLUDE_DIR})
-set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
- backends/arm/runtime/VelaBinStream.cpp
-)
-list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")
+ set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
+ backends/arm/runtime/VelaBinStream.cpp
+ )
+ list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")
-add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
-target_include_directories(
- executorch_delegate_ethos_u PUBLIC ${_common_include_directories}
-)
-target_include_directories(
- executorch_delegate_ethos_u PUBLIC ${DRIVER_ETHOSU_INCLUDE_DIR}
-)
+ add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
+ target_link_libraries(
+ executorch_delegate_ethos_u PUBLIC executorch_core ethosu_core_driver
+ )
-# end config for bare metal builds
-endif()
+ install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets)
+ # end config for bare metal builds
+endif()
-# VGF backend builds
+# VGF backend builds
if(EXECUTORCH_BUILD_VGF)
-# include libvgf
-set(LIBVGF_PATH "${EXECUTORCH_ROOT}/examples/arm/ethos-u-scratch/ml-sdk-for-vulkan-manifest/sw/vgf-lib/")
-
-set(VULKAN_THIRD_PARTY_PATH ${EXECUTORCH_ROOT}/backends/vulkan/third-party)
-set(VULKAN_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/Vulkan-Headers/include)
-set(VOLK_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/volk)
-
-set(LIBVGF_STATIC "${LIBVGF_PATH}/build/src/libvgf.a")
-set(LIBVGF_INCLUDE "${LIBVGF_PATH}/include/")
-
-add_library(vgf STATIC IMPORTED)
-set_property( TARGET vgf PROPERTY IMPORTED_LOCATION "${LIBVGF_STATIC}" )
-target_include_directories(vgf INTERFACE "${LIBVGF_INCLUDE}")
-
-# Add backend delegate for VGF
-set(_vgf_backend_sources backends/arm/runtime/VGFBackend.cpp
- backends/arm/runtime/VGFSetup.cpp )
-
-# vgf backend
-list(TRANSFORM _vgf_backend_sources PREPEND "${EXECUTORCH_ROOT}/")
-add_library(vgf_backend ${_vgf_backend_sources})
-target_include_directories(
- vgf_backend PUBLIC
- ${_common_include_directories}
- ${VULKAN_HEADERS_PATH}
- ${VOLK_HEADERS_PATH}
-)
-target_compile_options(vgf_backend PRIVATE -DUSE_VULKAN_WRAPPER -DUSE_VULKAN_VOLK)
-
-
-target_link_libraries(vgf_backend PRIVATE executorch_core)
-target_link_libraries(vgf_backend PRIVATE vgf)
-executorch_target_link_options_shared_lib(vgf_backend)
-
-# end config for VGF builds
+ # include libvgf
+ set(LIBVGF_PATH
+ "${EXECUTORCH_ROOT}/examples/arm/ethos-u-scratch/ml-sdk-for-vulkan-manifest/sw/vgf-lib/"
+ )
+
+ set(VULKAN_THIRD_PARTY_PATH ${EXECUTORCH_ROOT}/backends/vulkan/third-party)
+ set(VULKAN_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/Vulkan-Headers/include)
+ set(VOLK_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/volk)
+
+ set(LIBVGF_STATIC "${LIBVGF_PATH}/build/src/libvgf.a")
+ set(LIBVGF_INCLUDE "${LIBVGF_PATH}/include/")
+
+ add_library(vgf STATIC IMPORTED)
+ set_property(TARGET vgf PROPERTY IMPORTED_LOCATION "${LIBVGF_STATIC}")
+ target_include_directories(vgf INTERFACE "${LIBVGF_INCLUDE}")
+
+ # Add backend delegate for VGF
+ set(_vgf_backend_sources backends/arm/runtime/VGFBackend.cpp
+ backends/arm/runtime/VGFSetup.cpp
+ )
+
+ # vgf backend
+ list(TRANSFORM _vgf_backend_sources PREPEND "${EXECUTORCH_ROOT}/")
+ add_library(vgf_backend ${_vgf_backend_sources})
+ target_include_directories(
+ vgf_backend PUBLIC ${_common_include_directories} ${VULKAN_HEADERS_PATH}
+ ${VOLK_HEADERS_PATH}
+ )
+ target_compile_options(
+ vgf_backend PRIVATE -DUSE_VULKAN_WRAPPER -DUSE_VULKAN_VOLK
+ )
+
+ target_link_libraries(vgf_backend PRIVATE executorch_core)
+ target_link_libraries(vgf_backend PRIVATE vgf)
+ executorch_target_link_options_shared_lib(vgf_backend)
+
+ # end config for VGF builds
endif()
diff --git a/backends/arm/README.md b/backends/arm/README.md
index 6bf46d3f3ae..e2e49c0c10f 100644
--- a/backends/arm/README.md
+++ b/backends/arm/README.md
@@ -1,47 +1,74 @@
-# ExecuTorch Arm/TOSA Delegate
+# ExecuTorch Arm® Delegate for TOSA devices
This subtree contains the Arm(R) Delegate implementation for ExecuTorch.
This delegate is structured to, over time, support a number of different Arm devices
through an AoT flow which targets multiple Arm IP using the TOSA standard.
-The expected flow is:
- * torch.nn.module -> TOSA -> command_stream for fully AoT flows e.g. embedded.
- * torch.nn.module -> TOSA for flows supporting a JiT compilation step.
-
-Current backend support is being developed for TOSA to Ethos(TM)-U55/65/85 via the
-ethos-u-vela compilation stack. which follows the fully AoT flow.
-
-## Layout
+For more information on TOSA see https://www.mlplatform.org/tosa/tosa_spec.html
+
+**The expected flows are:**
+* torch.nn.module -> TOSA for development and validation of model export
+* torch.nn.module -> TOSA/VGF for flows supporting a JIT compilation step.
+* torch.nn.module -> TOSA -> command_stream for fully AoT flows e.g. embedded.
+
+**Currently device support is for:**
+* TOSA to Ethos™-U55/65/85 via the ethos-u-vela compilation stack.
+ * This is cross-compiled to the appropriate target CPU
+ * There is a separate arm_executor_runner for bare-metal platforms
+* TOSA to VGF via the model-converter for devices supporting the ML SDK for Vulkan®
+ * The VGF graph represents TOSA directly in a SPIR-V™ standardized form.
+ * Because the VGF delegate runs on Vulkan, it must be built with the Vulkan delegate also present.
+
+**Currently supported development platforms are:**
+* For ahead of time tooling
+ * Linux aarch64
+ * Linux x86_64
+ * macOS with Apple silicon
+* Bare-metal builds for the Ethos-U and Cortex-M targets
+ * Full testing is available in tree for the Corstone™ FVPs
+ * This is a reference implementation for porting to silicon targets
+* Linux target support for VGF-capable targets
+ * This flow re-uses the common executor_runner
+
+## Layout of key components
Export:
-- `ethosu_backend.py` - Main entrypoint for the EthosUBackend. For more information see the section on
-[Arm Backend Architecture](#arm-backend-architecture). For examples of use see `executorch/examples/arm`.
-- `tosa_mapping.py` - utilities for mapping edge dialect to TOSA
-- `tosa_quant_utils.py` - utilities for mapping quantization information to TOSA encoding
+* `tosa_backend.py` - The TOSA conversion flow all other backends rely on.
+* `ethosu/backend.py` - Main entrypoint for the EthosUBackend.
+* `vgf_backend.py` - Main entrypoint for VgfBackend.
+ * For more information see the section on [Arm Backend Architecture](#arm-backend-architecture).
+* `scripts` - The core scripts which prepare AoT dependencies such as backend compilers.
-Operators:
-- `node_visitor.py` - Base class for edge operator lowering
-- `op_*.py` - Edge operator lowering/serialization to TOSA
+Passes (which prepare the partitioned graphs for TOSA conversion):
+* `_passes/arm_pass_manager.py` - Pass manager. Will decide which passes need to be applied depending on the compile_spec.
+* `_passes/*_pass.py` - Compiler passes derived from ExportPass
-Passes:
-- `arm_pass_manager.py` - Pass manager. Will decide which passes need to be applied depending on the compile_spec.
-- `*_pass.py` - Compiler passes derived from ExportPass
+Operators (which handle mapping of operators to TOSA):
+* `operators/node_visitor.py` - Base class for edge operator lowering
+* `operators/op_*.py` - Edge operator lowering/serialization to TOSA
Quantization:
-- `arm_quantizer.py` - Quantizers for Arm backend. Contains the EthosUQuantizer which inherits from the TOSAQuantizer
-- `arm_quantizer_utils.py` - Utilities for quantization
+* `quantizer/arm_quantizer.py` - Quantizers for Arm backend.
+ * Contains the EthosUQuantizer which inherits from the TOSAQuantizer
+ * Contains the VgfQuantizer which inherits from the TOSAQuantizer
+* `arm_quantizer_utils.py` - Utilities for quantization
Runtime:
-- `runtime/ArmEthosUBackend.cpp` - The Arm backend implementation of the ExecuTorch runtime backend (BackendInterface) for Ethos-U
+- `runtime/ArmEthosUBackend.cpp` - The Arm delegate for Ethos-U targets
+- `runtime/VGFBackend.cpp` - The Arm delegate for VGF capable targets
+- `CMakeLists.txt` - the build configuration for both targets
Other:
-- `third-party/` - Dependencies on other code - in particular the TOSA serialization_lib for compiling to TOSA and the ethos-u-core-driver for the bare-metal backend supporting Ethos-U
+- `third-party/` - Dependencies for runtime builds
- `test/` - Unit test and test support functions
+
## Testing
-After a setup you can run unit tests with the test_arm_baremetal.sh script.
+The tests and related support scripts exercise TOSA, Ethos-U and VGF behaviour, depending on which tools are installed. It is expected that the relevant environment preparation has been performed, as outlined in ./examples/arm/README.md.
+
+After setup you can run unit tests with the test_arm_baremetal.sh script.
To run the pytests suite run
@@ -62,6 +89,7 @@ backends/arm/test/test_arm_baremetal.sh test_full_ethosu_fvp
```
## Unit tests
+
This is the structure of the test directory
```
@@ -112,89 +140,51 @@ Please note that installing model test dependencies is a standalone process. Whe
List of models with specific dependencies:
- Stable Diffusion: [diffusers](https://github.com/huggingface/diffusers/tree/main)
-## Passes
-
-With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
-Ethos-U, the exported program is composed of a Quantize node, Ethos-U custom delegate
-and a Dequantize node. In some circumstances, you may want to feed quantized input to the Neural
-Network straight away, e.g. if you have a camera sensor outputting (u)int8 data and keep all the
-arithmetic of the application in the int8 domain. For these cases, you can apply the
-`exir/passes/quantize_io_pass.py`. See the unit test in `executorch/backends/arm/
-test/passes/test_ioquantization_pass.py`for an example how to feed quantized inputs and
-obtain quantized outputs.
-
-
-### Code coverage
-
-To get code coverage:
-
-```
-coverage run --source= --rcfile=backends/arm/test/.coveragerc -m pytest \
---config-file=/dev/null backends/arm/test/
-```
-
-All files in `SRC` and its child directories will be analysed for code coverage,
-unless explicitly exluded in the .coveragerc file. If using venv this might be
-under `env/lib/python/site-packages/executorch/`. To get the
-absolute path, run:
-
-```
-python -c "import executorch; print(executorch.__path__)"
-```
-
-This contains a list of paths where the source directory is located. Pick the
-one that is located in `env/lib`. If that does not work try the others. Add
-`backends/arm` to the path in `--source` to only get code coverage for the Arm
-backend.
-
-### A note on unit tests
-There are currently 3 ways we unit test our code.
-1. TOSA main inference. These tests are using non-quantized data and ops. Edge IR representation of the module is lowered to a TOSA flatbuffer, which is tested for numerical correcteness using the ```tosa_reference_model``` tool.
-2. TOSA base inference. Same as above, but data and ops are quantized.
-3. Ethos-U55. These tests use quantized data and ops (aka TOSA base inference). Edge IR is lowered to a TOSA flatbuffer, which is fed into the Vela compiler. Theses tests are functional tests and do not test numerical correctness, since that should be guaranteed by TOSA.
+There are currently a number of ways we unit test our code:
+1. TOSA FP. These tests use non-quantized data and ops. The Edge IR representation of the module is lowered to a TOSA flatbuffer, which is tested for numerical correctness using the ```tosa_reference_model``` tool.
+2. TOSA INT. Same as above, but data and ops are integer and represent a quantized domain.
+3. Ethos-U. These tests use quantized data and ops (aka TOSA base inference). Edge IR is lowered to a TOSA flatbuffer, which is fed into the Vela compiler. These tests are functional tests and do not test numerical correctness, since that should be guaranteed by TOSA.
+4. VGF. These tests enable both FP and INT testing for the VGF/SPIR-V representation of TOSA.
-In order to distinguise between the different tests, the following suffixes have been added to the respective test case.
-* ```_MI``` for main inference
-* ```_BI``` for base inference
-* ```_U55_BI``` for base inference on U55
+To distinguish between general and more targeted tests, you will find suffixes such as FP, INT, U55 and VGF.
## Help & Improvements
If you have problems or questions, or have suggestions for ways to make
implementation and testing better, please reach out to the Arm team developing this delegate, or
-create an issue on [github](https://www.github.com/pytorch/executorch/issues).
+create an issue on [github](https://www.github.com/pytorch/executorch/issues) and add the "Partner: Arm" label.
# Arm Backend Architecture
The broad principle with the Arm backend implemention for ExecuTorch is to support multiple Arm devices and device configurations through a largely Homogeneous flow with maximal sharing of class logic.
-The EthosUBackend is currently the one user facing API that target the Ethos-U55 and Ethos-U85 hardware IP. It is using the TOSABackend under the hood to share code and functionality, but also to separate testing possibilities to the TOSA flow itself.
+The EthosUBackend and VgfBackend are the user-facing targets available for the Ethos-U55 and Ethos-U85 hardware IP, and for VGF targets. They use the TOSABackend under the hood to share compiler passes and legalisation, along with other code and functionality, but also to enable separate testing of the TOSA flow itself.
In practice for compilation, this means that the flow goes via [Arm TOSA](https://www.mlplatform.org/tosa/tosa_spec.html) to produce a common IR and quantization behaviour compatible with our various IP, and typically, device-specific backends to further lower to a device specific binary which can happen ahead of time (within the Python development flow) or at runtime (during a JIT compilation stage).
-In practice for the runtime, this means we will share common runtime backend functionality, with the aim for features like debugging to be available through common tooling.
-
## Arm Backend Status and Maturity
-The Arm EthosU Backend should be considered a prototype quality at this point, likely subject to significant change and improvement, and with a limited coverage of functionality. We are actively developing this codebase.
+The Arm EthosU Backend should be considered of reasonable quality at this point, supporting a large number of operators and major networks.
+The Arm VGF Backend should be considered of Alpha quality, likely subject to significant change and improvement, and with a limited coverage of functionality.
+We are actively developing the codebase for both targets.
## Current flows
-The EthosUBackend has a two stage process,
-- Compile to TOSA to rationalise the graph into known hardware support profiles. Currently this is to v0.80 TOSA BI with specific concern to a subset which gives support on Ethos-U55 and Ethos-U85, the target of the initial prototype efforts. This calls into the TOSABackend.
-- Lower via the ethos-u-vela compilation flow which takes TOSA v0.80 as an input and produces a low level commandstream for the hardware which is then passed via the delegate to the ethos-u-core-driver for direct execution.
+The Arm backends have a two-stage process:
+1. Compile to TOSA by applying FX passes and legalizing the graph into supported TOSA profiles. Currently this targets TOSA v1.0 INT/FP, via calls into the TOSABackend.
+2. Lower via the target compilation flow, which takes TOSA v1.0 as an input and produces a lower-level format for the hardware
+ * For Ethos-U this is a hardware command stream that can be executed directly on the device
+ * For VGF this is a SPIR-V representation of TOSA to enable JIT compilation on the target platform
-The EthosUPartitioner is currenly used to ensure the operations converted are Ethos-U compatible, but will be extended to offer spec-correct TOSA Base inference and TOSA Main Inference generation in future.
+All targets provide a partitioner to enable the standard partially delegated flow offered by ExecuTorch.
-There is also a generic TOSABackend with accompanying TOSAPartitioner and TOSAQuantizer, which are used by the EthosUBackend and friends. The Arm TOSA Backend can be used by it's own to verify the lowering to the TOSA representation of the model (refer to the unit tests in backends/arm/test which uses the TOSA backend in the test suites).
+There is also a generic TOSABackend with accompanying TOSAPartitioner and TOSAQuantizer; these can be used directly to verify the lowering to the TOSA representation of the model (refer to the unit tests in backends/arm/test, which use the TOSA backend in their test suites).
### Controlling compilation
It is possible to control the compilation flow to aid in development and debug of both networks and the code itself.
-Configuration of the EthosUBackend export flow is controlled by CompileSpec information (essentially used as compilation flags) to determine which of these outputs is produced. In particular this allows for use of the tosa_reference_model to run intermediate output to check for correctness and quantization accuracy without a full loop via hardware implemntation.
-
-As this is in active development see the EthosUBackend for accurate information on [compilation flags](https://github.com/pytorch/executorch/blob/29f6dc9353e90951ed3fae3c57ae416de0520067/backends/arm/arm_backend.py#L319-L324)
+Configuration of the export flow is controlled by CompileSpec information (essentially used as compilation flags) to determine which of these outputs is produced. In particular, this allows for compilation flags, capturing intermediate forms during lowering, and use of the tosa_reference_model to run intermediate output to check for correctness and quantization accuracy without a full loop via a hardware implementation.
## Model specific and optional passes
The current TOSA version does not support int64. However, int64 is commonly used in many models. In order to lower the operators with int64 inputs and/or outputs to TOSA, a few passes have been developed to handle the int64-related issues. The main idea behind these passes is to replace the uses of int64 with int32 where feasible.
diff --git a/backends/arm/TARGETS b/backends/arm/TARGETS
index 8e648c56e16..9897ebc15b3 100644
--- a/backends/arm/TARGETS
+++ b/backends/arm/TARGETS
@@ -1,10 +1,42 @@
# @noautodeps
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+
+python_library(
+ name = "ethosu_partitioner",
+ srcs = [
+ "ethosu/__init__.py",
+ "ethosu/backend.py",
+ "ethosu/partitioner.py"
+ ],
+ deps = [
+ ":arm_partitioner",
+ ]
+)
+python_library(
+ name = "constants",
+ srcs = [
+ "constants.py",
+ ],
+ deps = [
+ "//executorch/exir/dialects:lib",
+ ],
+)
+python_library(
+ name = "common",
+ srcs = [
+ "common/__init__.py",
+ "common/debug.py",
+ ],
+ deps = [
+ "fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
+ "fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
+ "//caffe2:torch",
+ "//executorch/exir:lib",
+ ],
+)
python_library(
name = "arm_partitioner",
srcs = [
- "ethosu_backend.py",
- "ethosu_partitioner.py",
"tosa_backend.py",
"tosa_partitioner.py",
"vgf_backend.py",
@@ -12,6 +44,7 @@ python_library(
],
deps = [
":arm_backend",
+ ":constants",
"//executorch/backends/arm/operator_support:operator_support",
"//executorch/backends/arm/_passes:passes",
"//executorch/exir:lib",
@@ -80,6 +113,7 @@ python_library(
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
+ ":constants",
":tosa_mapping",
"//executorch/exir/dialects:lib",
],
diff --git a/backends/arm/_passes/TARGETS b/backends/arm/_passes/TARGETS
index bbb94c1d703..aebdbb315e5 100644
--- a/backends/arm/_passes/TARGETS
+++ b/backends/arm/_passes/TARGETS
@@ -4,6 +4,8 @@ python_library(
name = "passes",
srcs = glob(["*.py"]),
deps = [
+ "//executorch/backends/arm:common",
+ "//executorch/backends/arm:constants",
"//executorch/backends/arm:tosa_quant_utils",
"//executorch/backends/arm:tosa_utils",
"//executorch/backends/arm/tosa/dialect:lib",
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
index b2a6c52313a..c96a4f9738e 100644
--- a/backends/arm/_passes/__init__.py
+++ b/backends/arm/_passes/__init__.py
@@ -15,6 +15,7 @@
from .cast_to_int32_pass import CastToInt32Pass # noqa
from .conv1d_unsqueeze_pass import Conv1dUnsqueezePass # noqa
from .convert_any_default_dim_dims_pass import ConvertAnyDefaultDimDimsPass # noqa
+from .convert_elu_params import ConvertELUParamsPass # noqa
from .convert_expand_copy_to_repeat import ConvertExpandCopyToRepeatPass # noqa
from .convert_full_like_to_full_pass import ConvertFullLikeToFullPass # noqa
from .convert_int_pow_to_mul import ConvertIntPowToMuls # noqa
@@ -25,21 +26,28 @@
from .decompose_acosh_pass import DecomposeAcoshPass # noqa
from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass # noqa
from .decompose_addmm_pass import DecomposeAddmmPass # noqa
-from .decompose_asin_pass import DecomposeAsinPass # noqa
+from .decompose_asin_and_acos_pass import DecomposeAsinAndAcosPass # noqa
+from .decompose_asinh_pass import DecomposeAsinhPass # noqa
from .decompose_atan_pass import DecomposeAtanPass # noqa
from .decompose_atanh_pass import DecomposeAtanhPass # noqa
from .decompose_avg_pool2d import DecomposeAvgPool2d # noqa
from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass # noqa
+from .decompose_cosh_pass import DecomposeCoshPass # noqa
from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa
+from .decompose_cumsum_pass import DecomposeCumsumPass # noqa
from .decompose_div_pass import DecomposeDivPass # noqa
+from .decompose_elu_pass import DecomposeEluPass # noqa
from .decompose_embedding_pass import DecomposeEmbeddingPass # noqa # noqa
+from .decompose_expm1_pass import DecomposeExpm1Pass # noqa
from .decompose_gelu_pass import DecomposeGeluPass # noqa
+from .decompose_glu_pass import DecomposeGluPass # noqa
from .decompose_grouped_conv import DecomposeGroupedConv # noqa
from .decompose_groupnorm_pass import DecomposeGroupNormPass # noqa
from .decompose_layernorm_pass import DecomposeLayerNormPass # noqa
from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa
from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass # noqa
from .decompose_linear_pass import DecomposeLinearPass # noqa
+from .decompose_logit_pass import DecomposeLogitPass # noqa
from .decompose_masked_fill import DecomposeMaskedFill # noqa
from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass # noqa
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
diff --git a/backends/arm/_passes/annotate_channels_last_dim_order_pass.py b/backends/arm/_passes/annotate_channels_last_dim_order_pass.py
index f8ead856fbb..0ce8d667b3c 100644
--- a/backends/arm/_passes/annotate_channels_last_dim_order_pass.py
+++ b/backends/arm/_passes/annotate_channels_last_dim_order_pass.py
@@ -14,36 +14,12 @@
from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
-from torch.library import impl, Library
-
-# Define lib with passthrough operators. The operators have no real meaning in edge IR
-# except for argument validaiton and a passthrough output. The operators will be used
-# when lowering to TOSA, e.g. a passthrough_to_tosa._transpose will not affect
-# the edge IR graph but will be lowered to a TOSA-TRANSPOSE.
-lib = Library("passthrough_to_tosa", "DEF")
-# For certain operators we need the data in a specific data format. Changing tosa_dim_order
-# is not sufficient as we also need transpose the data.
-# By utilizing an edge IR passthrough operator we can keep the edge program in
-# channels-first/contiguous and get the desired behavior in the TOSA lowering.
-lib.define("_transpose(Tensor self, int[] dim_order) -> Tensor")
-
-
-@impl(lib, "_transpose")
-def _transpose_impl(*args, **kwargs):
- # Validate length of dim_order array
- dim = args[1]
- if len(dim) != 4 and len(dim) != 5:
- raise ValueError(
- f"Dim order length must be either 4 or 5, got {len(dim)}: {dim}"
- )
- # Pass-through in edge-IR
- return args[0]
class AnnotateChannelsLastDimOrder(ExportPass):
"""
Annotates each node with a tosa_dim_order. tosa_dim_order can be seen as a channels-last dim-order
- that in most cases will be (0, 2, 3, 1) for nodes with 4D-shapes. The pass also inserts passthrough_to_tosa._transpose
+ that in most cases will be (0, 2, 3, 1) for nodes with 4D-shapes. The pass also inserts backend.tosa.TRANSPOSE
when a transition between 3D and 4D/5D tensors happen.
The annotated tosa_dim_order is used to permute the node's shape such that it gives a TOSA-compliant shape.
"""
@@ -119,7 +95,7 @@ def insert_input_transpose(node, input_node, graph_module):
with graph_module.graph.inserting_before(node):
permute_node = create_node(
graph_module.graph,
- torch.ops.passthrough_to_tosa._transpose.default,
+ exir_ops.backend.tosa.TRANSPOSE.default,
args=(
input_node,
list(
@@ -141,7 +117,7 @@ def insert_output_transpose(node, graph_module):
with graph_module.graph.inserting_after(node):
permute_node = create_node(
graph_module.graph,
- torch.ops.passthrough_to_tosa._transpose.default,
+ exir_ops.backend.tosa.TRANSPOSE.default,
args=(
node,
list(
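
For intuition, the tosa_dim_order annotation described above corresponds to a plain NCHW-to-NHWC permutation. A minimal torch sketch, independent of this pass:

```python
import torch

x = torch.randn(1, 3, 8, 8)        # NCHW, PyTorch's contiguous layout
nhwc = x.permute(0, 2, 3, 1)       # apply dim order (0, 2, 3, 1)
assert nhwc.shape == (1, 8, 8, 3)  # channels-last, the TOSA-friendly shape
```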
diff --git a/backends/arm/_passes/annotate_decomposed_matmul.py b/backends/arm/_passes/annotate_decomposed_matmul.py
index 9f9168d9238..8156ca0b89d 100644
--- a/backends/arm/_passes/annotate_decomposed_matmul.py
+++ b/backends/arm/_passes/annotate_decomposed_matmul.py
@@ -12,7 +12,7 @@
import torch
from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.dialects.edge._ops import EdgeOpOverload
from executorch.exir.pass_base import ExportPass, PassResult
@@ -62,7 +62,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
}
for partition in matmul_partitions:
quantized_input = all(
- input_node.target in dq_ops for input_node in partition.input_nodes
+ input_node.target in DQ_OPS for input_node in partition.input_nodes
)
matmul_node = [
node for node in partition.nodes if node.target in matmul_targets
@@ -93,7 +93,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
graph_module.graph.erase_node(partition_input)
partition_output = list(partition.output_nodes[0].users)[0]
- quantized_output = partition_output.target in q_ops
+ quantized_output = partition_output.target in Q_OPS
if quantized_output:
with graph_module.graph.inserting_after(matmul_node):
# Create q-node after matmul
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index 6a25b8b3a8a..af14ef14cf7 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -19,6 +19,7 @@
ComputeConstantOpsAOT,
Conv1dUnsqueezePass,
ConvertAnyDefaultDimDimsPass,
+ ConvertELUParamsPass,
ConvertExpandCopyToRepeatPass,
ConvertFullLikeToFullPass,
ConvertIntPowToMuls,
@@ -30,21 +31,28 @@
DecomposeAcoshPass,
DecomposeAdaptiveAvgPool2dPass,
DecomposeAddmmPass,
- DecomposeAsinPass,
+ DecomposeAsinAndAcosPass,
+ DecomposeAsinhPass,
DecomposeAtanhPass,
DecomposeAtanPass,
DecomposeAvgPool2d,
DecomposeBatchNormNoStatsPass,
+ DecomposeCoshPass,
DecomposeCosineSimilarityPass,
+ DecomposeCumsumPass,
DecomposeDivPass,
+ DecomposeEluPass,
DecomposeEmbeddingPass,
+ DecomposeExpm1Pass,
DecomposeGeluPass,
+ DecomposeGluPass,
DecomposeGroupedConv,
DecomposeGroupNormPass,
DecomposeLayerNormPass,
DecomposeLeakyReLUPass,
DecomposeLinearPass,
DecomposeLinearVectorNormPass,
+ DecomposeLogitPass,
DecomposeMaskedFill,
DecomposeMaxPool2DPass,
DecomposeMeanDimPass,
@@ -105,7 +113,7 @@ def _transform(self, graph_module: GraphModule):
with TosaLoweringContext(self.tosa_spec):
return self(graph_module).graph_module
- def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
self.add_pass(FuseQuantizedActivationPass())
self.add_pass(RemoveGetItemPass())
self.add_pass(ConvertSplitToSlicePass())
@@ -114,7 +122,6 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
self.add_pass(
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
)
-
self.add_pass(ConvertFullLikeToFullPass())
self.add_pass(ConvertToClampPass())
self.add_pass(ConvertMinMaxPass())
@@ -127,6 +134,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
self.add_pass(AnnotateDecomposedMatmulPass())
self.add_pass(QuantizeOperatorArguments())
+ self.add_pass(ConvertELUParamsPass())
self.add_pass(FoldAndAnnotateQParamsPass(exported_program)) # type: ignore[call-arg]
self.add_pass(RetraceFoldedDtypesPass())
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
@@ -144,11 +152,11 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
self.add_pass(UnsqueezeBeforeRepeatPass())
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
self.add_pass(DecomposeSumPass())
+ self.add_pass(DecomposeCumsumPass(exported_program))
self.add_pass(Conv1dUnsqueezePass())
self.add_pass(DecomposeMaxPool2DPass())
self.add_pass(SizeAdjustInputPass())
self.add_pass(DecomposeSelectPass())
-
self.add_pass(ConvertSqueezesToViewPass())
self.add_pass(FuseViewCopyTransform())
@@ -162,15 +170,20 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
return self._transform(exported_program.graph_module)
- def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+ self.add_pass(DecomposeLogitPass())
self.add_pass(DecomposeMaskedFill())
self.add_pass(DecomposeRoundPass())
self.add_pass(DecomposeAcoshPass())
- self.add_pass(DecomposeAsinPass())
+ self.add_pass(DecomposeAsinhPass())
+ self.add_pass(DecomposeCoshPass())
+ self.add_pass(DecomposeAsinAndAcosPass())
self.add_pass(DecomposeSqrtPass())
self.add_pass(DecomposeAtanPass())
self.add_pass(DecomposeAtanhPass())
self.add_pass(DecomposeAddmmPass())
+ self.add_pass(DecomposeEluPass())
+ self.add_pass(DecomposeExpm1Pass())
self.add_pass(ConvertIntPowToMuls())
self.add_pass(CastBoolToInt8Pass())
self.add_pass(DecomposeSinhPass())
@@ -182,6 +195,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
self.add_pass(ConvertSplitToSlicePass())
self.add_pass(FuseBatchnorm2DPass(exported_program))
self.add_pass(ConvertMmToBmmPass())
+ self.add_pass(DecomposeGluPass())
self.add_pass(DecomposeLinearPass())
self.add_pass(DecomposeLeakyReLUPass())
self.add_pass(DecomposeGroupNormPass())
@@ -219,6 +233,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
self.add_pass(UnsqueezeBeforeRepeatPass())
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
self.add_pass(DecomposeSumPass())
+ self.add_pass(DecomposeCumsumPass(exported_program))
self.add_pass(Conv1dUnsqueezePass())
self.add_pass(DecomposeMaxPool2DPass())
self.add_pass(SizeAdjustInputPass())
@@ -235,22 +250,12 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
return self._transform(exported_program.graph_module)
- def _tosa_1_0_int_quantized_pipeline(self, exported_program: ExportedProgram):
- return self._tosa_080_BI_pipeline(exported_program)
-
- def _tosa_1_0_fp_pipeline(self, exported_program: ExportedProgram):
- return self._tosa_080_MI_pipeline(exported_program)
-
def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
"""Apply passes before transforming program to backend"""
- if self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+BI"):
- return self._tosa_080_BI_pipeline(exported_program)
- elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+MI"):
- return self._tosa_080_MI_pipeline(exported_program)
- elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-1.0+FP"):
- return self._tosa_1_0_fp_pipeline(exported_program)
+ if self.tosa_spec == TosaSpecification.create_from_string("TOSA-1.0+FP"):
+ return self._tosa_FP_pipeline(exported_program)
elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-1.0+INT"):
- return self._tosa_1_0_int_quantized_pipeline(exported_program)
+ return self._tosa_INT_pipeline(exported_program)
else:
raise NotImplementedError(
f"No pass pipeline implemented for {self.tosa_spec=}"
@@ -261,6 +266,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
self.add_pass(DecomposeEmbeddingPass())
self.add_pass(DecomposeScaledDotProductAttention())
self.add_pass(DecomposeRoundPass())
+ self.add_pass(DecomposeLogitPass())
self.add_pass(CastBoolToInt8Pass())
self.add_pass(DecomposeSignPass())
self.add_pass(DecomposeAddmmPass())
@@ -272,6 +278,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
self.add_pass(DecomposeMeanDimPass(graph_module, self.tosa_spec))
self.add_pass(DecomposeNotEqualPass())
self.add_pass(DecomposeCosineSimilarityPass())
+ self.add_pass(DecomposeGluPass())
self.add_pass(DecomposeDivPass())
self.add_pass(DecomposeLeakyReLUPass())
self.add_pass(DecomposeLinearVectorNormPass())
diff --git a/backends/arm/_passes/arm_pass_utils.py b/backends/arm/_passes/arm_pass_utils.py
index 1e0c21239e2..00eb395be9f 100644
--- a/backends/arm/_passes/arm_pass_utils.py
+++ b/backends/arm/_passes/arm_pass_utils.py
@@ -13,7 +13,7 @@
import torch
import torch.fx
-from executorch.backends.arm.tosa_utils import get_node_debug_info
+from executorch.backends.arm.common.debug import get_node_debug_info
from executorch.exir import ExportedProgram
from executorch.exir.dialects._ops import ops as exir_ops
diff --git a/backends/arm/_passes/convert_elu_params.py b/backends/arm/_passes/convert_elu_params.py
new file mode 100644
index 00000000000..7da58ae4bb4
--- /dev/null
+++ b/backends/arm/_passes/convert_elu_params.py
@@ -0,0 +1,53 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import create_node
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+
+
+class ConvertELUParamsPass(ExportPass):
+ """
+ Pass to convert the kwargs of the ELU operator from float to int.
+
+ input_scale is set to 2, as the outputs appear to stay the same
+ regardless of what its value is, as long as that value is not 1.
+ """
+
+ def call(self, graph_module: torch.fx.GraphModule):
+ modified_graph = False
+ graph = graph_module.graph
+ node_list = graph.find_nodes(
+ op="call_function", target=exir_ops.edge.aten.elu.default
+ )
+ for node in node_list:
+ with graph.inserting_after(node):
+ replace_node = create_node(graph, exir_ops.edge.aten.elu.default)
+ old_args = list(node.args)
+
+ alpha = old_args[1] if len(old_args) > 1 else 1.0
+ scale = 1.0
+ input_scale = 2.0
+
+ replace_node.args = (old_args[0],)
+
+ updated_kwargs = dict(node.kwargs)
+ updated_kwargs["alpha"] = int(alpha)
+ updated_kwargs["scale"] = int(scale)
+ updated_kwargs["input_scale"] = int(input_scale)
+
+ replace_node.kwargs = updated_kwargs
+
+ node.replace_all_uses_with(replace_node)
+ graph.erase_node(node)
+
+ modified_graph = True
+ if modified_graph:
+ graph_module.recompile()
+ graph_module = super().call(graph_module).graph_module
+
+ return PassResult(graph_module, modified_graph)
diff --git a/backends/arm/_passes/decompose_asin_pass.py b/backends/arm/_passes/decompose_asin_and_acos_pass.py
similarity index 72%
rename from backends/arm/_passes/decompose_asin_pass.py
rename to backends/arm/_passes/decompose_asin_and_acos_pass.py
index 0c0bcdf7f49..e067f17b0ca 100644
--- a/backends/arm/_passes/decompose_asin_pass.py
+++ b/backends/arm/_passes/decompose_asin_and_acos_pass.py
@@ -15,10 +15,11 @@
# For MI case
edge_asin_op = (exir_ops.edge.aten.asin.default,)
+edge_acos_op = (exir_ops.edge.aten.acos.default,)
-def get_asin_decomposition(op) -> tuple:
- if op in edge_asin_op:
+def get_decomposition(op) -> tuple:
+ if op in (edge_asin_op + edge_acos_op):
return (
exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.add.Tensor,
@@ -31,25 +32,26 @@ def get_asin_decomposition(op) -> tuple:
exir_ops.edge.aten.lt.Scalar,
exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.full_like.default,
- exir_ops.edge.aten.where.self,
exir_ops.edge.aten.neg.default,
)
- raise RuntimeError(f"Can't get asin decomposition for op {op}")
+ raise RuntimeError(f"Can't get decomposition for op {op}")
-class DecomposeAsinPass(ArmPass):
+class DecomposeAsinAndAcosPass(ArmPass):
"""
- This pass decomposes asin into a rational approximation for small values
+ This pass decomposes asin and acos into a rational approximation for small values
and a transformed rational approximation for large values.
- Example:
- y = asin(x)
- Becomes:
+
+ The decomposition is based on the following mathematical identities:
if abs(x) < 0.5:
- y = x + P(x^2) / Q(x^2)
+ asin(x) = x + P(x^2) / Q(x^2)
+ acos(x) = π/2 - asin(x)
else:
- y = π/2 - 2 * (s + s^3 * Q(z) / P(z))
- where P and Q are polynomials defined in the function.
+ asin(x) = π/2 - 2 * (s + s^3 * P(z) / Q(z))
+ acos(x) = 2 * (s + s^3 * P(z) / Q(z))
+ where P and Q are polynomials defined in the function, z = (1 - abs(x)) / 2, and s is the square root of z.
+
"""
def _build_polynomial(
@@ -84,11 +86,25 @@ def _build_polynomial(
)
return result
+ def _combine_branches(
+ self,
+ bool_op,
+ bool_args: tuple[torch.Tensor, float],
+ branches: tuple[torch.Tensor, torch.Tensor],
+ meta: dict[str, str],
+ ) -> torch.Tensor:
+ where_op = exir_ops.edge.aten.where.self
+ mask = super().call_operator(bool_op, bool_args, {}, meta, True)
+ branch_true, branch_false = branches
+ return super().call_operator(
+ where_op, (mask, branch_true, branch_false), {}, meta, True
+ )
+
def call_operator(self, op, args, kwargs, meta):
- if op not in edge_asin_op:
+ if op not in (edge_asin_op + edge_acos_op):
return super().call_operator(op, args, kwargs, meta)
logging.info(
- f"Approximating asin. This may introduce small numerical errors. For details, see {__file__}."
+ f"Approximating {op}. This may introduce small numerical errors. For details, see {__file__}."
)
x = args[0]
half = 0.5
@@ -111,9 +127,8 @@ def call_operator(self, op, args, kwargs, meta):
lt_op,
sub_op,
full_like_op,
- where_op,
neg_op,
- ) = get_asin_decomposition(op)
+ ) = get_decomposition(op)
# Coefficients for the rational approximation, calculated with the Minimax (Remez) method
p_coefficients = [
@@ -129,7 +144,6 @@ def call_operator(self, op, args, kwargs, meta):
x_abs = super().call_operator(abs_op, (x,), {}, meta, True)
# Step 1: compute asin_small - rational approximation for [0,0.5]
-
y = super().call_operator(mul_op, (x_abs, x_abs), {}, meta, True)
x3 = super().call_operator(mul_op, (x_abs, y), {}, meta, True)
@@ -154,47 +168,40 @@ def call_operator(self, op, args, kwargs, meta):
Qz = self._build_polynomial(q_coefficients, z, meta)
numer = super().call_operator(mul_op, (s3, Pz), {}, meta, True)
+
# Calculate r_large = P(z) / Q(z)
r_large = super().call_operator(div_op, (numer, Qz), {}, meta, True)
# Calculate asin_large = pi/2 - 2 * (s + s^3 * Q(z) / P(z))
t1 = super().call_operator(add_op, (s, r_large), {}, meta, True)
t2 = super().call_operator(mul_op_scalar, (t1, two), {}, meta, True)
+
diff = super().call_operator(sub_op_scalar, (t2, pi_over_2), {}, meta, True)
tmp_neg_ones = super().call_operator(
full_like_op, (diff, neg_one), {}, meta, True
)
asin_large = super().call_operator(mul_op, (diff, tmp_neg_ones), {}, meta, True)
- # Combine branches
- is_large = super().call_operator(gt_op, (x_abs, half), {}, meta, True)
- asin_unsigned = super().call_operator(
- where_op,
- (
- is_large,
- asin_large,
- asin_small,
- ),
- {},
- meta,
- True,
+ asin_unsigned = self._combine_branches(
+ gt_op, (x_abs, half), (asin_large, asin_small), meta
)
# Handle x < 0
- is_neg = super().call_operator(lt_op, (x, zero), {}, meta, True)
- # Compute -asin_unsigned
negated_asin = super().call_operator(neg_op, (asin_unsigned,), {}, meta, True)
- # Combine branches for signed asin
- asin_signed = super().call_operator(
- where_op,
- (
- is_neg,
- negated_asin,
- asin_unsigned,
- ),
- {},
- meta,
- True,
+ asin = self._combine_branches(
+ lt_op, (x, zero), (negated_asin, asin_unsigned), meta
)
- return asin_signed
+ if op in edge_acos_op:
+ # If x <= 0.5: acos(x) = pi/2 - asin(x)
+ const_tensor = super().call_operator(
+ full_like_op, (x, pi_over_2), {}, meta, True
+ )
+ acos_small = super().call_operator(
+ sub_op, (const_tensor, asin), {}, meta, True
+ )
+ # If x > 0.5, acos(x) = 2 * (s + s^3 * P(z) / Q(z)) = t2
+ acos = self._combine_branches(gt_op, (x, half), (t2, acos_small), meta)
+ return acos
+
+ return asin
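
A quick numerical check of the identities the docstring relies on, assuming only a standard torch install (an illustration, not part of the pass):

```python
import math

import torch

# acos is derived from asin via acos(x) = pi/2 - asin(x)
x = torch.linspace(-1.0, 1.0, steps=101)
assert torch.allclose(torch.acos(x), math.pi / 2 - torch.asin(x), atol=1e-6)

# Large-value branch: with z = (1 - x) / 2 and s = sqrt(z),
# asin(x) = pi/2 - 2 * asin(s) on [0.5, 1]
x_large = torch.linspace(0.5, 1.0, steps=51)
s = torch.sqrt((1.0 - x_large) / 2.0)
assert torch.allclose(torch.asin(x_large), math.pi / 2 - 2.0 * torch.asin(s), atol=1e-5)
```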
diff --git a/backends/arm/_passes/decompose_asinh_pass.py b/backends/arm/_passes/decompose_asinh_pass.py
new file mode 100644
index 00000000000..a0b78c51a77
--- /dev/null
+++ b/backends/arm/_passes/decompose_asinh_pass.py
@@ -0,0 +1,50 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+# For MI case
+edge_asinh_op = (exir_ops.edge.aten.asinh.default,)
+
+
+class DecomposeAsinhPass(ArmPass):
+ """
+ Decomposes asinh to supported TOSA-operations.
+ This decomposition is based on the mathematical identity:
+ asinh(x) = log(x + sqrt(x^2 + 1))
+ """
+
+ def call_operator(self, op, args, kwargs, meta):
+ if op not in edge_asinh_op:
+ return super().call_operator(op, args, kwargs, meta)
+
+ log_op, sqrt_op, mul_op, add_op_scalar, add_op = (
+ exir_ops.edge.aten.log.default,
+ exir_ops.edge.aten.sqrt.default,
+ exir_ops.edge.aten.mul.Tensor,
+ exir_ops.edge.aten.add.Scalar,
+ exir_ops.edge.aten.add.Tensor,
+ )
+
+ x = args[0]
+
+ # calculate t1 = x^2 + 1
+ x2 = super().call_operator(mul_op, (x, x), {}, meta, True)
+ t1 = super().call_operator(add_op_scalar, (x2, 1.0), {}, meta, True)
+
+ # t2 = sqrt(t1)
+ t2 = super().call_operator(sqrt_op, (t1,), {}, meta, True)
+
+ # t3 = x + t2
+ t3 = super().call_operator(add_op, (x, t2), {}, meta, True)
+
+ # out = ln(t3)
+ out = super().call_operator(log_op, (t3,), {}, meta, True)
+
+ return out
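
The identity underlying the decomposition can be sanity-checked directly against torch.asinh; a minimal sketch assuming only a standard torch install:

```python
import torch

x = torch.linspace(-5.0, 5.0, steps=101)
decomposed = torch.log(x + torch.sqrt(x * x + 1.0))
# float32 loses some precision for large negative x (cancellation in x + sqrt)
assert torch.allclose(torch.asinh(x), decomposed, atol=1e-4)
```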
diff --git a/backends/arm/_passes/decompose_avg_pool2d.py b/backends/arm/_passes/decompose_avg_pool2d.py
index 0eb3ce34ecd..21ed6b518c7 100644
--- a/backends/arm/_passes/decompose_avg_pool2d.py
+++ b/backends/arm/_passes/decompose_avg_pool2d.py
@@ -45,7 +45,10 @@ def call_operator(self, op, args, kwargs, meta):
x = args[0]
kernel_h, kernel_w = args[1]
kernel_size = kernel_h * kernel_w
- stride_h, stride_w = args[2]
+ if len(args) > 2 and args[2] is not None:
+ stride_h, stride_w = args[2]
+ else:
+ stride_h, stride_w = kernel_h, kernel_w
pad_h, pad_w = new_pad_h, new_pad_w = args[3] if len(args) > 3 else (0, 0)
ceil_mode = args[4] if len(args) > 4 else False
count_include_pad = args[5] if len(args) > 5 else True
@@ -108,7 +111,14 @@ def call_operator(self, op, args, kwargs, meta):
x = super().call_operator(cat_op, (cat_nodes, 2), kwargs, meta)
new_pad_h = 0
- avgpool_args = (x, args[1], args[2], [new_pad_h, new_pad_w], ceil_mode, False)
+ avgpool_args = (
+ x,
+ args[1],
+ [stride_h, stride_w],
+ [new_pad_h, new_pad_w],
+ ceil_mode,
+ False,
+ )
x = super().call_operator(avgpool_op, avgpool_args, kwargs, meta)
# Multiply by factor (kernel_size / divisor_override) if divisor_override
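
The stride fallback added above mirrors PyTorch's own behaviour; a quick check of that default, assuming only a standard torch install:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 1, 4, 4)
# With stride omitted, avg_pool2d defaults it to the kernel size -- the same
# fallback the pass now applies when args[2] is missing or None.
assert torch.allclose(F.avg_pool2d(x, kernel_size=2), F.avg_pool2d(x, kernel_size=2, stride=2))
```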
diff --git a/backends/arm/_passes/decompose_cosh_pass.py b/backends/arm/_passes/decompose_cosh_pass.py
new file mode 100644
index 00000000000..a94cf9ecff0
--- /dev/null
+++ b/backends/arm/_passes/decompose_cosh_pass.py
@@ -0,0 +1,48 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+# For MI case
+edge_cosh = exir_ops.edge.aten.cosh.default
+
+
+class DecomposeCoshPass(ArmPass):
+ """
+ This pass replaces the cosh operator with a sequence of TOSA-equivalent operations that
+ compute the hyperbolic cosine using the formula:
+
+ cosh(x) = 0.5 * (e^x + e^(-x))
+
+ """
+
+ def call_operator(self, op, args, kwargs, meta, updated=False):
+ if op is not edge_cosh:
+ return super().call_operator(op, args, kwargs, meta, updated)
+
+ x = args
+
+ exp_op, mul_op, neg_op, add_op = (
+ exir_ops.edge.aten.exp.default,
+ exir_ops.edge.aten.mul.Scalar,
+ exir_ops.edge.aten.neg.default,
+ exir_ops.edge.aten.add.Tensor,
+ )
+
+ # exp1 = e^x
+ exp1 = super().call_operator(exp_op, x, {}, meta, updated=True)
+
+ # exp2 = e^(-x)
+ neg_x = super().call_operator(neg_op, x, {}, meta, updated=True)
+ exp2 = super().call_operator(exp_op, (neg_x,), {}, meta, updated=True)
+
+ # numer = exp1 + exp2
+ numer = super().call_operator(add_op, (exp1, exp2), {}, meta, updated=True)
+
+ # out = 0.5 * numer
+ out = super().call_operator(mul_op, (numer, 0.5), {}, meta, updated=True)
+
+ return out
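
A one-line numerical check of the formula, assuming only a standard torch install:

```python
import torch

x = torch.linspace(-4.0, 4.0, steps=81)
assert torch.allclose(torch.cosh(x), 0.5 * (torch.exp(x) + torch.exp(-x)), atol=1e-5)
```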
diff --git a/backends/arm/_passes/decompose_cumsum_pass.py b/backends/arm/_passes/decompose_cumsum_pass.py
new file mode 100644
index 00000000000..155ccd11594
--- /dev/null
+++ b/backends/arm/_passes/decompose_cumsum_pass.py
@@ -0,0 +1,142 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from math import prod
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.arm_pass_utils import create_node
+from executorch.backends.arm._passes.quant_args import QuantArgs
+
+from executorch.backends.transforms.utils import create_constant_placeholder
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import PassResult
+from torch.export.graph_signature import InputKind
+
+
+class DecomposeCumsumPass(ArmPass):
+ """
+ Decomposes cumsum into a 1D convolution with a kernel of ones.
+
+ For example, the cumsum of an input tensor [1, 1] is [1, 1 + 1] = [1, 2].
+ To decompose this, the input tensor is pre-padded with len(input)-1 zeros and
+ slid over with a kernel of ones of length len(input):
+
+ Input: [0, 1, 1]
+ Kernel: [1, 1] = [1]
+ [1, 1] = [2]
+
+ Since pytorch only supports symmetric padding, in reality the result will have
+ an additional element calculated at the end, which requires an extra slice op.
+
+ To extend this to higher dimensions, the input is reshaped to [N, C, H, W] with
+ N = prod(shape[:dim])
+ C = 1
+ H = shape[dim]
+ W = prod(shape[dim+1:])
+ And the convolution is applied over dimension H.
+ """
+
+ def call(self, graph_module):
+ graph = graph_module.graph
+ targets = (exir_ops.edge.aten.cumsum.default, torch.ops.aten.cumsum.default)
+ modified = False
+ for node in list(graph.nodes):
+ if node.op != "call_function" or node.target not in targets:
+ continue
+
+ if len(node.args) != 2:
+ raise ValueError(
+ "Cumsum node should have exactly two arguments: input and dim."
+ )
+
+ # Get node data
+ input_node, dim = node.args
+ val = node.meta.get("val")
+ original_shape = list(val.shape)
+ dtype = input_node.meta.get("val").dtype
+ dim = dim % len(original_shape)
+
+ # Compute shapes
+ pre_cumsum_dim = prod(original_shape[:dim]) if dim > 0 else 1
+ cumsum_dim = original_shape[dim]
+ post_cumsum_dim = (
+ prod(original_shape[dim + 1 :]) if dim < len(original_shape) - 1 else 1
+ )
+ conv_shape = [
+ pre_cumsum_dim,
+ 1,
+ cumsum_dim,
+ post_cumsum_dim,
+ ]
+ pad_shape = [original_shape[dim] - 1, 0]
+ weight_shape = [1, 1, original_shape[dim], 1]
+
+ # Create convolution weight
+ with graph.inserting_before(list(graph.nodes)[0]):
+ weight_data = torch.ones(size=weight_shape, dtype=dtype)
+ weight_node = create_constant_placeholder(
+ self.exported_program,
+ graph,
+ node.name + "_kernel",
+ InputKind.PARAMETER,
+ weight_data,
+ )
+
+ # Create decomposed nodes
+ view_op = exir_ops.edge.aten.view_copy.default
+ conv_op = exir_ops.edge.aten.convolution.default
+ slice_op = exir_ops.edge.aten.slice_copy.Tensor
+ with graph.inserting_before(node):
+ # Reshape to 4D: [pre_cumsum_dim, 1, cumsum_dim, post_cumsum_dim]
+ view_args = (input_node, conv_shape)
+ view_node = create_node(graph, view_op, args=view_args, from_node=node)
+
+ conv_args = (
+ view_node,
+ weight_node,
+ None,
+ [1, 1],
+ pad_shape,
+ [1, 1],
+ False,
+ [0],
+ 1,
+ )
+ conv_node = create_node(graph, conv_op, args=conv_args, from_node=node)
+
+ # The convolution is inserted after quantization, so we need to set our
+ # own quantization parameters for the weights here. However since the
+ # data is ones directly created as int8, they already have correct scale
+ # and so no scaling needs to be done, i.e. set scale=1.0, zero_point=0.0
+ if (
+ "input_qparams" in conv_node.meta
+ and len(conv_node.meta["input_qparams"]) > 0
+ ):
+ qparams = QuantArgs(1.0, 0.0, -128, 127, torch.int8)
+ conv_node.meta["input_qparams"][1] = qparams
+
+ slice_args = (conv_node, 2, 0, original_shape[dim])
+ slice_node = create_node(
+ graph, slice_op, args=slice_args, from_node=node
+ )
+
+ view_original_args = (slice_node, original_shape)
+ view_original_node = create_node(
+ graph, view_op, args=view_original_args, from_node=node
+ )
+
+ # Replace and remove original
+ node.replace_all_uses_with(view_original_node)
+ graph.erase_node(node)
+ modified = True
+
+ if modified:
+ # Cleanup
+ graph.eliminate_dead_code()
+ graph_module.recompile()
+ # Apply any operator-level transforms
+ graph_module = super().call(graph_module).graph_module
+ return PassResult(graph_module, modified)
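
The 1D core of the decomposition can be reproduced with plain torch ops: pad with len(x)-1 zeros, convolve with a kernel of ones, then slice off the tail that the symmetric padding produces. An illustrative sketch, not the pass itself:

```python
import torch
import torch.nn.functional as F

x = torch.arange(1.0, 6.0)             # [1, 2, 3, 4, 5]
n = x.numel()
kernel = torch.ones(1, 1, n)           # kernel of ones, length len(x)
y = F.conv1d(x.view(1, 1, n), kernel, padding=n - 1)
cumsum = y[0, 0, :n]                   # symmetric padding adds a tail; slice it off
assert torch.allclose(cumsum, torch.cumsum(x, dim=0))
```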
diff --git a/backends/arm/_passes/decompose_elu_pass.py b/backends/arm/_passes/decompose_elu_pass.py
new file mode 100644
index 00000000000..743f1b46f4d
--- /dev/null
+++ b/backends/arm/_passes/decompose_elu_pass.py
@@ -0,0 +1,85 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+edge_elu_ops = (exir_ops.edge.aten.elu.default,)
+
+
+def get_elu_decomposition(op) -> tuple:
+ """
+ Returns the decomposition of the given aten.elu operation into
+ its equivalent TOSA-supported operations
+
+ The decomposition strategy is:
+ elu(x, alpha) → where(greater_or_eq(x, 0), x, alpha*(exp(x)-1))
+
+ Returns:
+ A tuple (expm1_op, ge_op, where_op, mul_op) corresponding to the appropriate operator
+ overloads for the input op.
+
+ Raises:
+ RuntimeError: If the provided operator is not a supported elu variant.
+ """
+
+ if op in edge_elu_ops:
+ return (
+ exir_ops.edge.aten.expm1.default,
+ exir_ops.edge.aten.ge.Scalar,
+ exir_ops.edge.aten.where.self,
+ exir_ops.edge.aten.mul.Scalar,
+ )
+
+ raise RuntimeError(f"Can't get elu decomposition for op {op}")
+
+
+class DecomposeEluPass(ArmPass):
+ """
+ A transformation pass that decomposes unsupported 'aten.elu' operations
+ into a combination of supported TOSA-equivalent operations.
+
+ Since TOSA does not provide a native ELU operator, this pass rewrites:
+ elu(x) → where(greater_or_eq(x, 0), x, alpha*(exp(x)-1))
+
+ Supported input ops:
+ - exir_ops.edge.aten.elu.default(x)
+
+ These are replaced with:
+ - exir_ops.edge.aten.expm1.default
+ - exir_ops.edge.aten.ge.Scalar
+ - exir_ops.edge.aten.where.self
+ - exir_ops.edge.aten.mul.Scalar
+ """
+
+ def call_operator(self, op, args, kwargs, meta):
+ if op not in edge_elu_ops:
+ return super().call_operator(op, args, kwargs, meta, updated=False)
+
+ (
+ expm1_op,
+ ge_op,
+ where_op,
+ mul_op,
+ ) = get_elu_decomposition(op)
+
+ input = args[0]
+ alpha = args[1] if len(args) > 1 else 1.0
+
+ if alpha == 0:
+ relu_op = exir_ops.edge.aten.relu.default
+ return super().call_operator(relu_op, (input,), {}, meta, updated=True)
+
+ expm1_node = super().call_operator(expm1_op, (input,), {}, meta, updated=True)
+ mul_node = super().call_operator(
+ mul_op, (expm1_node, alpha), {}, meta, updated=True
+ )
+ ge_node = super().call_operator(ge_op, (input, 0.0), {}, meta, updated=True)
+ where_node = super().call_operator(
+ where_op, (ge_node, input, mul_node), {}, meta, updated=True
+ )
+
+ return where_node
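
The rewrite can be checked against torch's reference ELU; a minimal sketch assuming only a standard torch install:

```python
import torch
import torch.nn.functional as F

x = torch.linspace(-3.0, 3.0, steps=61)
alpha = 0.5
decomposed = torch.where(x >= 0, x, alpha * torch.expm1(x))
assert torch.allclose(decomposed, F.elu(x, alpha=alpha))
```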
diff --git a/backends/arm/_passes/decompose_expm1_pass.py b/backends/arm/_passes/decompose_expm1_pass.py
new file mode 100644
index 00000000000..5b1b90495b5
--- /dev/null
+++ b/backends/arm/_passes/decompose_expm1_pass.py
@@ -0,0 +1,135 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+edge_expm1_ops = (exir_ops.edge.aten.expm1.default,) # MI case
+
+
+def _get_expm1_decomposition(op) -> tuple:
+ """
+ Returns the decomposition of the given aten.expm1 operation into
+ its equivalent TOSA-supported operations
+
+ The decomposition strategy is:
+ expm1(x) → where(and(ge(x, -0.35), le(x, 0.35)), {taylor_series_expansion}, (exp(x)-1))
+
+ where {taylor_series_expansion} = x + (x^2/2) + (x^3/6) + (x^4/24)
+
+ Returns:
+ A tuple (op_pow, op_div, op_add, op_exp, op_sub, op_ge, op_where, op_le, op_and)
+ corresponding to the appropriate operator overloads for the input op.
+
+ Raises:
+ RuntimeError: If the provided operator is not a supported expm1 variant.
+ """
+ if op in edge_expm1_ops:
+ return (
+ exir_ops.edge.aten.pow.Tensor_Scalar,
+ exir_ops.edge.aten.div.Scalar,
+ exir_ops.edge.aten.add.Tensor,
+ exir_ops.edge.aten.exp.default,
+ exir_ops.edge.aten.sub.Scalar,
+ exir_ops.edge.aten.ge.Scalar,
+ exir_ops.edge.aten.where.self,
+ exir_ops.edge.aten.le.Scalar,
+ exir_ops.edge.aten.logical_and.default,
+ )
+
+ raise RuntimeError(f"Can't get expm1 decomposition for op {op}")
+
+
+class DecomposeExpm1Pass(ArmPass):
+ """
+ A transformation pass that decomposes unsupported 'aten.expm1' operations
+ into a combination of supported TOSA-equivalent operations.
+
+ Since TOSA does not provide a native expm1 operator, this pass rewrites:
+ expm1(x) → where(and(ge(x, -0.35), le(x, 0.35)), {taylor_series_expansion}, (exp(x)-1))
+ where {taylor_series_expansion} = x + (x^2/2) + (x^3/6) + (x^4/24)
+
+ Supported input ops:
+ - exir_ops.edge.aten.expm1.default(x)
+
+ These are replaced with:
+ - exir_ops.edge.aten.pow.Tensor_Scalar,
+ - exir_ops.edge.aten.div.Scalar,
+ - exir_ops.edge.aten.add.Tensor,
+ - exir_ops.edge.aten.exp.default,
+ - exir_ops.edge.aten.sub.Scalar,
+ - exir_ops.edge.aten.ge.Scalar,
+ - exir_ops.edge.aten.where.self,
+ - exir_ops.edge.aten.le.Scalar,
+ - exir_ops.edge.aten.logical_and.default
+ """
+
+ def call_operator(self, op, args, kwargs, meta):
+ if op not in edge_expm1_ops:
+ return super().call_operator(op, args, kwargs, meta, updated=False)
+
+ (
+ op_pow,
+ op_div,
+ op_add,
+ op_exp,
+ op_sub,
+ op_ge,
+ op_where,
+ op_le,
+ op_and,
+ ) = _get_expm1_decomposition(op)
+
+ input = args[0]
+
+ cutlo = -0.35
+ cuthi = 0.35
+
+ taylor_term_2_numerator = super().call_operator(
+ op_pow, (input, 2), {}, meta, updated=False
+ )
+ taylor_term_3_numerator = super().call_operator(
+ op_pow, (input, 3), {}, meta, updated=False
+ )
+ taylor_term_4_numerator = super().call_operator(
+ op_pow, (input, 4), {}, meta, updated=False
+ )
+
+ taylor_term_2 = super().call_operator(
+ op_div, (taylor_term_2_numerator, 2), {}, meta, updated=False
+ )
+ taylor_term_3 = super().call_operator(
+ op_div, (taylor_term_3_numerator, 6), {}, meta, updated=False
+ )
+ taylor_term_4 = super().call_operator(
+ op_div, (taylor_term_4_numerator, 24), {}, meta, updated=False
+ )
+
+ add_terms_1_2 = super().call_operator(
+ op_add, (input, taylor_term_2), {}, meta, updated=False
+ )
+ add_term_3 = super().call_operator(
+ op_add, (add_terms_1_2, taylor_term_3), {}, meta, updated=False
+ )
+ taylor_expansion = super().call_operator(
+ op_add, (add_term_3, taylor_term_4), {}, meta, updated=False
+ )
+
+ decomp_exp = super().call_operator(op_exp, (input,), {}, meta, updated=False)
+ decomp_sub = super().call_operator(
+ op_sub, (decomp_exp, 1.0), {}, meta, updated=False
+ )
+
+ ge = super().call_operator(op_ge, (input, cutlo), {}, meta, updated=False)
+ le = super().call_operator(op_le, (input, cuthi), {}, meta, updated=False)
+
+ cond_and = super().call_operator(op_and, (ge, le), {}, meta, updated=False)
+ where = super().call_operator(
+ op_where, (cond_and, taylor_expansion, decomp_sub), {}, meta, updated=True
+ )
+
+ return where
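
The piecewise approximation can be checked against torch.expm1; a minimal sketch assuming only a standard torch install:

```python
import torch

x = torch.linspace(-1.0, 1.0, steps=201)
taylor = x + x**2 / 2 + x**3 / 6 + x**4 / 24
near_zero = (x >= -0.35) & (x <= 0.35)
decomposed = torch.where(near_zero, taylor, torch.exp(x) - 1.0)
# The truncated Taylor series is only used where |x| <= 0.35, which keeps its
# error below roughly 1e-4 (the x^5/120 remainder term).
assert torch.allclose(decomposed, torch.expm1(x), atol=1e-4)
```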
diff --git a/backends/arm/_passes/decompose_glu_pass.py b/backends/arm/_passes/decompose_glu_pass.py
new file mode 100644
index 00000000000..183dc89cf61
--- /dev/null
+++ b/backends/arm/_passes/decompose_glu_pass.py
@@ -0,0 +1,75 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+# For FP case
+edge_glu = exir_ops.edge.aten.glu.default
+
+# For INT case
+aten_glu = torch.ops.aten.glu.default
+
+
+def get_ops(op):
+ """Returns the appropriate operator functions based on the input operator."""
+ if op == edge_glu:
+ return (
+ exir_ops.edge.aten.mul.Tensor,
+ exir_ops.edge.aten.sigmoid.default,
+ exir_ops.edge.aten.slice_copy.Tensor,
+ )
+ elif op == aten_glu:
+ return (
+ torch.ops.aten.mul.Tensor,
+ torch.ops.aten.sigmoid.default,
+ torch.ops.aten.slice_copy.Tensor,
+ )
+ else:
+ raise ValueError(f"Unsupported operator: {op}")
+
+
+class DecomposeGluPass(ArmPass):
+ """Decomposes the GLU operator into hadamard product and sigmoid."""
+
+ def call_operator(self, op, args, kwargs, meta):
+ if op not in [edge_glu, aten_glu]:
+ return super().call_operator(op, args, kwargs, meta)
+
+ hadamard_prod, sigmoid, slice_op = get_ops(op)
+ X = args[0]
+
+ dim = args[1] if len(args) > 1 else kwargs.get("dim", -1)
+
+ if "val" not in X.node.meta:
+ raise Exception("Could not get dimension metadata in input.")
+
+ if dim < 0:
+ dim += X.node.meta["val"].dim()
+
+ n = X.node.meta["val"].size(dim)
+
+ if n % 2:
+ raise RuntimeError(
+ f"glu expects an even split along dim={dim}, got size {n}"
+ )
+
+ middle = n // 2
+
+ T1 = super().call_operator(
+ slice_op, (X, dim, 0, middle), {}, meta, updated=True
+ )
+
+ T2 = super().call_operator(
+ slice_op, (X, dim, middle, n), {}, meta, updated=True
+ )
+
+ T2_sigmoid = super().call_operator(sigmoid, (T2,), {}, meta, updated=True)
+
+ return super().call_operator(
+ hadamard_prod, (T1, T2_sigmoid), {}, meta, updated=True
+ )
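
The slice/sigmoid/multiply sequence matches torch's reference GLU; a minimal check assuming only a standard torch install:

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 6)
a, b = x.chunk(2, dim=-1)          # even split along the chosen dim
decomposed = a * torch.sigmoid(b)  # Hadamard product with the sigmoid gate
assert torch.allclose(decomposed, F.glu(x, dim=-1))
```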
diff --git a/backends/arm/_passes/decompose_grouped_conv.py b/backends/arm/_passes/decompose_grouped_conv.py
index 6bfdf4dea5e..ce9fe9c9937 100644
--- a/backends/arm/_passes/decompose_grouped_conv.py
+++ b/backends/arm/_passes/decompose_grouped_conv.py
@@ -6,7 +6,7 @@
from copy import copy
import torch
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass
diff --git a/backends/arm/_passes/decompose_logit_pass.py b/backends/arm/_passes/decompose_logit_pass.py
new file mode 100644
index 00000000000..40e2b22cb54
--- /dev/null
+++ b/backends/arm/_passes/decompose_logit_pass.py
@@ -0,0 +1,96 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+# For FP case
+edge_logit = exir_ops.edge.aten.logit.default
+# For INT case
+aten_logit = torch.ops.aten.logit.default
+
+
+def get_ops(op):
+ """Returns the appropriate operator functions based on the input operator."""
+ if op == edge_logit:
+ return (
+ exir_ops.edge.aten.log.default,
+ exir_ops.edge.aten.add.Scalar,
+ exir_ops.edge.aten.reciprocal.default,
+ exir_ops.edge.aten.mul.Tensor,
+ exir_ops.edge.aten.mul.Scalar,
+ exir_ops.edge.aten.clamp.default,
+ )
+ elif op == aten_logit:
+ return (
+ torch.ops.aten.log.default,
+ torch.ops.aten.add.Scalar,
+ torch.ops.aten.reciprocal.default,
+ torch.ops.aten.mul.Tensor,
+ torch.ops.aten.mul.Scalar,
+ torch.ops.aten.clamp.default,
+ )
+ else:
+ raise ValueError(f"Unsupported operator: {op}")
+
+
+class DecomposeLogitPass(ArmPass):
+ """
+ Decomposes the `logit` operator into a sequence of primitive operations.
+
+ If `eps` is provided, the input tensor `x` is first clamped to the range
+ [eps, 1 - eps].
+
+ The decomposition follows the identity:
+
+ logit(x) = log(x / (1 - x))
+
+ Examples:
+
+ logit(x) becomes:
+ log(x * reciprocal((-1) * x + 1))
+
+ logit(x, eps) becomes:
+ y = clamp(x, eps, 1 - eps)
+ log(y * reciprocal((-1) * y + 1))
+ """
+
+ def call_operator(self, op, args, kwargs, meta):
+ if op not in [edge_logit, aten_logit]:
+ return super().call_operator(op, args, kwargs, meta)
+
+ X = args[0]
+ eps = args[1] if len(args) > 1 else kwargs.get("eps", None)
+
+ (
+ log_op,
+ add_scalar_op,
+ recip_op,
+ mul_tensor_op,
+ mul_scalar_op,
+ clamp_op,
+ ) = get_ops(op)
+
+ if eps is not None:
+ X = super().call_operator(
+ clamp_op, (X, eps, 1.0 - eps), {}, meta, updated=True
+ )
+
+ neg_X = super().call_operator(mul_scalar_op, (X, -1.0), {}, meta, updated=True)
+
+ denom = super().call_operator(
+ add_scalar_op, (neg_X, 1.0), {}, meta, updated=True
+ )
+
+ frac = super().call_operator(recip_op, (denom,), {}, meta, updated=True)
+
+ log_input = super().call_operator(
+ mul_tensor_op, (X, frac), {}, meta, updated=True
+ )
+
+ return super().call_operator(log_op, (log_input,), {}, meta, updated=True)
diff --git a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
index 215bf21db2d..491b404f0a4 100644
--- a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
+++ b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
@@ -16,7 +16,8 @@
is_param_node,
)
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.dialects.edge._ops import EdgeOpOverload
@@ -109,7 +110,7 @@ def fold_and_annotate_arg(
return
arg_quant_params = None
- if arg.target in dq_ops:
+ if arg.target in DQ_OPS:
args = arg.args
scales = args[1]
if (
@@ -137,9 +138,9 @@ def fold_and_annotate_arg(
if input_qparams is not None:
node.meta["input_qparams"][i] = input_qparams
for n in nodes_to_remove:
- if n.target not in dq_ops:
+ if n.target not in DQ_OPS:
raise RuntimeError(
- f"Expected one of {dq_ops} dq_op, got {n.target}"
+ f"Expected one of {DQ_OPS} dq_op, got {n.target}"
)
node.replace_input_with(n, cast(Node, n.args[0]))
@@ -154,7 +155,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
if n.op != "call_function":
continue
# Don't fold chains of quant-ops into each other.
- if n.target in (*q_ops, *dq_ops):
+ if n.target in (*Q_OPS, *DQ_OPS):
continue
# Make sure we haven't already set qparams meta information on the node
@@ -184,7 +185,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
# Copy the users, since we are modifying it.
users_copy = copy.copy(n.users)
for i, user in enumerate(users_copy):
- if user.target not in q_ops:
+ if user.target not in Q_OPS:
continue
# quantization node found here, store the quantization parameters in meta value
@@ -221,7 +222,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
# Make sure we have a quantized operator
user = list(n.users)[0]
- if user.target not in q_ops:
+ if user.target not in Q_OPS:
continue
qargs = QuantArgs.from_operator(user.target, user.args)
diff --git a/backends/arm/_passes/fuse_constant_ops_pass.py b/backends/arm/_passes/fuse_constant_ops_pass.py
index f70614d6231..f49565e3c38 100644
--- a/backends/arm/_passes/fuse_constant_ops_pass.py
+++ b/backends/arm/_passes/fuse_constant_ops_pass.py
@@ -6,6 +6,7 @@
import logging
import torch._export.utils
+import torch.fx
from executorch.backends.arm._passes.arm_pass_utils import (
get_constant_placeholder_kind,
get_first_fake_tensor,
@@ -50,22 +51,26 @@ def _fuse_nodes(self, node) -> bool:
the operations already carried out on the data.
"""
- # Extract tensors and args from the node
- data_list = [
- get_param_tensor(self.exported_program, input_node)
- for input_node in node.all_input_nodes
- ]
+ input_nodes = list(node.all_input_nodes)
+ qparams = node.meta.get("input_qparams", None)
- args = node.args[len(node.all_input_nodes) :]
- kwargs = node.kwargs
+ def resolve_arg(arg):
+ if isinstance(arg, torch.fx.Node) and arg in input_nodes:
+ idx = input_nodes.index(arg)
+ t = get_param_tensor(self.exported_program, arg)
+ if qparams:
+ t = qparams[idx].dequantize_value(t)
+ return t
+ if isinstance(arg, tuple):
+ return tuple(resolve_arg(x) for x in arg)
+ if isinstance(arg, list):
+ return [resolve_arg(x) for x in arg]
+ return arg
- if "input_qparams" in node.meta and len(node.meta["input_qparams"]) > 0:
- for i in range(len(node.all_input_nodes)):
- q_params = node.meta["input_qparams"][i]
- data_list[i] = q_params.dequantize_value(data_list[i])
+ new_args = tuple(resolve_arg(a) for a in node.args)
+ new_kwargs = {k: resolve_arg(v) for k, v in node.kwargs.items()}
- # Run the op on the extracted tensor
- data = node.target(*data_list, *args, **kwargs)
+ data = node.target(*new_args, **new_kwargs)
# Only fuse if the tensor does not get bigger.
if data.numel() > get_first_fake_tensor(node).numel():
@@ -102,7 +107,11 @@ def call(self, graph_module):
for node in graph_module.graph.nodes:
if node.op != "call_function":
continue
- if node.target == torch.ops.tosa._table.default:
+ if node.target in [
+ exir_ops.backend.tosa.TABLE.default,
+ exir_ops.backend.tosa.RESCALE.default,
+ exir_ops.backend.tosa.TRANSPOSE.default,
+ ]:
continue
input_nodes = node.all_input_nodes
diff --git a/backends/arm/_passes/fuse_equal_placeholders_pass.py b/backends/arm/_passes/fuse_equal_placeholders_pass.py
index 664a0f8ea6c..5631e2f32e9 100644
--- a/backends/arm/_passes/fuse_equal_placeholders_pass.py
+++ b/backends/arm/_passes/fuse_equal_placeholders_pass.py
@@ -3,6 +3,9 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
+import hashlib
+from collections import defaultdict
+
import torch
from executorch.backends.arm._passes.arm_pass_utils import (
get_constant_placeholder_kind,
@@ -21,7 +24,7 @@ class FuseEqualPlaceholdersPass(ExportPass):
"""
This pass optimizes memory usage by finding constant placeholders
pointing to identical tensors and fusing them to one single placeholder
- with multiple users.
+ with multiple users, using a cache for faster comparison.
"""
def __init__(self, exported_program: ExportedProgram):
@@ -30,58 +33,54 @@ def __init__(self, exported_program: ExportedProgram):
def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
modified = False
- const_placeholder_nodes = []
- for node in graph_module.graph.nodes:
- if is_param_node(self.exported_program, node):
- const_placeholder_nodes.append(node)
-
- while const_placeholder_nodes:
- # Find equal tensors
- node1 = const_placeholder_nodes.pop()
- eq_nodes = [node1]
- tensor1 = get_param_tensor(self.exported_program, node1)
- if tensor1 is None:
+ # Build a cache of params: mapping hash_key -> list of (node, tensor)
+ hash_buckets = defaultdict(list)
+ for node in graph_module.graph.nodes:
+ if not is_param_node(self.exported_program, node):
continue
+ tensor = get_param_tensor(self.exported_program, node)
+ if tensor is None:
+ continue
+ # Create a lightweight fingerprint: dtype + shape + SHA1 of raw bytes
+ # Ensure tensor is on CPU and contiguous
+ t_cpu = tensor.detach().cpu().contiguous()
+ data_bytes = t_cpu.numpy().tobytes()
+ key = (
+ str(t_cpu.dtype),
+ tuple(t_cpu.shape),
+ hashlib.sha1(data_bytes).hexdigest(),
+ )
+ hash_buckets[key].append((node, t_cpu))
- for node2 in const_placeholder_nodes:
- tensor2 = get_param_tensor(self.exported_program, node2)
- if tensor2 is None:
- continue
-
- if (
- tensor1.dtype == tensor2.dtype
- and tensor1.shape == tensor2.shape
- and torch.allclose(tensor1, tensor2, atol=1e-08)
- ):
- eq_nodes.append(node2)
+ # For each bucket with more than one entry, fuse:
+ for nodes_tensors in hash_buckets.values():
+ if len(nodes_tensors) < 2:
+ continue
- if len(eq_nodes) > 1:
- common_name = node1.name + "_common"
- common_kind = get_constant_placeholder_kind(
- self.exported_program, node1
+ # Create a new placeholder from first in list of equal placeholders.
+ rep_node, rep_tensor = nodes_tensors[0]
+ common_name = rep_node.name + "_common"
+ common_kind = get_constant_placeholder_kind(self.exported_program, rep_node)
+ common_persistent = True
+ with graph_module.graph.inserting_before(rep_node):
+ common_node = create_constant_placeholder(
+ self.exported_program,
+ graph_module.graph,
+ common_name,
+ common_kind,
+ rep_tensor,
+ common_persistent,
)
- common_persisten_buffer = True
-
- with graph_module.graph.inserting_before(node1):
- common_node = create_constant_placeholder(
- self.exported_program,
- graph_module.graph,
- common_name,
- common_kind,
- tensor1,
- common_persisten_buffer,
- )
-
- for eq_node in eq_nodes:
- eq_node.replace_all_uses_with(common_node)
- delete_constant_placeholder(self.exported_program, eq_node)
- if eq_node != node1:
- const_placeholder_nodes.remove(eq_node)
+ # Replace uses and delete duplicates
+ for node, _ in nodes_tensors:
+ node.replace_all_uses_with(common_node)
+ delete_constant_placeholder(self.exported_program, node)
modified = True
if modified:
graph_module.recompile()
graph_module = super().call(graph_module).graph_module
+
return PassResult(graph_module=graph_module, modified=modified)
diff --git a/backends/arm/_passes/fuse_quantized_activation_pass.py b/backends/arm/_passes/fuse_quantized_activation_pass.py
index f70d6d8755b..46a7d7f6f98 100644
--- a/backends/arm/_passes/fuse_quantized_activation_pass.py
+++ b/backends/arm/_passes/fuse_quantized_activation_pass.py
@@ -6,7 +6,8 @@
# pyre-unsafe
import torch
-from executorch.backends.arm.tosa_quant_utils import q_ops, QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
+from executorch.backends.arm.constants import Q_OPS
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx import Node
@@ -21,7 +22,7 @@ def _is_fuseable_quantized_activation(node: Node):
min_val = node.args[1]
is_fuseable = min_val == 0
- is_quantized = len(node.users) == 1 and next(iter(node.users)).target in q_ops
+ is_quantized = len(node.users) == 1 and next(iter(node.users)).target in Q_OPS
if is_fuseable and is_quantized:
quant_node = next(iter(node.users))
quant_args = QuantArgs.from_operator(quant_node.target, quant_node.args)
diff --git a/backends/arm/_passes/insert_rescales_pass.py b/backends/arm/_passes/insert_rescales_pass.py
index 97b8fb15711..7f75aecf24c 100644
--- a/backends/arm/_passes/insert_rescales_pass.py
+++ b/backends/arm/_passes/insert_rescales_pass.py
@@ -3,69 +3,25 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
-import logging
from copy import copy
from typing import cast
-import torch
from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
+from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
-from torch import Tensor
from torch.fx import GraphModule, Node
-from torch.library import custom_op, register_fake
-
-logger = logging.getLogger(__name__)
-
-
-@custom_op("tosa::_rescale", mutates_args=()) # type: ignore[misc]
-def rescale(
- x: Tensor, dtype: torch.dtype, scale: float, in_zp: int, out_zp: int
-) -> Tensor:
- logger.warning(
- "Ran default implementation of tosa::_rescale."
- "This op is meant to always be inserted inside a partition and a correct default implementation is not implemented."
- )
- # Clone is needed to not return reference when rescaling to same dtype.
- # This is a neccessary requirement for non-mutating custom ops.
- return x.to(dtype=dtype).clone()
-
-
-@register_fake("tosa::_rescale") # type: ignore[misc]
-def rescale_fake(
- x: Tensor, dtype: torch.dtype, scale: float, in_zp: int, out_zp: int
-) -> Tensor:
- """Casts the input tensor to dtype `dtype` to produce the correct tensor meta for a _rescale op.
- Additionally validates TOSA constraints of a RESCALE op.
- """
- if dtype not in (torch.int32, torch.int8, torch.int16):
- raise NotImplementedError(
- f"tosa::rescale currently only supports int32, int16 and int8, not {dtype}"
- )
- if dtype in (torch.int32, torch.int16) and out_zp != 0:
- raise ValueError(
- f"TOSA requires output_zp to be zero when the output dtype is {dtype}."
- )
- if x.dtype in (torch.int32, torch.int16) and in_zp != 0:
- raise ValueError(
- f"TOSA requires input_zp to be zero when the input dtype is {dtype}"
- )
- if x.dtype == torch.int8 and not -128 <= in_zp <= 127:
- raise ValueError(f"{in_zp=} outside valid range (-128,127) for int8.")
- if dtype == torch.int8 and not -128 <= out_zp <= 127:
- raise ValueError(f"{out_zp=} outside valid range (-128,127) for int8.")
-
- return x.to(dtype=dtype).clone()
class InsertRescalePass(ExportPass):
"""Finds patterns of dq -> q, and replaces them
- with passthrough_to_tosa::rescales.
+ with backend dialect tosa::RESCALE op.
- Does not garantuee that the dtypes and zero points are valid
+ Does not guarantee that the dtypes and zero points are valid
in TOSA, that is the job of the quantization annotator that
produced the dq and q nodes. The TOSA constraints are validated
- in the fake implementation of passthrough_to_tosa:rescale.
+ in the fake implementation of.
"""
def fold_dq_q_to_rescale(self, node: Node, user: Node, graph_module: GraphModule):
@@ -76,7 +32,7 @@ def fold_dq_q_to_rescale(self, node: Node, user: Node, graph_module: GraphModule
with graph_module.graph.inserting_before(node):
rescale_node = create_node(
graph_module.graph,
- torch.ops.tosa._rescale.default,
+ exir_ops.backend.tosa.RESCALE.default,
(
node.all_input_nodes[0],
q_args.dtype,
@@ -94,11 +50,11 @@ def call(self, graph_module: GraphModule) -> PassResult:
for node in graph_module.graph.nodes:
node = cast(Node, node)
- if node.target not in dq_ops:
+ if node.target not in DQ_OPS:
continue
# Copy users since we remove them while iterating, modyfing the node.users list.
for user in copy(node.users):
- if user.target in q_ops:
+ if user.target in Q_OPS:
self.fold_dq_q_to_rescale(node, user, graph_module)
modified = True
if len(node.users) == 0:
diff --git a/backends/arm/_passes/insert_table_ops.py b/backends/arm/_passes/insert_table_ops.py
index 9a3e98b651b..fb5d7de5e12 100644
--- a/backends/arm/_passes/insert_table_ops.py
+++ b/backends/arm/_passes/insert_table_ops.py
@@ -10,27 +10,18 @@
import torch
from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
+from executorch.backends.transforms.utils import create_constant_placeholder
+
from executorch.exir import ExportedProgram
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.dialects.edge._ops import EdgeOpOverload
from executorch.exir.pass_base import ExportPass, PassResult
+from torch.export.graph_signature import InputKind
from torch.fx import GraphModule
from torch.fx.node import Node
-from torch.library import impl, Library
-
-lib = Library("tosa", "DEF")
-lib.define("_table(Tensor self) -> Tensor")
-
-
-@impl(lib, "_table")
-def _table_impl(*args, **kwargs): # pyre-ignore
- in_dtype = args[0].dtype
- if in_dtype == torch.int8:
- return args[0]
- return args[0].to(dtype=torch.int32)
class TableOps:
@@ -43,6 +34,7 @@ class TableOps:
exir_ops.edge.aten.ceil.default: torch.ceil,
exir_ops.edge.aten.erf.default: torch.erf,
exir_ops.edge.aten.exp.default: torch.exp,
+ exir_ops.edge.aten.expm1.default: torch.expm1,
exir_ops.edge.aten.floor.default: torch.floor,
exir_ops.edge.aten.log.default: torch.log,
exir_ops.edge.aten.reciprocal.default: torch.reciprocal,
@@ -58,12 +50,16 @@ class TableOps:
exir_ops.edge.aten.sinh.default: torch.sinh,
exir_ops.edge.aten.acosh.default: torch.acosh,
exir_ops.edge.aten.asin.default: torch.asin,
+ exir_ops.edge.aten.asinh.default: torch.asinh,
+ exir_ops.edge.aten.cosh.default: torch.cosh,
+ exir_ops.edge.aten.acos.default: torch.acos,
}
# Targets that must be treated explicitly
special_table_ops: Set[EdgeOpOverload] = {
exir_ops.edge.aten.pow.Tensor_Scalar,
exir_ops.edge.aten.gelu.default,
+ exir_ops.edge.aten.elu.default,
}
def __init__(self, exported_program: ExportedProgram):
@@ -97,6 +93,11 @@ def __getitem__(self, node: Node):
return lambda x: torch.nn.functional.gelu(
x, approximate=approximate
).flatten()
+ case exir_ops.edge.aten.elu.default:
+ input_alpha = cast(int, node.kwargs["alpha"])
+ return lambda x: torch.nn.functional.elu(
+ x, alpha=input_alpha
+ ).flatten()
case _:
# Op must be handled if it's inside self.special_ops
raise AssertionError("Unhandled table operation")
@@ -238,13 +239,8 @@ def call(self, graph_module: GraphModule) -> PassResult:
# We only want to replace the node if it's quantized
continue
# Create table node
- with graph_module.graph.inserting_before(node):
- table_node = create_node(
- graph=graph_module.graph,
- op_target=torch.ops.tosa._table.default,
- args=(node.args[0],),
- )
- output_node = table_node
+ insert_pos = list(node.graph.nodes)[0]
+ with graph_module.graph.inserting_before(insert_pos):
# Expect exactly one quantization parameter for input and output
if len(input_qparams) != 1:
raise ValueError(
@@ -264,27 +260,37 @@ def call(self, graph_module: GraphModule) -> PassResult:
out_quantargs=output_qparams[0],
)
# Register buffer in self.exported_program.state_dict
- # When the graph is retraced, the implementation _table is used and the suffix _default disappears from the node name
- # Remove it here to make it possible to find in the node_visitor
- self.register_buffer(
- buffer_name=table_node.name.replace("_default", ""), buffer=buffer
+ const_table_node = create_constant_placeholder(
+ exp_program=self.exported_program,
+ graph=node.graph,
+ kind=InputKind.BUFFER,
+ name=node.name + "_table_constant",
+ data=buffer,
+ persistent_buffer=True,
)
+ # Create table node
+ with graph_module.graph.inserting_before(node):
+ table_op_node = create_node(
+ graph=graph_module.graph,
+ op_target=exir_ops.backend.tosa.TABLE.default,
+ args=(node.args[0], const_table_node),
+ )
+ output_node = table_op_node
+
if lshift != 0:
scale = 2.0**lshift
rescale_node = create_node(
graph=graph_module.graph,
- op_target=torch.ops.tosa._rescale.default,
- args=(table_node, output_qparams[0].dtype, scale, 0, 0),
+ op_target=exir_ops.backend.tosa.RESCALE.default,
+ args=(table_op_node, output_qparams[0].dtype, scale, 0, 0),
)
output_node = rescale_node
node.replace_all_uses_with(output_node)
-
graph_module.graph.erase_node(node)
-
- output_node.meta["input_qparams"] = input_qparams
- output_node.meta["output_qparams"] = output_qparams
+ table_op_node.meta["input_qparams"] = input_qparams
+ table_op_node.meta["output_qparams"] = output_qparams
modified = True
if modified:
diff --git a/backends/arm/_passes/mm_to_bmm_pass.py b/backends/arm/_passes/mm_to_bmm_pass.py
index 519b755080c..69d8573013e 100644
--- a/backends/arm/_passes/mm_to_bmm_pass.py
+++ b/backends/arm/_passes/mm_to_bmm_pass.py
@@ -12,7 +12,7 @@
get_first_fake_tensor,
insert_q_dq_pair,
)
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx import Node
@@ -56,7 +56,7 @@ def call(self, graph_module: torch.fx.GraphModule):
node.replace_input_with(input_node, unsqueeze_before)
# If Quantized we must insert unsqueeze --> q --> dq --> node
- if input_node.target in dq_ops:
+ if input_node.target in DQ_OPS:
q_params = input_node.args[1:]
insert_q_dq_pair(graph, unsqueeze_before, q_params, from_node=node)
@@ -89,7 +89,7 @@ def call(self, graph_module: torch.fx.GraphModule):
user.replace_input_with(bmm_node, squeeze_after)
# If quantized, insert mm --> q --> dq --> squeeze
- if all(original_user.target in q_ops for original_user in original_users):
+ if all(original_user.target in Q_OPS for original_user in original_users):
q_params = original_users[0].args[1:]
insert_q_dq_pair(graph, bmm_node, q_params, from_node=node)
diff --git a/backends/arm/_passes/quant_args.py b/backends/arm/_passes/quant_args.py
new file mode 100644
index 00000000000..974d6dfdbd3
--- /dev/null
+++ b/backends/arm/_passes/quant_args.py
@@ -0,0 +1,125 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Any, cast, NamedTuple
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+
+exir_ops = cast(Any, exir_ops)
+from executorch.backends.arm.constants import PER_CHANNEL_QDQ_OPS, PER_TENSOR_QDQ_OPS
+from torch import Tensor
+
+
+class QuantArgs(NamedTuple):
+ scale: list[float] | float
+ zp: list[int] | int
+ qmin: int
+ qmax: int
+ dtype: torch.dtype
+ axis: int = 0
+ per_channel: bool = False
+
+ def quantize_value(self, x: torch.Tensor | float) -> Tensor:
+ """Quantizes the input tensor or value to a quantized tensor. If the input is
+ not a tensor, it is converted to a tensor first. If self.per_channel is True,
+ the quantization is done per channel, otherwise it is done per tensor.
+ """
+ if not isinstance(x, torch.Tensor):
+ x = torch.Tensor([x])
+ x = x.to(torch.float32)
+ if self.per_channel:
+ q_op = exir_ops.edge.quantized_decomposed.quantize_per_channel.default
+ args = (
+ x,
+ torch.tensor(self.scale),
+ torch.tensor(self.zp),
+ self.axis,
+ self.qmin,
+ self.qmax,
+ self.dtype,
+ )
+ else:
+ q_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
+ args = (x, self.scale, self.zp, self.qmin, self.qmax, self.dtype) # type: ignore[assignment]
+ return q_op(*args)
+
+ def dequantize_value(self, qx: torch.Tensor) -> torch.Tensor:
+ """Dequantizes the input tensor or value to a dequantized tensor If the input
+ is not a tensor, it is converted to a tensor first. If self.per_channel is True,
+ the dequantization is done per channel, otherwise it is done per tensor.
+ """
+ if self.per_channel:
+ dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
+ args = (
+ qx,
+ torch.tensor(self.scale),
+ torch.tensor(self.zp),
+ self.axis,
+ self.qmin,
+ self.qmax,
+ self.dtype,
+ )
+ else:
+ dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
+ args = (qx, self.scale, self.zp, self.qmin, self.qmax, self.dtype) # type: ignore[assignment]
+ return dq_op(*args)
+
+ @classmethod
+ def from_operator(cls, op, args):
+ if op in PER_TENSOR_QDQ_OPS:
+ return cls(
+ scale=cast(float, args[1]),
+ zp=cast(int, args[2]),
+ qmin=cast(int, args[3]),
+ qmax=cast(int, args[4]),
+ dtype=cast(torch.dtype, args[5]),
+ axis=0,
+ per_channel=False,
+ )
+ elif op in PER_CHANNEL_QDQ_OPS:
+ return cls(
+ scale=cast(list[float], args[1].tolist()),
+ zp=cast(list[int], args[2].tolist()),
+ axis=cast(int, args[3]),
+ qmin=cast(int, args[4]),
+ qmax=cast(int, args[5]),
+ dtype=cast(torch.dtype, args[6]),
+ per_channel=True,
+ )
+ else:
+ # We're only handling per tensor and per channel quantization
+ raise NotImplementedError(f"Unsupported quantization operation: {op}")
+
+ def get_scale_per_tensor(self) -> float:
+ if not isinstance(self.scale, float):
+ raise TypeError(
+ f"Expected scale {self.scale} to be a float but found scale of "
+ f"type {type(self.scale)}"
+ )
+ return self.scale
+
+ def get_zp_per_tensor(self) -> int:
+ if not isinstance(self.zp, int):
+ raise TypeError(
+ f"Expected zero point {self.zp} to be an int but found zp of "
+ f"type {type(self.zp)}"
+ )
+ return self.zp
+
+ def get_scale_per_channel(self) -> list[float]:
+ if not isinstance(self.scale, list):
+ raise TypeError(
+ f"Expected scale {self.scale} to be a list but found scale of "
+ f"type {type(self.scale)}"
+ )
+ return self.scale
+
+ def get_zp_per_channel(self) -> list[int]:
+ if not isinstance(self.zp, list):
+ raise TypeError(
+ f"Expected zero point {self.zp} to be a list but found zp of "
+ f"type {type(self.zp)}"
+ )
+ return self.zp
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
index fc638647b46..909be88f867 100644
--- a/backends/arm/arm_backend.py
+++ b/backends/arm/arm_backend.py
@@ -57,7 +57,7 @@ def vgf_compile_spec(
f"Invalid TOSA version: {tosa_version}"
)
- if not ("FP" or "INT" in tosa_profiles):
+ if "FP" not in tosa_profiles and "INT" not in tosa_profiles:
raise ValueError(
"Arm backend only supports converter-backend for FP or INT. "
f"Invalid TOSA profile: {tosa_profiles}"
@@ -128,7 +128,7 @@ def ethosu_compile_spec(
self.compiler_flags.append("--output-format=raw")
self.compiler_flags.append("--debug-force-regor")
- base_tosa_version = "TOSA-1.0+INT"
+ base_tosa_version = "TOSA-1.0+INT+int16"
if "u55" in target:
# Add the Ethos-U55 extension marker
base_tosa_version += "+u55"
@@ -217,13 +217,6 @@ def is_vgf(compile_spec: List[CompileSpec]) -> bool:
return False
-def get_tosa_spec(compile_spec: List[CompileSpec]) -> TosaSpecification:
- for spec in compile_spec:
- if spec.key == "tosa_spec":
- return TosaSpecification.create_from_string(spec.value.decode())
- raise ValueError("Could not find TOSA version in CompileSpec")
-
-
def get_intermediate_path(compile_spec: List[CompileSpec]) -> Optional[str]:
for spec in compile_spec:
if spec.key == "debug_artifact_path":
diff --git a/backends/arm/common/__init__.py b/backends/arm/common/__init__.py
new file mode 100644
index 00000000000..c8d1c683da3
--- /dev/null
+++ b/backends/arm/common/__init__.py
@@ -0,0 +1,4 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/backends/arm/common/debug.py b/backends/arm/common/debug.py
new file mode 100644
index 00000000000..bca6c06d140
--- /dev/null
+++ b/backends/arm/common/debug.py
@@ -0,0 +1,87 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+from typing import Optional
+
+import serializer.tosa_serializer as ts # type: ignore
+import torch
+from executorch.exir.print_program import inspect_node
+
+logger = logging.getLogger(__name__)
+
+
+def debug_node(node: torch.fx.Node, graph_module: torch.fx.GraphModule):
+ # Debug output of node information
+ logger.info(get_node_debug_info(node, graph_module))
+
+
+def get_node_debug_info(
+ node: torch.fx.Node, graph_module: torch.fx.GraphModule | None = None
+) -> str:
+ output = (
+ f" {inspect_node(graph=graph_module.graph, node=node)}\n"
+ if graph_module
+ else ""
+ "-- NODE DEBUG INFO --\n"
+ f" Op is {node.op}\n"
+ f" Name is {node.name}\n"
+ f" Node target is {node.target}\n"
+ f" Node args is {node.args}\n"
+ f" Node kwargs is {node.kwargs}\n"
+ f" Node users is {node.users}\n"
+ " Node.meta = \n"
+ )
+ for k, v in node.meta.items():
+ if k == "stack_trace":
+ matches = v.split("\n")
+ output += " 'stack_trace =\n"
+ for m in matches:
+ output += f" {m}\n"
+ else:
+ output += f" '{k}' = {v}\n"
+
+ if isinstance(v, list):
+ for i in v:
+ output += f" {i}\n"
+ return output
+
+
+# Output TOSA flatbuffer and test harness file
+def debug_tosa_dump(tosa_graph: ts.TosaSerializer, path: str, suffix: str = ""):
+ filename = f"output{suffix}.tosa"
+
+ logger.info(f"Emitting debug output to: {path=}, {suffix=}")
+
+ os.makedirs(path, exist_ok=True)
+
+ fb = tosa_graph.serialize()
+ js = tosa_graph.writeJson(filename)
+
+ filepath_tosa_fb = os.path.join(path, filename)
+ with open(filepath_tosa_fb, "wb") as f:
+ f.write(fb)
+ if not os.path.exists(filepath_tosa_fb):
+ raise IOError("Failed to write TOSA flatbuffer")
+
+ filepath_desc_json = os.path.join(path, f"desc{suffix}.json")
+ with open(filepath_desc_json, "w") as f:
+ f.write(js)
+ if not os.path.exists(filepath_desc_json):
+ raise IOError("Failed to write TOSA JSON")
+
+
+def debug_fail(
+ node,
+ graph_module,
+ tosa_graph: Optional[ts.TosaSerializer] = None,
+ path: Optional[str] = None,
+):
+ logger.warning("Internal error due to poorly handled node:")
+ if tosa_graph is not None and path is not None:
+ debug_tosa_dump(tosa_graph, path)
+ logger.warning(f"Debug output captured in '{path}'.")
+ debug_node(node, graph_module)
diff --git a/backends/arm/constants.py b/backends/arm/constants.py
new file mode 100644
index 00000000000..fd8710d3ead
--- /dev/null
+++ b/backends/arm/constants.py
@@ -0,0 +1,31 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, cast, Final
+
+from executorch.exir.dialects._ops import ops as exir_ops
+
+exir_ops = cast(Any, exir_ops)
+
+qd = exir_ops.edge.quantized_decomposed
+
+QUANT_PER_TENSOR_OP: Final = qd.quantize_per_tensor.default
+QUANT_PER_TENSOR_OP_T: Final = qd.quantize_per_tensor.tensor
+QUANT_PER_CHANNEL_OP: Final = qd.quantize_per_channel.default
+
+DEQUANT_PER_TENSOR_OP: Final = qd.dequantize_per_tensor.default
+DEQUANT_PER_TENSOR_OP_T: Final = qd.dequantize_per_tensor.tensor
+DEQUANT_PER_CHANNEL_OP: Final = qd.dequantize_per_channel.default
+
+Q_OPS: Final = (QUANT_PER_TENSOR_OP, QUANT_PER_TENSOR_OP_T, QUANT_PER_CHANNEL_OP)
+DQ_OPS: Final = (DEQUANT_PER_TENSOR_OP, DEQUANT_PER_TENSOR_OP_T, DEQUANT_PER_CHANNEL_OP)
+
+PER_TENSOR_QDQ_OPS: Final = (
+ QUANT_PER_TENSOR_OP,
+ QUANT_PER_TENSOR_OP_T,
+ DEQUANT_PER_TENSOR_OP,
+ DEQUANT_PER_TENSOR_OP_T,
+)
+PER_CHANNEL_QDQ_OPS: Final = (QUANT_PER_CHANNEL_OP, DEQUANT_PER_CHANNEL_OP)
diff --git a/backends/arm/ethosu/__init__.py b/backends/arm/ethosu/__init__.py
new file mode 100644
index 00000000000..f6cc1329dfe
--- /dev/null
+++ b/backends/arm/ethosu/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# pyre-unsafe
+
+from .backend import EthosUBackend # noqa: F401
+from .partitioner import EthosUPartitioner # noqa: F401
+
+__all__ = [
+ "EthosUBackend",
+ "EthosUPartitioner",
+]
diff --git a/backends/arm/ethosu_backend.py b/backends/arm/ethosu/backend.py
similarity index 100%
rename from backends/arm/ethosu_backend.py
rename to backends/arm/ethosu/backend.py
diff --git a/backends/arm/ethosu_partitioner.py b/backends/arm/ethosu/partitioner.py
similarity index 94%
rename from backends/arm/ethosu_partitioner.py
rename to backends/arm/ethosu/partitioner.py
index 27102592e15..efbd6705615 100644
--- a/backends/arm/ethosu_partitioner.py
+++ b/backends/arm/ethosu/partitioner.py
@@ -10,7 +10,7 @@
from executorch.backends.arm.arm_backend import (
is_ethosu,
) # usort: skip
-from executorch.backends.arm.ethosu_backend import EthosUBackend
+from executorch.backends.arm.ethosu import EthosUBackend
from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
from executorch.exir.backend.compile_spec_schema import CompileSpec
from executorch.exir.backend.partitioner import DelegationSpec
diff --git a/backends/arm/operator_support/TARGETS b/backends/arm/operator_support/TARGETS
index e14552fd016..2f65c080181 100644
--- a/backends/arm/operator_support/TARGETS
+++ b/backends/arm/operator_support/TARGETS
@@ -4,6 +4,7 @@ python_library(
name = "operator_support",
srcs = glob(["*.py"]),
deps = [
+ "//executorch/backends/arm:constants",
"//executorch/backends/arm/_passes:passes",
"//executorch/backends/arm:tosa_specification",
"//executorch/backends/transforms:remove_getitem_op",
diff --git a/backends/arm/operator_support/convolution_support.py b/backends/arm/operator_support/convolution_support.py
index 3e3149f3443..692d744025f 100644
--- a/backends/arm/operator_support/convolution_support.py
+++ b/backends/arm/operator_support/convolution_support.py
@@ -21,8 +21,6 @@ class ConvolutionSupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten.convolution.default]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/embedding_support.py b/backends/arm/operator_support/embedding_support.py
index 02460965a34..58a3a3e3edb 100644
--- a/backends/arm/operator_support/embedding_support.py
+++ b/backends/arm/operator_support/embedding_support.py
@@ -20,8 +20,6 @@ class EmbeddingSupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten.embedding.default]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/ethos_u55_support.py b/backends/arm/operator_support/ethos_u55_support.py
index a1b5de85d08..2ef0831af16 100644
--- a/backends/arm/operator_support/ethos_u55_support.py
+++ b/backends/arm/operator_support/ethos_u55_support.py
@@ -149,6 +149,8 @@ class EthosU55NotSupported(OperatorSupportBase):
exir_ops.edge.aten.ne.Scalar,
exir_ops.edge.aten.flip.default, # REVERSE
exir_ops.edge.aten.grid_sampler_2d, # GATHER
+ exir_ops.edge.aten.index.Tensor, # GATHER
+ exir_ops.edge.aten.index_select.default, # GATHER
exir_ops.edge.aten.scatter.src,
exir_ops.edge.aten.scatter.value,
exir_ops.edge.aten.select_scatter.default,
diff --git a/backends/arm/operator_support/index_select_support.py b/backends/arm/operator_support/index_select_support.py
index 81d0785b86a..9a48012f603 100644
--- a/backends/arm/operator_support/index_select_support.py
+++ b/backends/arm/operator_support/index_select_support.py
@@ -18,8 +18,6 @@ class IndexSelectSupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten.index_select.default]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/index_tensor_support.py b/backends/arm/operator_support/index_tensor_support.py
index 7330f98667d..65ea5755d7e 100644
--- a/backends/arm/operator_support/index_tensor_support.py
+++ b/backends/arm/operator_support/index_tensor_support.py
@@ -100,8 +100,6 @@ class IndexTensorSupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten.index.Tensor]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/minmax_support.py b/backends/arm/operator_support/minmax_support.py
index 86b949082eb..1c4b0dd6c78 100644
--- a/backends/arm/operator_support/minmax_support.py
+++ b/backends/arm/operator_support/minmax_support.py
@@ -21,7 +21,6 @@ class MinMaxSupported(SupportedTOSAOperatorCheck):
# TODO : "MLETORCH-718 : Quantization of indices in arm_quantizer"
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/pool_2d_support.py b/backends/arm/operator_support/pool_2d_support.py
index 677436ddc50..4ce0f7d75e7 100644
--- a/backends/arm/operator_support/pool_2d_support.py
+++ b/backends/arm/operator_support/pool_2d_support.py
@@ -43,8 +43,6 @@ class AvgPool2dSupported(SupportedTOSAOperatorCheck):
]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
@@ -122,8 +120,6 @@ class MaxPool2dSupported(SupportedTOSAOperatorCheck):
]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/reduce_sum_support.py b/backends/arm/operator_support/reduce_sum_support.py
index 4d0614d4b1a..0c614eb2bd5 100644
--- a/backends/arm/operator_support/reduce_sum_support.py
+++ b/backends/arm/operator_support/reduce_sum_support.py
@@ -19,8 +19,6 @@ class SumSupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten.sum.dim_IntList]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/right_shift_support.py b/backends/arm/operator_support/right_shift_support.py
index d18950a58a2..454a3b525e3 100644
--- a/backends/arm/operator_support/right_shift_support.py
+++ b/backends/arm/operator_support/right_shift_support.py
@@ -27,8 +27,6 @@ class RightShiftSupported(SupportedTOSAOperatorCheck):
]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/sin_cos_support.py b/backends/arm/operator_support/sin_cos_support.py
index 9dd63e8258d..03ce1da684b 100644
--- a/backends/arm/operator_support/sin_cos_support.py
+++ b/backends/arm/operator_support/sin_cos_support.py
@@ -23,7 +23,6 @@ class SinCosSupported(SupportedTOSAOperatorCheck):
]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/slice_copy_support.py b/backends/arm/operator_support/slice_copy_support.py
index 3c0c69969c5..ad9b5b250dd 100644
--- a/backends/arm/operator_support/slice_copy_support.py
+++ b/backends/arm/operator_support/slice_copy_support.py
@@ -22,8 +22,6 @@ class SliceCopySupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten.slice_copy.Tensor]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/to_copy_support.py b/backends/arm/operator_support/to_copy_support.py
index 7f27d0b5b36..a10f3acb766 100644
--- a/backends/arm/operator_support/to_copy_support.py
+++ b/backends/arm/operator_support/to_copy_support.py
@@ -29,8 +29,6 @@ class ToCopySupported(SupportedTOSAOperatorCheck):
]
tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
index 29ef36aa658..5a3d2621565 100644
--- a/backends/arm/operator_support/tosa_supported_operators.py
+++ b/backends/arm/operator_support/tosa_supported_operators.py
@@ -19,13 +19,13 @@
FuseQuantizedActivationPass,
)
from executorch.backends.arm._passes.insert_table_ops import TableOps
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
from executorch.backends.arm.operator_support.ethos_u55_support import (
EthosU55DtypeSupport,
EthosU55NotSupported,
EthosU55TransposeCheck,
EthosU55ViewCheck,
)
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.exir import ExportedProgram
from executorch.exir.backend.utils import WhyNoPartitionReporter
@@ -69,8 +69,6 @@ def is_node_tosa_supported(
# container for all SupportedTosaOperatorCheck classes
_tosa_spec_support: dict[TosaSpecification, list[Type[SupportedTOSAOperatorCheck]]] = {
- TosaSpecification.create_from_string("TOSA-0.80+BI"): [],
- TosaSpecification.create_from_string("TOSA-0.80+MI"): [],
TosaSpecification.create_from_string("TOSA-1.0+INT"): [],
TosaSpecification.create_from_string("TOSA-1.0+FP"): [],
}
@@ -171,6 +169,7 @@ def is_node_supported(
exir_ops.edge.aten.cat.default,
exir_ops.edge.aten.ceil.default,
exir_ops.edge.aten.clamp.default,
+ exir_ops.edge.aten.cumsum.default,
exir_ops.edge.aten.bmm.default,
exir_ops.edge.aten.permute_copy.default,
exir_ops.edge.aten.hardsigmoid.default,
@@ -181,6 +180,7 @@ def is_node_supported(
exir_ops.edge.aten.eq.Scalar,
exir_ops.edge.aten.erf.default,
exir_ops.edge.aten.exp.default,
+ exir_ops.edge.aten.expm1.default,
exir_ops.edge.aten.log.default,
exir_ops.edge.aten.linear.default,
exir_ops.edge.aten.split_with_sizes_copy.default,
@@ -258,6 +258,13 @@ def is_node_supported(
exir_ops.edge.aten.atanh.default,
exir_ops.edge.aten.addmm.default,
exir_ops.edge.aten.masked_fill.Scalar,
+ exir_ops.edge.aten.elu.default,
+ exir_ops.edge.aten.asinh.default,
+ exir_ops.edge.aten.cosh.default,
+ exir_ops.edge.aten.glu.default,
+ exir_ops.edge.aten.logit.default,
+ exir_ops.edge.aten.acos.default,
+ exir_ops.edge.aten.elu.default,
]
return supported
@@ -299,6 +306,8 @@ def is_node_supported(
exir_ops.edge.aten.leaky_relu.default: None,
exir_ops.edge.aten.round.default: None,
exir_ops.edge.aten.addmm.default: None,
+ exir_ops.edge.aten.glu.default: None,
+ exir_ops.edge.aten.logit.default: None,
}
if node.target in needs_decomp_dict:
@@ -369,7 +378,7 @@ def _is_matmul_node_supported(
matched_partition = partition
if matched_partition is not None:
input_quantized = all(
- input_node.target in dq_ops
+ input_node.target in DQ_OPS
for input_node in matched_partition.input_nodes
)
if not input_quantized:
@@ -378,7 +387,7 @@ def _is_matmul_node_supported(
)
return False
output_quantized = all(
- output_node_user.target in q_ops
+ output_node_user.target in Q_OPS
for output_node_user in matched_partition.output_nodes[0].users
)
if not output_quantized:
@@ -414,7 +423,7 @@ def is_node_supported(
users = node.users
output_quantized = all(
user.target == operator.getitem
- and all(user_user.target in q_ops for user_user in user.users)
+ and all(user_user.target in Q_OPS for user_user in user.users)
for user in users
)
elif FuseQuantizedActivationPass._is_fuseable_input(node):
@@ -428,7 +437,7 @@ def is_node_supported(
input_quantized = FuseQuantizedActivationPass._is_fuseable_input(input_node)
input_quantized = input_quantized or all(
- (input_node.target in dq_ops)
+ (input_node.target in DQ_OPS)
or (not get_first_fake_tensor(input_node).dtype.is_floating_point)
for input_node in node.all_input_nodes
)
@@ -437,7 +446,7 @@ def is_node_supported(
self.reporter.report_reject(node, "One or more inputs were not quantized.")
return False
- all_q_users = all((output_node.target in q_ops) for output_node in node.users)
+ all_q_users = all((output_node.target in Q_OPS) for output_node in node.users)
is_floating_point = get_first_fake_tensor(node).dtype.is_floating_point
output_quantized = output_quantized or all_q_users or not is_floating_point
diff --git a/backends/arm/operators/node_visitor.py b/backends/arm/operators/node_visitor.py
index 5056c5f7f54..afc80bbb849 100644
--- a/backends/arm/operators/node_visitor.py
+++ b/backends/arm/operators/node_visitor.py
@@ -24,18 +24,11 @@ class NodeVisitor:
# a specific TOSA version.
# When all node_visitors has been refactored to target a specific
# version, this list should be removed.
- tosa_specs_1_00 = [
+ tosa_specs = [
TosaSpecification.create_from_string("TOSA-1.0+INT"),
TosaSpecification.create_from_string("TOSA-1.0+FP"),
]
- tosa_specs_0_80 = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- tosa_specs = tosa_specs_0_80 + tosa_specs_1_00
-
def __init__(self, exported_program: ExportedProgram, tosa_spec: TosaSpecification):
self._exported_program = exported_program
self.tosa_spec = tosa_spec
@@ -52,8 +45,6 @@ def define_node(
# container for all node visitors
_node_visitor_dicts: Dict[TosaSpecification, Dict] = {
- TosaSpecification.create_from_string("TOSA-0.80+BI"): {},
- TosaSpecification.create_from_string("TOSA-0.80+MI"): {},
TosaSpecification.create_from_string("TOSA-1.0+INT"): {},
TosaSpecification.create_from_string("TOSA-1.0+FP"): {},
}
diff --git a/backends/arm/operators/op_abs.py b/backends/arm/operators/op_abs.py
index 65933c8012a..3000af50ed7 100644
--- a/backends/arm/operators/op_abs.py
+++ b/backends/arm/operators/op_abs.py
@@ -23,111 +23,6 @@
from torch.fx import Node
-@register_node_visitor
-class AbsVisitor_080_BI(NodeVisitor):
- target = "aten.abs.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- # Handle int8 (quantized) and int32
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT32],
- output.tosa_spec,
- )
-
- if inputs[0].dtype == ts.DType.INT8:
- rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- ) # type: ignore[possibly-undefined]
- else:
- # input[0].dtype == ts.DType.INT32
- # Non quantized input, natively support by TOSA.abs
- rescaled_inputs = inputs
-
- if output.dtype == ts.DType.INT8:
- broadcasted_shape = tutils.tosa_shape(output.shape, output.dim_order)
- abs_output = tosa_graph.addIntermediate(broadcasted_shape, ts.DType.INT32)
- else:
- # output.dtype == ts.DType.INT32
- abs_output = output
-
- # Do the INT32 Abs
- tosa_graph.addOperator(
- ts.TosaOp.Op().ABS,
- [
- rescaled_inputs[0].name,
- ],
- [abs_output.name],
- None,
- )
-
- if output.dtype == ts.DType.INT8:
- # Scale output back to 8 bit
- # pyre-ignore
- tqutils.insert_rescale_op_to_int8(tosa_graph, abs_output, scale_back, node) # type: ignore[possibly-undefined]
-
-
-@register_node_visitor
-class AbsVisitor_080_MI(AbsVisitor_080_BI):
- # inheriting 'target' from BI class
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
-
- if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
- # Call the inherited define_node for handling integers
- super().define_node(node, tosa_graph, inputs, output)
- else:
- # FP32 Abs lowering
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- # MI lowering
- tosa_graph.addOperator(
- ts.TosaOp.Op().ABS,
- [inputs[0].name],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class AbsVisitor_INT(NodeVisitor):
target = "aten.abs.default"
diff --git a/backends/arm/operators/op_add.py b/backends/arm/operators/op_add.py
index 7851fecf53d..7a022b54395 100644
--- a/backends/arm/operators/op_add.py
+++ b/backends/arm/operators/op_add.py
@@ -24,122 +24,6 @@
from torch.fx import Node
-@register_node_visitor
-class AddVisitor_080_BI(NodeVisitor):
- target = "aten.add.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT32],
- output.tosa_spec,
- )
-
- dim_order = (
- inputs[0].dim_order
- if len(inputs[0].shape) > len(inputs[1].shape)
- else inputs[1].dim_order
- )
- scale_back = 1.0
- if inputs[0].dtype == ts.DType.INT8:
- rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
- else:
- # input[0].dtype == ts.DType.INT32
- # Non quantized input, natively support by TOSA.ADD
- rescaled_inputs = inputs
-
- if output.dtype == ts.DType.INT8:
- broadcasted_shape = tutils.tosa_shape(output.shape, output.dim_order)
- add_output = tosa_graph.addIntermediate(broadcasted_shape, ts.DType.INT32)
- else:
- # output.dtype == ts.DType.INT32
- add_output = output
-
- input1, input2 = tutils.reshape_for_broadcast(
- tosa_graph, rescaled_inputs, dim_order
- )
-
- # Do the INT32 Add
- tosa_graph.addOperator(
- ts.TosaOp.Op().ADD,
- [input1.name, input2.name],
- [add_output.name],
- None,
- )
-
- if output.dtype == ts.DType.INT8:
- # Scale output back to 8 bit
- # pyre-ignore
- tqutils.insert_rescale_op_to_int8(
- tosa_graph, add_output, scale_back, node
- ) # type: ignore[possibly-undefined]
-
-
-@register_node_visitor
-class AddVisitor_080_MI(AddVisitor_080_BI):
- # inheriting 'target' from BI class
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
-
- if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
- # Call the inherited define_node for handling integers
- super().define_node(node, tosa_graph, inputs, output)
- else:
- # FP32 Add lowering
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- input1, input2 = inputs
-
- # MI lowering
- tosa_graph.addOperator(
- ts.TosaOp.Op().ADD,
- [input1.name, input2.name],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class AddVisitor_INT(NodeVisitor):
target = "aten.add.Tensor"
diff --git a/backends/arm/operators/op_amax.py b/backends/arm/operators/op_amax.py
index 3c4c0b1e5cc..526d6ff35ec 100644
--- a/backends/arm/operators/op_amax.py
+++ b/backends/arm/operators/op_amax.py
@@ -18,60 +18,11 @@
from torch.fx import Node
-@register_node_visitor
-class MaxVisitor_0_80(NodeVisitor):
- target = "aten.amax.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts
-
- validate_num_inputs(self.target, inputs, 3)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT16, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- input = inputs[0]
- dim = inputs[1].number
-
- if dim < 0:
- tensor = get_first_fake_tensor(node)
- rank = len(tensor.size())
- dim = rank + dim
-
- keep_dims = inputs[2].number
- if not keep_dims:
- raise RuntimeError(
- "TOSA only supports keepdims == True; Did you run the convert_minmax pass?"
- )
-
- attr = ts.TosaSerializerAttribute()
- attr.AxisAttribute(input.dim_order.index(dim))
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().REDUCE_MAX, [input.name], [output.name], attr
- )
-
-
@register_node_visitor
class MaxVisitor(NodeVisitor):
target = "aten.amax.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
diff --git a/backends/arm/operators/op_amin.py b/backends/arm/operators/op_amin.py
index f19520f04e8..85b0b757c85 100644
--- a/backends/arm/operators/op_amin.py
+++ b/backends/arm/operators/op_amin.py
@@ -18,60 +18,11 @@
from torch.fx import Node
-@register_node_visitor
-class MinVisitor_0_80(NodeVisitor):
- target = "aten.amin.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts
-
- validate_num_inputs(self.target, inputs, 3)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT16, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- input = inputs[0]
- dim = inputs[1].number
-
- if dim < 0:
- tensor = get_first_fake_tensor(node)
- rank = len(tensor.size())
- dim = rank + dim
-
- keep_dims = inputs[2].number
- if not keep_dims:
- raise RuntimeError(
- "TOSA only supports keepdims == True; Did you run the convert_minmax pass?"
- )
-
- attr = ts.TosaSerializerAttribute()
- attr.AxisAttribute(input.dim_order.index(dim))
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().REDUCE_MIN, [input.name], [output.name], attr
- )
-
-
@register_node_visitor
class MinVisitor(NodeVisitor):
target = "aten.amin.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
diff --git a/backends/arm/operators/op_any.py b/backends/arm/operators/op_any.py
index e90b51302d5..0ac307aedd4 100644
--- a/backends/arm/operators/op_any.py
+++ b/backends/arm/operators/op_any.py
@@ -20,48 +20,11 @@
from torch.fx import Node
-@register_node_visitor
-class AnyVisitor_0_80(NodeVisitor):
- target = "aten.any.dim"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 3)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target, [inputs[0], output], ts.DType.BOOL, output.tosa_spec
- )
-
- input_shape = list(inputs[0].shape)
- dim = cast(int, inputs[1].number) % len(
- input_shape
- ) # process the negative index
- keep_dim = cast(bool, inputs[2].number if len(inputs) > 2 else False)
- if not keep_dim:
- raise ValueError("This case should be handled by ConvertAnyDimDimsPass")
-
- attr = ts.TosaSerializerAttribute()
- attr.AxisAttribute(inputs[0].dim_order.index(dim))
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().REDUCE_ANY, [inputs[0].name], [output.name], attr
- )
-
-
@register_node_visitor
class AnyVisitor(NodeVisitor):
target = "aten.any.dim"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def define_node(
self,
diff --git a/backends/arm/operators/op_avg_pool2d.py b/backends/arm/operators/op_avg_pool2d.py
index f839ca380ec..9faf8272473 100644
--- a/backends/arm/operators/op_avg_pool2d.py
+++ b/backends/arm/operators/op_avg_pool2d.py
@@ -26,151 +26,6 @@
from executorch.backends.arm.tosa_specification import TosaSpecification
-@register_node_visitor
-class AvgPool2dVisitor_0_80_BI(NodeVisitor):
- target = "aten.avg_pool2d.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def _build_generic_avgpool2d(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- input_zp: int,
- output_zp: int,
- accumulator_type: Any,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- input_tensor = inputs[0]
- kernel_size_list = inputs[1].special
- stride_size_list = inputs[2].special
-
- if len(inputs) > 4:
- ceil_mode = bool(inputs[4].number)
- else:
- ceil_mode = False
-
- try:
- pad_size_list = inputs[3].special
- pad_size_list = [
- pad_size_list[0],
- pad_size_list[0],
- pad_size_list[1],
- pad_size_list[1],
- ]
- except IndexError:
- pad_size_list = [0, 0, 0, 0]
-
- # Adjust the padding as necessary
- pad_size_list[1] = adjust_pooling_pad_if_needed(
- input_tensor.shape[2],
- kernel_size_list[0],
- stride_size_list[0],
- pad_size_list[1],
- ceil_mode,
- )
- pad_size_list[3] = adjust_pooling_pad_if_needed(
- input_tensor.shape[3],
- kernel_size_list[1],
- stride_size_list[1],
- pad_size_list[3],
- ceil_mode,
- )
-
- attr = ts.TosaSerializerAttribute()
- attr.PoolAttribute(
- kernel=kernel_size_list,
- stride=stride_size_list,
- pad=pad_size_list,
- input_zp=input_zp,
- output_zp=output_zp,
- accum_dtype=accumulator_type,
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().AVG_POOL2D,
- [input_tensor.name],
- [output.name],
- attr,
- )
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, [3, 4, 5, 6, 7])
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target, [inputs[0], output], ts.DType.INT8, output.tosa_spec
- )
-
- accumulator_type = ts.DType.INT32
-
- input_qargs = get_input_qparams(node)
- input_zp = input_qargs[0].get_zp_per_tensor()
-
- output_qargs = get_output_qparams(node)
- output_zp = output_qargs[0].get_zp_per_tensor()
-
- self._build_generic_avgpool2d(
- node, tosa_graph, inputs, output, input_zp, output_zp, accumulator_type
- )
-
-
-@register_node_visitor
-class AvgPool2dVisitor_0_80_MI(AvgPool2dVisitor_0_80_BI):
- # inheriting 'target' from BI class
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, [3, 4, 5, 6, 7])
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.FP32],
- output.tosa_spec,
- )
-
- if inputs[0].dtype == ts.DType.INT8:
- super().define_node(node, tosa_graph, inputs, output)
-
- if inputs[0].dtype == ts.DType.FP32:
- accumulator_type = ts.DType.FP32
- # Initialize zero point to zero.
- input_zp = 0
- output_zp = 0
-
- self._build_generic_avgpool2d(
- node, tosa_graph, inputs, output, input_zp, output_zp, accumulator_type
- )
-
-
@register_node_visitor
class AvgPool2dVisitor(NodeVisitor):
target = "aten.avg_pool2d.default"
diff --git a/backends/arm/operators/op_bmm.py b/backends/arm/operators/op_bmm.py
index 68b5b363703..c9bb0b003ee 100644
--- a/backends/arm/operators/op_bmm.py
+++ b/backends/arm/operators/op_bmm.py
@@ -23,87 +23,11 @@
validate_valid_dtype,
)
from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import build_rescale, build_rescale_v0_80
+from executorch.backends.arm.tosa_quant_utils import build_rescale
from executorch.backends.arm.tosa_specification import TosaSpecification
from tosa.RoundingMode import RoundingMode # type: ignore
-@register_node_visitor
-class BMMVisitor_0_80(NodeVisitor):
- target = "aten.bmm.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.FP32],
- output.tosa_spec,
- )
-
- # aten.bmm maps directly to MATMUL
-
- # For INT8, we need to get the zero points and add an intermediate tensor
- # for a later rescale.
- if inputs[0].dtype == ts.DType.INT8:
- input_qparams = get_input_qparams(node)
- input0_zp = input_qparams[0].get_zp_per_tensor()
- input1_zp = input_qparams[1].get_zp_per_tensor()
- bmm_result = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
- bmm_output_name = bmm_result.name
- else:
- bmm_output_name = output.name
- input0_zp, input1_zp = 0, 0
-
- # Add the MATMUL to the TOSA graph.
- attr = ts.TosaSerializerAttribute()
- attr.MatMulAttribute(A_zp=input0_zp, B_zp=input1_zp)
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().MATMUL,
- [inputs[0].name, inputs[1].name],
- [bmm_output_name],
- attr,
- )
-
- # As INT8 accumulates into INT32, we need to rescale it back to INT8
- if output.dtype == ts.DType.INT8:
- output_qparams = get_output_qparams(node)[0]
- final_output_scale = (
- input_qparams[0].get_scale_per_tensor() * input_qparams[1].get_scale_per_tensor() # type: ignore[possibly-undefined] # pyre-ignore[61]
- ) / output_qparams.get_scale_per_tensor()
-
- build_rescale_v0_80(
- tosa_fb=tosa_graph,
- scale=[final_output_scale],
- # pyre-ignore[61]: Uninitialized local [61]: Local variable `bmm_result` is undefined, or not always defined.
- input_node=bmm_result, # type: ignore[possibly-undefined]
- output_name=output.name,
- output_type=ts.DType.INT8,
- input_zp=[0],
- output_zp=[output_qparams.get_zp_per_tensor()],
- is_double_round=False,
- )
-
-
@register_node_visitor
class BMMVisitor(NodeVisitor):
target = "aten.bmm.default"
diff --git a/backends/arm/operators/op_cat.py b/backends/arm/operators/op_cat.py
index c7bad9e4429..884bfb22a40 100644
--- a/backends/arm/operators/op_cat.py
+++ b/backends/arm/operators/op_cat.py
@@ -18,48 +18,11 @@
from torch.fx import Node
-@register_node_visitor
-class CatVisitor_0_80(NodeVisitor):
- target = "aten.cat.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, [1, 2])
-
- tensors = inputs[0].special
- dim = 0 if len(inputs) < 2 else inputs[1].number
- rank = len(output.shape)
- dim = (dim + rank) % rank
- dim = output.dim_order.index(dim)
-
- attr = ts.TosaSerializerAttribute()
- attr.AxisAttribute(dim)
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().CONCAT,
- [tensor.name for tensor in tensors],
- [output.name],
- attr,
- )
-
-
@register_node_visitor
class CatVisitor(NodeVisitor):
target = "aten.cat.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
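
The surviving CatVisitor keeps the same axis bookkeeping the 0.80 class used: normalize a possibly negative dim, then translate it through the tensor's dim order, since the serialized graph may be NHWC. A worked example with an assumed NHWC dim order:

dim, rank = -3, 4                 # cat along the channel dim of an NCHW tensor
dim_order = (0, 2, 3, 1)          # assumed NHWC layout in the TOSA graph

dim = (dim + rank) % rank         # -3 -> 1 (channels)
axis = dim_order.index(dim)       # channels live at physical axis 3 in NHWC
assert axis == 3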
diff --git a/backends/arm/operators/op_clamp.py b/backends/arm/operators/op_clamp.py
index 778f9559be9..2bdeb89a713 100644
--- a/backends/arm/operators/op_clamp.py
+++ b/backends/arm/operators/op_clamp.py
@@ -26,148 +26,6 @@
from torch.fx import Node
-@register_node_visitor
-class ClampVisitor_080_BI(NodeVisitor):
- target = "aten.clamp.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def _create_clamp_node(
- self,
- tosa_graph: Any,
- input_name: str,
- output_name: str,
- min_int: int,
- max_int: int,
- min_fp32: float,
- max_fp32: float,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- attr = ts.TosaSerializerAttribute()
- attr.ClampAttribute(
- tosa_graph.builder,
- min_int,
- max_int,
- min_fp32,
- max_fp32,
- )
- tosa_graph.addOperator(ts.TosaOp.Op().CLAMP, [input_name], [output_name], attr)
-
- def _get_min_max_arguments(
- self, node: Node, dtype_min: int | float, dtype_max: int | float
- ) -> Tuple[int | float, int | float]:
-
- def cast_type(value: Any) -> int | float:
- if isinstance(value, int):
- return value
- else:
- # Attempt to cast to float
- return float(value)
-
- min_arg = dtype_min
- max_arg = dtype_max
-
- if node.args[1] is not None:
- min_arg = cast_type(node.args[1])
-
- if len(node.args) > 2:
- if node.args[2] is not None:
- max_arg = cast_type(node.args[2])
-
- return min_arg, max_arg
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts
-
- validate_num_inputs(self.target, inputs, [2, 3])
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8],
- output.tosa_spec,
- )
-
- min_int8, max_int8 = self._get_min_max_arguments(
- node,
- torch.iinfo(torch.int8).min,
- torch.iinfo(torch.int8).max,
- )
-
- # NOTE: Quantization of the min/max arguments is handled by QuantizeOperatorArguments
- self._create_clamp_node(
- tosa_graph,
- inputs[0].name,
- output.name,
- int(min_int8),
- int(max_int8),
- 0,
- 0,
- )
-
-
-@register_node_visitor
-class ClampVisitor_080_MI(ClampVisitor_080_BI):
- # inheriting 'target' from BI class
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, [2, 3])
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.FP16, ts.DType.FP32],
- output.tosa_spec,
- )
-
- if inputs[0].dtype == ts.DType.INT8:
- # Call the inherited define_node for handling integers
- super().define_node(node, tosa_graph, inputs, output)
- else:
- min_fp32, max_fp32 = self._get_min_max_arguments(
- node,
- torch.finfo(torch.float32).min,
- torch.finfo(torch.float32).max,
- )
-
- self._create_clamp_node(
- tosa_graph,
- inputs[0].name,
- output.name,
- 0,
- 0,
- min_fp32,
- max_fp32,
- )
-
-
@register_node_visitor
class ClampVisitor_INT(NodeVisitor):
target = "aten.clamp.default"
diff --git a/backends/arm/operators/op_constant_pad_nd.py b/backends/arm/operators/op_constant_pad_nd.py
index b8f28acb3c3..147a1544ce9 100644
--- a/backends/arm/operators/op_constant_pad_nd.py
+++ b/backends/arm/operators/op_constant_pad_nd.py
@@ -25,81 +25,6 @@
from executorch.backends.arm.tosa_specification import TosaSpecification
-@register_node_visitor
-class ConstantPadNDVisitor_0_80(NodeVisitor):
-
- target = "aten.constant_pad_nd.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts
-
- validate_num_inputs(self.target, inputs, 3)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [
- ts.DType.INT8,
- ts.DType.INT32,
- ts.DType.FP32,
- ts.DType.BOOL,
- ],
- output.tosa_spec,
- )
-
- if inputs[0].dtype == ts.DType.INT8:
- input_qparams = get_input_qparams(node)
- qargs = input_qparams[0]
- pad_const_qs = qargs.quantize_value(inputs[2].number).item()
- pad_const_fp = 0.0
- else:
- pad_const_fp = inputs[2].number
- pad_const_qs = 0
-
- rank = len(output.shape)
- # Each dim needs 2 padding values. For example, to pad the last dimension, the pad has the form
- # (padding_left, padding_right); to pad the last two dimensions, the pad has the form
- # (padding_left, padding_right, padding_top, padding_bottom), and so on. For PyTorch NCHW format, the padding
- # values are in the reverse order. So we first need to reverse the input padding parameters.
- input_pad = sum(
- [
- [inputs[1].special[i], inputs[1].special[i + 1]]
- for i in range(0, len(inputs[1].special), 2)
- ][::-1],
- [],
- )
- # Then, add dummy zeros to make sure that both input_pad and output_pad have the same size.
- input_pad = [0] * (rank * 2 - len(inputs[1].special)) + input_pad
- # For PyTorch NCHW format, dim order is [0,...,rank-1]
- input_dim_order = list(range(rank))
- output_pad = [0] * rank * 2
-
- # Map input padding parameters into output padding parameters. TOSA is NHWC format.
- for input_dim_idx, input_dim in enumerate(input_dim_order):
- output_dim_idx = output.dim_order.index(input_dim)
- output_pad[output_dim_idx * 2 : (output_dim_idx + 1) * 2] = input_pad[
- input_dim_idx * 2 : (input_dim_idx + 1) * 2
- ]
-
- attr = ts.TosaSerializerAttribute()
- attr.PadAttribute(tosa_graph.builder, output_pad, pad_const_qs, pad_const_fp)
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().PAD, [inputs[0].name], [output.name], attr
- )
-
-
@register_node_visitor
class ConstantPadNDVisitor(NodeVisitor):
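
The pad re-layout documented in the deleted class is the subtle part of this lowering: PyTorch lists pad pairs from the last dimension backwards, while TOSA wants one (before, after) pair per dimension in the graph's dim order. A self-contained sketch (hypothetical helper name):

def to_tosa_pad(torch_pad, rank, dim_order):
    # Reverse the (before, after) pairs, zero-extend to full rank, then
    # reorder by the output dim order.
    pairs = [torch_pad[i:i + 2] for i in range(0, len(torch_pad), 2)][::-1]
    flat = [0] * (rank * 2 - len(torch_pad)) + [p for pair in pairs for p in pair]
    out = [0] * (rank * 2)
    for in_dim in range(rank):
        out_dim = dim_order.index(in_dim)
        out[out_dim * 2:(out_dim + 1) * 2] = flat[in_dim * 2:(in_dim + 1) * 2]
    return out

# Pad W by (1, 2) on an NCHW tensor kept in NHWC dim order:
assert to_tosa_pad([1, 2], 4, (0, 2, 3, 1)) == [0, 0, 0, 0, 1, 2, 0, 0]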
diff --git a/backends/arm/operators/op_conv2d.py b/backends/arm/operators/op_conv2d.py
index 3c73e7b32c0..0bbe67c4beb 100644
--- a/backends/arm/operators/op_conv2d.py
+++ b/backends/arm/operators/op_conv2d.py
@@ -21,175 +21,9 @@
validate_num_inputs,
)
from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import build_rescale, build_rescale_v0_80
+from executorch.backends.arm.tosa_quant_utils import build_rescale
from executorch.backends.arm.tosa_specification import TosaSpecification
-from executorch.backends.arm.tosa_utils import build_reshape, tosa_shape
-
-
-@register_node_visitor
-class Conv2dVisitor_0_80(NodeVisitor):
- target = "aten.convolution.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- # torch.nn.Conv2d does not require that the result of
- # `(input + 2 * pad - dilation * (weight - 1) - 1) / stride`
- # be an integer, but TOSA currently strictly requires this property.
- # This function adjusts the pad value to meet the requirement.
- def adjust_pad_if_needed(
- self, input_size: int, input_weight: int, stride: int, pad: int, dilation: int
- ) -> int:
- mod_remainder = (
- input_size + 2 * pad - dilation * (input_weight - 1) - 1
- ) % stride
-
- # No need to adjust
- if mod_remainder == 0:
- return pad
-
- if mod_remainder > pad:
- raise RuntimeError(
- "This case should be handled by the SizeAdjustConv2d pass, is it enabled?"
- )
- return pad - mod_remainder
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- input, weight, bias, stride, pad, dilation, _, _, group = inputs
- validate_num_inputs(self.target, inputs, 9)
-
- # Get the attributes of convolution.
- attr = ts.TosaSerializerAttribute()
- pad_attr = [val for val in pad.special for _ in (0, 1)]
- stride_attr = stride.special
- dilation_attr = dilation.special
-
- # Adjust the pad value if needed to meet the strict convolution output shape calculation.
- pad_attr[1] = self.adjust_pad_if_needed(
- input.shape[2],
- weight.shape[2],
- stride_attr[0],
- pad_attr[1],
- dilation_attr[0],
- )
- pad_attr[3] = self.adjust_pad_if_needed(
- input.shape[3],
- weight.shape[3],
- stride_attr[1],
- pad_attr[3],
- dilation_attr[1],
- )
-
- input_zp = 0
- if inputs[0].dtype == ts.DType.INT8:
- # int8 input requires quantization information
- input_qparams = get_input_qparams(node)
- input_zp = input_qparams[0].get_zp_per_tensor()
-
- attr.ConvAttribute(
- pad=pad_attr,
- stride=stride_attr,
- dilation=dilation_attr,
- input_zp=input_zp,
- weight_zp=0,
- local_bound=False,
- )
-
- # The output type is int32 when input type is int8.
- conv2d_output_name = output.name
- if output.dtype == ts.DType.INT8:
- conv2d_res = tosa_graph.addIntermediate(
- tosa_shape(output.shape, output.dim_order), ts.DType.INT32
- )
- conv2d_output_name = conv2d_res.name
-
- # Given input.shape is (N, Ci, H, W), and weight.shape is (Co, Ci/G, H, W)
- in_channels = input.shape[1]
- out_channels = weight.shape[0]
- if (in_channels == group.number) and (out_channels % in_channels) == 0:
- """Depthwise convolution case"""
- # Reshape torch shape format of weight tensor to tosa required format.
- # https://www.mlplatform.org/tosa/tosa_spec.html#_depthwise_conv2d
- m_length = int(out_channels / in_channels)
- weight_post_shape = (
- weight.shape[2],
- weight.shape[3],
- in_channels,
- m_length,
- )
-
- weight_reshaped = tosa_graph.addIntermediate(
- weight_post_shape,
- weight.dtype,
- )
- build_reshape(
- tosa_graph, weight.name, weight_post_shape, weight_reshaped.name
- )
- tosa_op = ts.TosaOp.Op().DEPTHWISE_CONV2D
- weight_name = weight_reshaped.name
- else:
- """Regular convolution case"""
- tosa_op = ts.TosaOp.Op().CONV2D
- weight_name = weight.name
-
- tosa_graph.addOperator(
- tosa_op,
- [
- input.name,
- weight_name,
- bias.name,
- ],
- [conv2d_output_name],
- attr,
- )
-
- # For quantized convolution, rescale the output value back into the
- # integer value domain of the next op. Otherwise return float32 output.
- if inputs[0].dtype == ts.DType.INT8:
- # Get scale_factor from input, weight, and output.
- input_scale = input_qparams[0].get_scale_per_tensor() # type: ignore[possibly-undefined] # pyre-ignore [61]
-
- per_channel_quant = input_qparams[1].per_channel # pyre-ignore [61]
- if per_channel_quant:
- weight_scale = input_qparams[1].get_scale_per_channel()
- else:
- weight_scale = [
- input_qparams[1].get_scale_per_tensor()
- ] # pyre-ignore [61]
- output_qargs = get_output_qparams(node)
- post_conv2d_scale = [
- (inp * w) / out
- for inp, w, out in zip(
- itertools.cycle([input_scale]),
- weight_scale,
- itertools.cycle([output_qargs[0].get_scale_per_tensor()]),
- )
- ]
-
- build_rescale_v0_80(
- tosa_fb=tosa_graph,
- scale=post_conv2d_scale,
- input_node=conv2d_res, # type: ignore[possibly-undefined]
- output_name=output.name,
- output_type=output.dtype,
- input_zp=[0],
- output_zp=[output_qargs[0].get_zp_per_tensor()],
- per_channel=per_channel_quant,
- ) # type: ignore[call-arg]
+from executorch.backends.arm.tosa_utils import tosa_shape
@register_node_visitor
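
One behavior worth calling out from the deleted 0.80 conv visitor (the 1.0 visitor keeps the same split): TOSA distinguishes CONV2D from DEPTHWISE_CONV2D, and depthwise weights must be re-laid-out. A sketch of the detection and the reshape target, with hypothetical shapes:

# Given input (N, Ci, H, W) and torch weights (Co, Ci/G, Kh, Kw):
in_channels, out_channels, groups = 8, 16, 8
kh, kw = 3, 3

is_depthwise = in_channels == groups and out_channels % in_channels == 0
assert is_depthwise

# TOSA depthwise weights are (Kh, Kw, C, M) with channel multiplier M = Co / Ci.
m = out_channels // in_channels
depthwise_weight_shape = (kh, kw, in_channels, m)     # (3, 3, 8, 2)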
diff --git a/backends/arm/operators/op_eq.py b/backends/arm/operators/op_eq.py
index c4b60d37036..eb5b3000d6c 100644
--- a/backends/arm/operators/op_eq.py
+++ b/backends/arm/operators/op_eq.py
@@ -24,58 +24,6 @@
from torch.fx import Node
-@register_node_visitor
-class EqualVisitor_0_80(NodeVisitor):
- target = "aten.eq.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, inputs, ts)
- validate_valid_dtype(
- self.target,
- inputs,
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
- validate_valid_dtype(self.target, output, ts.DType.BOOL, output.tosa_spec)
-
- input_nodes = inputs
- # Handle quantization
- if inputs[0].dtype == ts.DType.INT8:
- # Rescale inputs to 32 bit
- rescaled_inputs, _ = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
-
- # Update IO
- input_nodes = rescaled_inputs
-
- # Do the equal comparison
- tosa_graph.addOperator(
- ts.TosaOp.Op().EQUAL,
- [input_nodes[0].name, input_nodes[1].name],
- output.name,
- None,
- )
-
-
@register_node_visitor
class EqualVisitor(NodeVisitor):
target = "aten.eq.Tensor"
diff --git a/backends/arm/operators/op_erf.py b/backends/arm/operators/op_erf.py
index f828cae9c8d..e238c4fd80a 100644
--- a/backends/arm/operators/op_erf.py
+++ b/backends/arm/operators/op_erf.py
@@ -19,38 +19,6 @@
from executorch.backends.arm.tosa_specification import TosaSpecification
-@register_node_visitor
-class ERFVisitor_080_MI(NodeVisitor):
- target = "aten.erf.default"
-
- # BI case handled by op_table
- tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- ts.DType.FP32,
- output.tosa_spec,
- )
-
- # MI lowering
- tosa_graph.addOperator(ts.TosaOp.Op().ERF, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class ERFVisitor(NodeVisitor):
target = "aten.erf.default"
diff --git a/backends/arm/operators/op_exp.py b/backends/arm/operators/op_exp.py
index 2dcf2c2f250..96c077c838b 100644
--- a/backends/arm/operators/op_exp.py
+++ b/backends/arm/operators/op_exp.py
@@ -20,37 +20,6 @@
from torch.fx import Node
-@register_node_visitor
-class ExpVisitor_0_80_MI(NodeVisitor):
- target = "aten.exp.default"
-
- # BI case should be handled by op_table
- tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- ts.DType.FP32,
- output.tosa_spec,
- )
-
- tosa_graph.addOperator(ts.TosaOp.Op().EXP, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class ExpVisitor(NodeVisitor):
target = "aten.exp.default"
diff --git a/backends/arm/operators/op_ge.py b/backends/arm/operators/op_ge.py
index 02815dde489..723706702f0 100644
--- a/backends/arm/operators/op_ge.py
+++ b/backends/arm/operators/op_ge.py
@@ -24,57 +24,6 @@
from torch.fx import Node
-@register_node_visitor
-class GreaterEqualVisitor_0_80(NodeVisitor):
- target = "aten.ge.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, inputs, ts)
- validate_valid_dtype(
- self.target,
- inputs,
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
- validate_valid_dtype(self.target, output, ts.DType.BOOL, output.tosa_spec)
-
- input_nodes = inputs
- # Handle quantization
- if inputs[0].dtype == ts.DType.INT8:
- # Rescale inputs to 32 bit
- rescaled_inputs, _ = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
-
- # Update IO
- input_nodes = rescaled_inputs
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().GREATER_EQUAL,
- [input_nodes[0].name, input_nodes[1].name],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class GreaterEqualVisitor(NodeVisitor):
target = "aten.ge.Tensor"
diff --git a/backends/arm/operators/op_gt.py b/backends/arm/operators/op_gt.py
index fb2d3fa100c..e79ed009e24 100644
--- a/backends/arm/operators/op_gt.py
+++ b/backends/arm/operators/op_gt.py
@@ -24,57 +24,6 @@
from torch.fx import Node
-@register_node_visitor
-class GreaterThanVisitor_0_80(NodeVisitor):
- target = "aten.gt.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, inputs, ts)
- validate_valid_dtype(
- self.target,
- inputs,
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
- validate_valid_dtype(self.target, output, ts.DType.BOOL, output.tosa_spec)
-
- input_nodes = inputs
- # Handle quantization
- if inputs[0].dtype == ts.DType.INT8:
- # Rescale inputs to 32 bit
- rescaled_inputs, _ = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
-
- # Update IO
- input_nodes = rescaled_inputs
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().GREATER,
- [input_nodes[0].name, input_nodes[1].name],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class GreaterThanVisitor(NodeVisitor):
target = "aten.gt.Tensor"
diff --git a/backends/arm/operators/op_index_select.py b/backends/arm/operators/op_index_select.py
index 7f8f582d0f9..a42f85abc4c 100644
--- a/backends/arm/operators/op_index_select.py
+++ b/backends/arm/operators/op_index_select.py
@@ -15,7 +15,7 @@
)
from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_utils import build_reshape, build_reshape_tosa_1_0
+from executorch.backends.arm.tosa_utils import build_reshape_tosa_1_0
from torch.fx import Node
@@ -34,7 +34,7 @@ class IndexSelectVisitor(NodeVisitor):
"""
target = "aten.index_select.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
@@ -98,88 +98,3 @@ def define_node(
build_reshape_tosa_1_0(
tosa_graph, output_name, output_real_shape, output.name
)
-
-
-@register_node_visitor
-class IndexSelectVisitor_0_80(NodeVisitor):
- """
- Simple example:
- o = index_select(weights, index, indices)
- Becomes:
- i = view_copy(i) # reshape flattened indices, i.e. [I] => [1, I]
- o = index_select(w, index, i)
-
- Additional steps in case weights (w) are rank 2:
- - before: insert view_copy to make rank 3, [x,y] => [1, x, y]
- - after: insert view_copy to squeeze back output dims, [1, x, y] => [x, y]
- """
-
- target = "aten.index_select.default"
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts_v0_80 # type: ignore
-
- # Specification (0.80) states that input and output types
- # should all be the same
- if inputs[0].dtype != output.dtype:
- raise ValueError(
- f"Input and output type not same: {inputs[0].dtype} != {output.dtype:}"
- )
-
- if len(inputs) != 3:
- raise ValueError(f"Number of inputs are not 3: {len(inputs)}")
-
- weights, index, indices = inputs
-
- if len(weights.shape) == 2:
- weights_new_shape = [1, weights.shape[0], weights.shape[1]]
- weights_reshaped = tosa_graph.addIntermediate(
- weights_new_shape,
- weights.dtype,
- )
- build_reshape(
- tosa_graph, weights.name, weights_new_shape, weights_reshaped.name
- )
-
- output_new_shape = [1, output.shape[0], output.shape[1]]
- output_reshaped = tosa_graph.addIntermediate(
- output_new_shape,
- output.dtype,
- )
-
- else:
- weights_reshaped = weights
- output_reshaped = output
-
- output_name = output_reshaped.name
-
- # Reshape flattened indices, i.e. [I] => [1, I]
- indices_new_shape = [1, indices.shape[0]]
- indices_reshaped = tosa_graph.addIntermediate(
- indices_new_shape,
- indices.dtype,
- )
- build_reshape(
- tosa_graph, indices.name, indices_new_shape, indices_reshaped.name
- )
-
- tosa_graph.addOperator(
- ts_v0_80.TosaOp.Op().GATHER,
- [weights_reshaped.name, indices_reshaped.name],
- [output_name],
- None,
- )
-
- if len(weights.shape) == 2:
- output_real_shape = [output.shape[0], output.shape[1]]
- build_reshape(tosa_graph, output_name, output_real_shape, output.name)
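
For reference, the GATHER-based lowering that both index_select visitors use (the deleted one above and the 1.0 one kept in this file) can be emulated in a few lines: TOSA GATHER consumes values (N, K, C) and indices (N, W), so rank-2 weights are lifted to rank 3 and squeezed back afterwards. A sketch in torch:

import torch

weights = torch.arange(12.0).reshape(4, 3)   # (K=4, C=3)
indices = torch.tensor([2, 0, 2])            # flat indices, (W=3)

values_3d = weights.reshape(1, 4, 3)         # lift to (N=1, K, C)
indices_2d = indices.reshape(1, 3)           # lift to (N=1, W)
gathered = values_3d[0, indices_2d[0]]       # GATHER on batch 0 -> (W, C)
out = gathered.reshape(3, 3)                 # squeeze back to (W, C)

assert torch.equal(out, weights[indices])    # matches aten.index_select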
diff --git a/backends/arm/operators/op_index_tensor.py b/backends/arm/operators/op_index_tensor.py
index 36d0b37e090..7afd7fe6612 100644
--- a/backends/arm/operators/op_index_tensor.py
+++ b/backends/arm/operators/op_index_tensor.py
@@ -24,6 +24,7 @@
from torch.fx import Node
+@register_node_visitor
class CommonIndexTensorVisitor(NodeVisitor):
target = "aten.index.Tensor"
@@ -92,136 +93,6 @@ def _calculate_value_strides(self, values_shape: List[int]) -> List[int]:
return values_strides
-@register_node_visitor
-class IndexTensorVisitor_080(CommonIndexTensorVisitor):
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- """
- This approach uses the fact that all indexing tensors are incremented
- simultaneously and they essentially act as a map along the corresponding
- dimensions of the values tensor.
- Note that this does not hold true when slicing or ellipsis ops
- are involved; as such, they are not currently supported.
-
- This approach therefore flattens the values tensor and constructs a
- flattened index by flattening the index tensors, multiplying them by
- the relevant strides, and accumulating the results.
-
- This approach suffers from the fact that we are taking a number of index tensors of
- type int32 and applying multiplications and additions.
-
- If the total number of elements in the values tensor exceeds the
- int32 limit, this approach breaks down.
- """
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_same_dtype(self.target, [inputs[0], output])
-
- values, indices = inputs
- index_nodes = indices.special
-
- # Broadcast indices
- broadcasted_tensors = tutils.broadcast_tensors(
- tosa_graph, index_nodes, self.tosa_spec
- )
-
- values_strides = self._calculate_value_strides(values.shape)
-
- # The indices have already been broadcast to a common shape
- # so they are all the same.
- _, index_dtype, index_shape = self._get_tensor_info(broadcasted_tensors[0])
-
- N, K, W, C = self._calculate_tosa_vals(index_shape, index_nodes, values.shape)
-
- gather_idx_shape = [N, W]
-
- gather_index_name = ""
- # Flatten out and shift indexes.
- for i, index_node in enumerate(broadcasted_tensors):
- index_name, _, _ = self._get_tensor_info(index_node)
- index_name = index_node.name
-
- stride_shifted_indices = tosa_graph.addIntermediate(
- index_shape,
- index_dtype,
- )
-
- # Division by C is necessary when len(indices) < values.rank.
- # When dimensions are left unindexed, that changes the
- # channels and thus the stride shift.
- data = np.full(index_shape, int(values_strides[i] / C))
- mul_const = tosa_graph.addConst(index_shape, index_dtype, data)
- attr = ts.TosaSerializerAttribute()
- attr.MulAttribute(shift=0)
- tosa_graph.addOperator(
- ts.TosaOp.Op().MUL,
- [index_name, mul_const.name],
- [stride_shifted_indices.name],
- attr,
- )
-
- reshaped_idxs = tosa_graph.addIntermediate(
- gather_idx_shape,
- index_dtype,
- )
- tutils.build_reshape(
- tosa_graph,
- stride_shifted_indices.name,
- gather_idx_shape,
- reshaped_idxs.name,
- )
-
- # Guarantees that the accumulation tensor is properly
- # initialized and does not contain junk data.
- if i == 0:
- gather_index_name = reshaped_idxs.name
- else:
- add_idxs = tosa_graph.addIntermediate(
- reshaped_idxs.shape,
- reshaped_idxs.dtype,
- )
- tosa_graph.addOperator(
- ts.TosaOp.Op().ADD,
- [gather_index_name, reshaped_idxs.name],
- [add_idxs.name],
- )
- gather_index_name = add_idxs.name
-
- gather_vals_shape = [N, K, C]
- reshaped_input = tosa_graph.addIntermediate(gather_vals_shape, values.dtype)
- tutils.build_reshape(
- tosa_graph, values.name, gather_vals_shape, reshaped_input.name
- )
-
- gather_out_shape = (N, W, C)
- gather_out = tosa_graph.addIntermediate(
- gather_out_shape,
- output.dtype,
- )
- tosa_graph.addOperator(
- ts.TosaOp.Op().GATHER,
- [reshaped_input.name, gather_index_name],
- [gather_out.name],
- None,
- )
-
- output_shape = tutils.tosa_shape(output.shape, output.dim_order)
- tutils.build_reshape(tosa_graph, gather_out.name, output_shape, output.name)
-
-
@register_node_visitor
class IndexTensorVisitor(CommonIndexTensorVisitor):
tosa_specs = [
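
The docstring above describes the flattened-index trick in prose; a compact numpy sketch of the same arithmetic, using a small values tensor where one trailing dimension stays unindexed:

import numpy as np

values = np.arange(24).reshape(2, 3, 4)   # element strides: (12, 4, 1)
idx0 = np.array([0, 1])                   # index tensor for dim 0
idx1 = np.array([2, 0])                   # index tensor for dim 1
C = 4                                     # dim 2 is left unindexed

# Each index is scaled by stride / C and accumulated into one gather index.
flat = idx0 * (12 // C) + idx1 * (4 // C)     # [2, 3]
rows = values.reshape(-1, C)[flat]            # emulates the final GATHER
assert np.array_equal(rows, values[idx0, idx1])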
diff --git a/backends/arm/operators/op_le.py b/backends/arm/operators/op_le.py
index af615f8aacd..9301f91cb4c 100644
--- a/backends/arm/operators/op_le.py
+++ b/backends/arm/operators/op_le.py
@@ -24,57 +24,6 @@
from torch.fx import Node
-@register_node_visitor
-class LessEqualVisitor_0_80(NodeVisitor):
- target = "aten.le.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, inputs, ts)
- validate_valid_dtype(
- self.target,
- inputs,
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
- validate_valid_dtype(self.target, output, ts.DType.BOOL, output.tosa_spec)
-
- input_nodes = inputs
- # Handle quantization
- if inputs[0].dtype == ts.DType.INT8:
- # Rescale inputs to 32 bit
- rescaled_inputs, _ = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
-
- # Update IO
- input_nodes = rescaled_inputs
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().GREATER_EQUAL,
- [input_nodes[1].name, input_nodes[0].name],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class LessEqualVisitor(NodeVisitor):
target = "aten.le.Tensor"
diff --git a/backends/arm/operators/op_log.py b/backends/arm/operators/op_log.py
index 72faa99d0a4..8a48fe4fda5 100644
--- a/backends/arm/operators/op_log.py
+++ b/backends/arm/operators/op_log.py
@@ -20,34 +20,6 @@
from torch.fx import Node
-@register_node_visitor
-class LogVisitor_0_80_MI(NodeVisitor):
- target = "aten.log.default"
-
- # BI case should be handled by op_table
- tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- tosa_graph.addOperator(ts.TosaOp.Op().LOG, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class LogVisitor(NodeVisitor):
target = "aten.log.default"
diff --git a/backends/arm/operators/op_lt.py b/backends/arm/operators/op_lt.py
index 7b483e075ec..31083e93590 100644
--- a/backends/arm/operators/op_lt.py
+++ b/backends/arm/operators/op_lt.py
@@ -24,57 +24,6 @@
from torch.fx import Node
-@register_node_visitor
-class LessThanVisitor_0_80(NodeVisitor):
- target = "aten.lt.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, inputs, ts)
- validate_valid_dtype(
- self.target,
- inputs,
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
- validate_valid_dtype(self.target, output, ts.DType.BOOL, output.tosa_spec)
-
- input_nodes = inputs
- # Handle quantization
- if inputs[0].dtype == ts.DType.INT8:
- # Rescale inputs to 32 bit
- rescaled_inputs, _ = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
-
- # Update IO
- input_nodes = rescaled_inputs
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().GREATER,
- [input_nodes[1].name, input_nodes[0].name],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class LessThanVisitor(NodeVisitor):
target = "aten.lt.Tensor"
diff --git a/backends/arm/operators/op_max_pool2d.py b/backends/arm/operators/op_max_pool2d.py
index b3c779477ca..754fcfcd638 100644
--- a/backends/arm/operators/op_max_pool2d.py
+++ b/backends/arm/operators/op_max_pool2d.py
@@ -8,10 +8,6 @@
import torch
-from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
- get_input_qparams,
- get_output_qparams,
-)
from executorch.backends.arm.operators.node_visitor import (
NodeVisitor,
register_node_visitor,
@@ -26,102 +22,6 @@
from executorch.backends.arm.tosa_specification import TosaSpecification
-@register_node_visitor
-class MaxPool2dVisitor_0_80(NodeVisitor):
- target = "aten.max_pool2d.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, [3, 4, 5, 6])
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.FP32],
- output.tosa_spec,
- )
-
- input_tensor = inputs[0]
- kernel_size = inputs[1].special
- stride = inputs[2].special
-
- if len(inputs) == 6:
- ceil_mode = bool(inputs[5].number)
- else:
- ceil_mode = False
- try:
- pad_size_list = inputs[3].special
- pad_size_list = [
- pad_size_list[0],
- pad_size_list[0],
- pad_size_list[1],
- pad_size_list[1],
- ]
- except (IndexError, AttributeError):
- pad_size_list = [0, 0, 0, 0]
-
- # Adjust the padding as necessary
- pad_size_list[1] = adjust_pooling_pad_if_needed(
- input_tensor.shape[2],
- kernel_size[0],
- stride[0],
- pad_size_list[1],
- ceil_mode,
- )
- pad_size_list[3] = adjust_pooling_pad_if_needed(
- input_tensor.shape[3],
- kernel_size[1],
- stride[1],
- pad_size_list[3],
- ceil_mode,
- )
-
- accumulator_type = output.dtype
-
- # Initialize zero point to zero.
- input_zp = 0
- if inputs[0].dtype == ts.DType.INT8:
- input_qparams = get_input_qparams(node)
- input_zp = input_qparams[0].get_zp_per_tensor()
-
- output_zp = 0
- if output.dtype == ts.DType.INT8:
- output_qparams = get_output_qparams(node)
- output_zp = output_qparams[0].get_zp_per_tensor()
-
- attr = ts.TosaSerializerAttribute()
- attr.PoolAttribute(
- kernel=kernel_size,
- stride=stride,
- pad=pad_size_list,
- input_zp=input_zp,
- output_zp=output_zp,
- accum_dtype=accumulator_type,
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().MAX_POOL2D,
- [input_tensor.name],
- [output.name],
- attr,
- )
-
-
@register_node_visitor
class MaxPool2dVisitor(NodeVisitor):
target = "aten.max_pool2d.default"
diff --git a/backends/arm/operators/op_maximum.py b/backends/arm/operators/op_maximum.py
index 834429e7bed..27e5fdc2e02 100644
--- a/backends/arm/operators/op_maximum.py
+++ b/backends/arm/operators/op_maximum.py
@@ -28,74 +28,6 @@
from torch.fx import Node
-@register_node_visitor
-class MaxVisitor_0_80(NodeVisitor):
- target = "aten.maximum.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- scale_back = 1.0
- max_output = output
- if inputs[0].dtype == ts.DType.INT8:
- input_qparams = get_input_qparams(node)
- if len(input_qparams) != 2:
- raise ValueError(
- f"Both inputs need to have quantization information for {node}"
- )
- if input_qparams[0] != input_qparams[1]:
- raise ValueError(
- "Both inputs must have the same quantization parameters for MAX"
- )
-
- # insert RESCALEs to int32
- operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
-
- output.shape = tosa_shape(output.shape, output.dim_order)
- max_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
- else:
- operand_inputs = inputs
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().MAXIMUM,
- [
- operand_inputs[0].name,
- operand_inputs[1].name,
- ],
- [max_output.name],
- )
-
- if output.dtype == ts.DType.INT8:
- # insert RESCALE from int32 back to int8
- tqutils.insert_rescale_op_to_int8(tosa_graph, max_output, scale_back, node)
-
-
@register_node_visitor
class MaxVisitor(NodeVisitor):
target = "aten.maximum.default"
diff --git a/backends/arm/operators/op_minimum.py b/backends/arm/operators/op_minimum.py
index 856686cbf47..9dfa7d1f394 100644
--- a/backends/arm/operators/op_minimum.py
+++ b/backends/arm/operators/op_minimum.py
@@ -27,74 +27,6 @@
from torch.fx import Node
-@register_node_visitor
-class MinVisitor_0_80(NodeVisitor):
- target = "aten.minimum.default"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- scale_back = 1.0
- min_output = output
- if inputs[0].dtype == ts.DType.INT8:
- input_qparams = get_input_qparams(node)
- if len(input_qparams) != 2:
- raise ValueError(
- f"Both inputs need to have quantization information for {node}"
- )
- if input_qparams[0] != input_qparams[1]:
- raise ValueError(
- "Both inputs must have the same quantization parameters for MIN"
- )
-
- # insert RESCALEs to int32
- operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
-
- output.shape = tosa_shape(output.shape, output.dim_order)
- min_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
- else:
- operand_inputs = inputs
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().MINIMUM,
- [
- operand_inputs[0].name,
- operand_inputs[1].name,
- ],
- [min_output.name],
- )
-
- if output.dtype == ts.DType.INT8:
- # insert RESCALE from int32 back to int8
- tqutils.insert_rescale_op_to_int8(tosa_graph, min_output, scale_back, node)
-
-
@register_node_visitor
class MinVisitor(NodeVisitor):
target = "aten.minimum.default"
diff --git a/backends/arm/operators/op_mul.py b/backends/arm/operators/op_mul.py
index 4c09ed91f16..7d9f6eac6aa 100644
--- a/backends/arm/operators/op_mul.py
+++ b/backends/arm/operators/op_mul.py
@@ -26,136 +26,6 @@
)
from executorch.backends.arm.tosa_mapping import TosaArg
from executorch.backends.arm.tosa_specification import TosaSpecification
-from executorch.backends.arm.tosa_utils import reshape_for_broadcast
-
-
-@register_node_visitor
-class MulVisitor_080_BI(NodeVisitor):
- target = "aten.mul.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT32],
- output.tosa_spec,
- )
-
- dim_order = (
- inputs[0].dim_order
- if len(inputs[0].shape) > len(inputs[1].shape)
- else inputs[1].dim_order
- )
- if inputs[0].dtype == ts.DType.INT8:
- input_A = inputs[0]
- input_B = inputs[1]
- input_qparams = get_input_qparams(node)
- input_A_qargs = input_qparams[0]
- input_B_qargs = input_qparams[1]
- input_A.shape = tutils.tosa_shape(input_A.shape, input_A.dim_order)
- input_B.shape = tutils.tosa_shape(input_B.shape, input_B.dim_order)
-
- # Rescale inputs to INT32 with zp=0
- input_A_rescaled = tqutils.build_rescale_to_int32(
- tosa_graph,
- input_A,
- input_A_qargs.get_zp_per_tensor(),
- 1.0,
- )
- input_B_rescaled = tqutils.build_rescale_to_int32(
- tosa_graph,
- input_B,
- input_B_qargs.get_zp_per_tensor(),
- 1.0,
- )
- else:
- # input[0].dtype == ts.DType.INT32
- # Non-quantized input, natively supported by TOSA MUL
- input_A_rescaled, input_B_rescaled = inputs[0], inputs[1]
-
- if output.dtype == ts.DType.INT8:
- output_shape = tutils.tosa_shape(output.shape, output.dim_order)
- mul_output = tosa_graph.addIntermediate(output_shape, ts.DType.INT32)
- else:
- # output.dtype == ts.DType.INT32
- mul_output = output
-
- input1, input2 = tutils.reshape_for_broadcast(
- tosa_graph,
- [
- input_A_rescaled,
- input_B_rescaled,
- ],
- dim_order,
- )
-
- # Do the INT32 Mul
- attr = ts.TosaSerializerAttribute()
- attr.MulAttribute(shift=0)
- tosa_graph.addOperator(
- ts.TosaOp.Op().MUL,
- [input1.name, input2.name],
- [mul_output.name],
- attr,
- )
-
- if output.dtype == ts.DType.INT8:
- # Scale output back to 8 bit
- output_scale = (
- input_A_qargs.get_scale_per_tensor() # type: ignore[possibly-undefined]
- * input_B_qargs.get_scale_per_tensor() # type: ignore[possibly-undefined]
- )
- tqutils.insert_rescale_op_to_int8(
- tosa_graph, mul_output, output_scale, node
- )
-
-
-@register_node_visitor
-class MulVisitor_080_MI(MulVisitor_080_BI):
- # inheriting 'target' from BI class
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
-
- if inputs[0].dtype == ts.DType.INT8:
- return super().define_node(node, tosa_graph, inputs, output)
-
- input1, input2 = reshape_for_broadcast(tosa_graph, inputs)
-
- attr = ts.TosaSerializerAttribute()
- attr.MulAttribute(shift=0)
- tosa_graph.addOperator(
- ts.TosaOp.Op().MUL, [input1.name, input2.name], [output.name], attr
- )
@register_node_visitor
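
The removed MulVisitor_080_BI shows the requantization recipe the INT8 path still follows: rescale both operands to INT32 with zero points removed but scales untouched, multiply, then fold both input scales and the output scale into one final rescale. With hypothetical numbers:

s_a, s_b, s_out = 0.1, 0.2, 0.1   # made-up quantization scales
qa, za = 30, 10                   # (30 - 10) * 0.1 = 2.0 real
qb, zb = 25, 5                    # (25 - 5) * 0.2 = 4.0 real

acc = (qa - za) * (qb - zb)               # INT32 MUL result: 400
q_out = round(acc * (s_a * s_b) / s_out)  # 400 * 0.2 = 80
assert q_out * s_out == 8.0               # 2.0 * 4.0, as expected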
diff --git a/backends/arm/operators/op_neg.py b/backends/arm/operators/op_neg.py
index e3b3eabf9ba..54f3dafe769 100644
--- a/backends/arm/operators/op_neg.py
+++ b/backends/arm/operators/op_neg.py
@@ -37,58 +37,11 @@ def get_negate_zero_points(node: torch.fx.Node, is_int8: bool) -> tuple[int, int
return (0, 0)
-@register_node_visitor
-class NegVisitor_0_80(NodeVisitor):
- target = "aten.neg.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- supported_dtypes = [
- ts.DType.INT8,
- ts.DType.INT16,
- ts.DType.INT32,
- ts.DType.FP16,
- ts.DType.BF16,
- ts.DType.FP32,
- ]
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target, [*inputs, output], supported_dtypes, output.tosa_spec
- )
-
- input_zp, output_zp = get_negate_zero_points(
- node, inputs[0].dtype == ts.DType.INT8
- )
-
- attr = ts.TosaSerializerAttribute()
- attr.NegateAttribute(input1_zp=input_zp, output_zp=output_zp)
- tosa_graph.addOperator(
- ts.TosaOp.Op().NEGATE,
- [inputs[0].name],
- [output.name],
- attributes=attr,
- )
-
-
@register_node_visitor
class NegVisitor(NodeVisitor):
target = "aten.neg.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
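
get_negate_zero_points above feeds the NEGATE attribute; the arithmetic it enables is easy to state. Assuming the usual TOSA formulation (a sketch, not the spec text), quantized negation pivots around the zero points rather than the raw code 0:

in_zp, out_zp = 10, 10        # hypothetical asymmetric INT8 zero points
in_q = 70                     # represents (70 - 10) * s = 60 * s

out_q = out_zp - (in_q - in_zp)              # 10 - 60 = -50 (then clamped to int8)
assert (out_q - out_zp) == -(in_q - in_zp)   # represents -60 * s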
diff --git a/backends/arm/operators/op_permute.py b/backends/arm/operators/op_permute.py
index 25cd294ba93..0830d8f4504 100644
--- a/backends/arm/operators/op_permute.py
+++ b/backends/arm/operators/op_permute.py
@@ -94,57 +94,11 @@ def transform_permutation_vector(permutation_vector: list[int], dim_order: list[
return permutation_vector
-@register_node_visitor
-class PermuteVisitor_0_80(NodeVisitor):
- target = "aten.permute_copy.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- # The permutation vector describes a permutation P in the default PyTorch dim_order.
- # For rank 4, the default dim_order is NCHW.
- # E.g. (2,3,0,1) -> permute (n,c,h,w) to (h,w,n,c)
- permutation_vector = inputs[1].special
-
- if output.dim_order != tuple(range(len(output.dim_order))):
- # the permutation vector can't be used directly if we are not in NCHW dim_order.
- # Transform to dim_order.
- permutation_vector = transform_permutation_vector(
- permutation_vector, output.dim_order
- )
-
- attr = ts.TosaSerializerAttribute()
- attr.TransposeAttribute(permutation_vector)
- tosa_graph.addOperator(
- ts.TosaOp.Op().TRANSPOSE, [inputs[0].name], [output.name], attr
- )
-
-
@register_node_visitor
class PermuteVisitor(NodeVisitor):
target = "aten.permute_copy.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
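
transform_permutation_vector, referenced above, compensates for the graph's dim order. Assuming input and output share the same dim order (a sketch, not the exact implementation), the logical permutation is conjugated by it; a worked NHWC example:

def to_physical_perm(perm, dim_order):
    # Physical axis i holds logical dim dim_order[i], so look up where each
    # permuted logical dim ends up physically.
    return [dim_order.index(perm[d]) for d in dim_order]

# Swap H and W (logical NCHW perm (0, 1, 3, 2)) on an NHWC-laid-out tensor:
assert to_physical_perm([0, 1, 3, 2], (0, 2, 3, 1)) == [0, 2, 1, 3]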
diff --git a/backends/arm/operators/op_pow.py b/backends/arm/operators/op_pow.py
index ab5f5ac2f9e..413160c902a 100644
--- a/backends/arm/operators/op_pow.py
+++ b/backends/arm/operators/op_pow.py
@@ -21,46 +21,6 @@
from torch.fx import Node
-@register_node_visitor
-class PowVisitor_080_MI(NodeVisitor):
- target = "aten.pow.Tensor_Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.FP16, ts.DType.FP32],
- output.tosa_spec,
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().POW,
- [
- inputs[0].name,
- inputs[1].name,
- ],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class PowVisitor(NodeVisitor):
target = "aten.pow.Tensor_Tensor"
diff --git a/backends/arm/operators/op_reciprocal.py b/backends/arm/operators/op_reciprocal.py
index 26a86ee2330..3838afd9728 100644
--- a/backends/arm/operators/op_reciprocal.py
+++ b/backends/arm/operators/op_reciprocal.py
@@ -21,36 +21,6 @@
from executorch.backends.arm.tosa_specification import TosaSpecification
-@register_node_visitor
-class ReciprocalVisitor_080_MI(NodeVisitor):
- target = "aten.reciprocal.default"
-
- # BI case should be handled by op_table
- tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().RECIPROCAL, [inputs[0].name], [output.name]
- )
-
-
@register_node_visitor
class ReciprocalVisitor(NodeVisitor):
target = "aten.reciprocal.default"
diff --git a/backends/arm/operators/op_repeat.py b/backends/arm/operators/op_repeat.py
index 069cf32f27b..3e636e993b7 100644
--- a/backends/arm/operators/op_repeat.py
+++ b/backends/arm/operators/op_repeat.py
@@ -21,47 +21,11 @@
from executorch.backends.arm.tosa_utils import tosa_shape
-@register_node_visitor
-class RepeatVisitor_0_80(NodeVisitor):
- target = "aten.repeat.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: list[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- multiples = inputs[1].special
-
- attr = ts.TosaSerializerAttribute()
- attr.TileAttribute(tosa_shape(multiples, output.dim_order))
- tosa_graph.addOperator(
- ts.TosaOp.Op().TILE, [inputs[0].name], [output.name], attr
- )
-
-
@register_node_visitor
class RepeatVisitor(NodeVisitor):
target = "aten.repeat.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
diff --git a/backends/arm/operators/op_rescale.py b/backends/arm/operators/op_rescale.py
index df8d3c7dbef..3f86c439995 100644
--- a/backends/arm/operators/op_rescale.py
+++ b/backends/arm/operators/op_rescale.py
@@ -7,7 +7,6 @@
from typing import Any, cast, List
-import executorch.backends.arm.tosa_quant_utils as tosa_quant_utils
import torch
from executorch.backends.arm.operators.node_visitor import (
NodeVisitor,
@@ -24,65 +23,8 @@
@register_node_visitor
-class RescaleVisitor_0_80(NodeVisitor):
- target = "_rescale.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 5)
-
- input_dtype = node.all_input_nodes[0].meta["val"].dtype
- output_dtype = cast(torch.dtype, node.args[1])
- scale = cast(float, node.args[2])
- input_zp = cast(int, node.args[3])
- output_zp = cast(int, node.args[4])
-
- if input_dtype != torch.int8 and input_zp != 0:
- raise ValueError(
- f"If input dtype is not int8, input_zp must be 0. Got input_dtype{input_dtype=}, {input_zp=}"
- )
- if output_dtype != torch.int8 and output_zp != 0:
- raise ValueError(
- f"If output dtype is not int8, output_zp must be 0. Got {output_dtype=}, {output_zp=}"
- )
-
- # scale32 gives higher accuracy but for a higher HW cost.
- # For now, always go for scale32.
- scale_32 = True
- scale_width = 32 if scale_32 else 16
- multiplier, shift = tosa_quant_utils.compute_multiplier_and_shift(
- [scale], scale_width
- )
- attr_rescale = ts.TosaSerializerAttribute()
- attr_rescale.RescaleAttribute(
- input_zp=input_zp,
- output_zp=output_zp,
- multiplier=multiplier,
- shift=shift,
- scale32=scale_32,
- double_round=False,
- per_channel=False,
- input_unsigned=False,
- output_unsigned=False,
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().RESCALE, [inputs[0].name], [output.name], attr_rescale
- )
-
-
-@register_node_visitor
-class RescaleVisitor_INT(NodeVisitor):
- target = "_rescale.default"
+class RescaleVisitor(NodeVisitor):
+ target = "tosa.RESCALE.default"
tosa_specs = [TosaSpecification.create_from_string("TOSA-1.0+INT")]
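
compute_multiplier_and_shift, used by the deleted visitor (and still by the INT one), turns the float scale into the fixed-point pair RESCALE consumes: scale ~= multiplier * 2**(-shift), with the multiplier normalized into [2**30, 2**31) for scale32 mode. A simplified sketch that ignores edge cases, not the exact implementation:

def compute_multiplier_and_shift_sketch(scale: float, width: int = 32):
    shift = 0
    while scale < 0.5:            # normalize the mantissa into [0.5, 1.0)
        scale *= 2.0
        shift += 1
    while scale >= 1.0:
        scale /= 2.0
        shift -= 1
    multiplier = round(scale * (1 << (width - 1)))
    return multiplier, shift + (width - 1)

m, s = compute_multiplier_and_shift_sketch(0.0125)
assert abs(m * 2.0 ** -s - 0.0125) < 1e-9     # 1717986918 * 2**-37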
diff --git a/backends/arm/operators/op_rshift_tensor.py b/backends/arm/operators/op_rshift_tensor.py
index c46b358638f..5313f5c8143 100644
--- a/backends/arm/operators/op_rshift_tensor.py
+++ b/backends/arm/operators/op_rshift_tensor.py
@@ -21,51 +21,11 @@
from executorch.backends.arm.tosa_mapping import TosaArg
-@register_node_visitor
-class RshiftVisitor_0_80(NodeVisitor):
- target = "aten.bitwise_right_shift.Tensor"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT16, ts.DType.INT32],
- output.tosa_spec,
- )
-
- attr = ts.TosaSerializerAttribute()
- round = False
- if self.tosa_spec.is_U55_subset:
- # U55 only supports INT32 and round == True
- # TODO MLETORCH-525 Emulate round == False with different decomposition
- round = True
- attr.ArithmeticRightShiftAttribute(round=round)
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().ARITHMETIC_RIGHT_SHIFT,
- [inputs[0].name, inputs[1].name],
- [output.name],
- attr,
- )
-
-
@register_node_visitor
class RshiftVisitor(NodeVisitor):
target = "aten.bitwise_right_shift.Tensor"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def define_node(
self,
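
The removed U55 note refers to the round flag of TOSA ARITHMETIC_RIGHT_SHIFT: with round=True, half of the discarded LSB range is added before shifting. A minimal sketch of the semantics:

def arithmetic_right_shift(x, shift, round=False):
    if round and shift > 0:
        x += 1 << (shift - 1)   # round-half-up on the bits shifted out
    return x >> shift           # Python's >> on ints is an arithmetic shift

# arithmetic_right_shift(7, 2) == 1; arithmetic_right_shift(7, 2, round=True) == 2
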
diff --git a/backends/arm/operators/op_rsqrt.py b/backends/arm/operators/op_rsqrt.py
index 6f8340141cc..df293946ded 100644
--- a/backends/arm/operators/op_rsqrt.py
+++ b/backends/arm/operators/op_rsqrt.py
@@ -21,34 +21,6 @@
from executorch.backends.arm.tosa_specification import TosaSpecification
-@register_node_visitor
-class RsqrtVisitor_080_MI(NodeVisitor):
- target = "aten.rsqrt.default"
-
- # BI case should be handled by op_table
- tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- tosa_graph.addOperator(ts.TosaOp.Op().RSQRT, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class RsqrtVisitor(NodeVisitor):
target = "aten.rsqrt.default"
diff --git a/backends/arm/operators/op_sigmoid.py b/backends/arm/operators/op_sigmoid.py
index 880bbe29a05..dec42ae15f9 100644
--- a/backends/arm/operators/op_sigmoid.py
+++ b/backends/arm/operators/op_sigmoid.py
@@ -20,34 +20,6 @@
from torch.fx import Node
-@register_node_visitor
-class SigmoidVisitor_080_MI(NodeVisitor):
- target = "aten.sigmoid.default"
-
- # BI case should be handled by op_table
- tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- tosa_graph.addOperator(ts.TosaOp.Op().SIGMOID, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class SigmoidVisitor(NodeVisitor):
target = "aten.sigmoid.default"
diff --git a/backends/arm/operators/op_slice.py b/backends/arm/operators/op_slice.py
index 23acf304bbb..56115073ce1 100644
--- a/backends/arm/operators/op_slice.py
+++ b/backends/arm/operators/op_slice.py
@@ -34,80 +34,11 @@ def _fixup_end(end, shape, dim):
return min(end.number, shape[dim])
-@register_node_visitor
-class SliceVisitor_080(NodeVisitor):
- target = "aten.slice_copy.Tensor"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, [4, 5])
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- # See slice_copy_support.py
- if not (len(inputs) == 4 or (len(inputs) == 5 and inputs[4].number == 1)):
- raise ValueError("Unsupported combination of inputs")
-
- # aten.slice_copy supports slicing in 1d at a time.
- # The arguments are the actual input, dimension of slicing, start index, end index and optinal step or stride.
- input_node, dim, start, end = inputs
-
- # Translate and check parameters in Pytorch dim order.
- shape = input_node.shape
- dim = dim.number
-
- start_index = _fixup_start(start, shape, dim)
- end_index = _fixup_end(end, shape, dim)
- size = end_index - start_index
-
- if size <= 0:
- raise ValueError(
- f"The calculated slice size must be positive. Got {size=} "
- f"with {start_index=} and {end_index=}."
- )
- if size > shape[dim]:
- raise ValueError(
- f"The calculated slice size cannot be greater than the dimension size"
- f". Got {size=} and {shape[dim]=}."
- )
-
- # Convert aten args to Tosa's start and size attributes and in TOSA dim order.
- attr = ts.TosaSerializerAttribute()
-
- start_attr = [
- _fixup_start(start, shape, dim) if i == dim else 0
- for i in input_node.dim_order
- ]
- size_attr = [size if i == dim else shape[i] for i in input_node.dim_order]
- attr.SliceAttribute(start_attr, size_attr)
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().SLICE, [input_node.name], [output.name], attr
- )
-
-
@register_node_visitor
class SliceVisitor(NodeVisitor):
target = "aten.slice_copy.Tensor"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
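
Both the deleted and surviving visitors perform the same translation: a one-dimensional aten.slice_copy becomes TOSA SLICE start/size attributes expressed in TOSA dim order. A condensed sketch, assuming start_index/end_index are already clamped as in _fixup_start/_fixup_end:

def slice_to_tosa_attrs(shape, dim_order, dim, start_index, end_index):
    size = end_index - start_index  # validated to be positive
    start_attr = [start_index if i == dim else 0 for i in dim_order]
    size_attr = [size if i == dim else shape[i] for i in dim_order]
    return start_attr, size_attr

# shape (1, 4, 8), dim_order (0, 1, 2), dim=2, start=2, end=6 -> ([0, 0, 2], [1, 4, 4])
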
diff --git a/backends/arm/operators/op_sub.py b/backends/arm/operators/op_sub.py
index 07986ea14ae..18b3c853271 100644
--- a/backends/arm/operators/op_sub.py
+++ b/backends/arm/operators/op_sub.py
@@ -24,114 +24,6 @@
from torch.fx import Node
-@register_node_visitor
-class SubVisitor_080_BI(NodeVisitor):
- target = "aten.sub.Tensor"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT32],
- output.tosa_spec,
- )
-
- scale_back = 1.0
- if inputs[0].dtype == ts.DType.INT8:
- rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
- tosa_graph, inputs, node
- )
- else:
- # input[0].dtype == ts.DType.INT32
- # Non quantized input, natively support by TOSA.SUB
- rescaled_inputs = inputs
-
- if output.dtype == ts.DType.INT8:
- broadcasted_shape = tutils.tosa_shape(output.shape, output.dim_order)
- sub_output = tosa_graph.addIntermediate(broadcasted_shape, ts.DType.INT32)
- else:
- # output.dtype == ts.DType.INT32
- sub_output = output
-
- # Do the INT32 Sub
- tosa_graph.addOperator(
- ts.TosaOp.Op().SUB,
- [
- rescaled_inputs[0].name,
- rescaled_inputs[1].name,
- ],
- [sub_output.name],
- None,
- )
-
- if output.dtype == ts.DType.INT8:
- # Scale output back to 8 bit
- # pyre-ignore
- tqutils.insert_rescale_op_to_int8(
- tosa_graph, sub_output, scale_back, node
- ) # type: ignore[possibly-undefined]
-
-
-@register_node_visitor
-class SubVisitor_080_MI(SubVisitor_080_BI):
- # inheriting 'target' from BI class
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
-
- if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
- # Call the inherited define_node for handling integers
- super().define_node(node, tosa_graph, inputs, output)
- else:
- # FP32 Sub lowering
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- # MI lowering
- tosa_graph.addOperator(
- ts.TosaOp.Op().SUB,
- [inputs[0].name, inputs[1].name],
- [output.name],
- None,
- )
-
-
@register_node_visitor
class SubVisitor_INT(NodeVisitor):
target = "aten.sub.Tensor"
diff --git a/backends/arm/operators/op_sum.py b/backends/arm/operators/op_sum.py
index 84a662db01c..54e848a1bef 100644
--- a/backends/arm/operators/op_sum.py
+++ b/backends/arm/operators/op_sum.py
@@ -23,107 +23,6 @@
from torch.fx import Node
-@register_node_visitor
-class SumVisitor_080_BI(NodeVisitor):
- target = "aten.sum.dim_IntList"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 3)
- validate_same_dtype(self.target, [inputs[0], output], ts)
-
- tensor = inputs[0]
- input_shape = list(tensor.shape)
- dim = int(inputs[1].number % len(input_shape))
-
- output_shape = input_shape
- output_shape[dim] = 1 # Output shape is input shape with dim reduced
-
- # Rescale input to 32 bit
- rescaled_inputs, scale = tqutils.insert_rescale_ops_to_int32(
- tosa_graph,
- [tensor],
- node,
- )
-
- attr = ts.TosaSerializerAttribute()
- attr.AxisAttribute(tensor.dim_order.index(dim))
-
- intermediate = tosa_graph.addIntermediate(
- tutils.tosa_shape(output_shape, tensor.dim_order),
- dtype=ts.DType.INT32,
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().REDUCE_SUM,
- [rescaled_inputs[0].name],
- [intermediate.name],
- attr,
- )
-
- tqutils.insert_rescale_op_to_int8(tosa_graph, intermediate, scale, node)
-
-
-@register_node_visitor
-class SumVisitor_080_MI(SumVisitor_080_BI):
- # inheriting 'target' from BI class
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 3)
- validate_same_dtype(self.target, [inputs[0], output], ts)
-
- if inputs[0].dtype == ts.DType.INT8:
- return super().define_node(node, tosa_graph, inputs, output)
-
- tensor = inputs[0]
- input_shape = list(tensor.shape)
- dim = int(inputs[1].number % len(input_shape))
-
- output_shape = input_shape
- output_shape[dim] = 1 # Output shape is input shape with dim reduced
-
- attr = ts.TosaSerializerAttribute()
- attr.AxisAttribute(tensor.dim_order.index(dim))
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().REDUCE_SUM,
- [tensor.name],
- [output.name],
- attr,
- )
-
-
@register_node_visitor
class SumVisitor_INT(NodeVisitor):
target = "aten.sum.dim_IntList"
diff --git a/backends/arm/operators/op_table.py b/backends/arm/operators/op_table.py
index 86720eec373..4886a513881 100644
--- a/backends/arm/operators/op_table.py
+++ b/backends/arm/operators/op_table.py
@@ -7,7 +7,6 @@
from typing import Any, List
-import numpy as np
import torch
from executorch.backends.arm.operators.node_visitor import (
NodeVisitor,
@@ -22,47 +21,9 @@
from executorch.backends.arm.tosa_specification import TosaSpecification
-@register_node_visitor
-class TableVisitor_0_80(NodeVisitor):
- target = "_table.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_valid_dtype(
- self.target, inputs, [ts.DType.INT8, ts.DType.INT16], output.tosa_spec
- )
- if inputs[0].dtype == ts.DType.INT8:
- validate_valid_dtype(self.target, output, ts.DType.INT8, output.tosa_spec)
- if inputs[0].dtype == ts.DType.INT16:
- validate_valid_dtype(self.target, output, ts.DType.INT32, output.tosa_spec)
-
- if node.name not in self._exported_program.state_dict.keys(): # type: ignore[union-attr]
- raise RuntimeError(
- f"Did not find key {node.name} in state_dict {self._exported_program.state_dict.keys()}."
- )
-
- table = self._exported_program.state_dict[node.name] # type: ignore[union-attr]
- table_attr = ts.TosaSerializerAttribute()
- table_attr.TableAttribute(np.array(table))
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr
- )
-
-
@register_node_visitor
class TableVisitor(NodeVisitor):
- target = "_table.default"
+ target = "tosa.TABLE.default"
tosa_specs = [TosaSpecification.create_from_string("TOSA-1.0+INT")]
@@ -75,7 +36,7 @@ def define_node(
) -> None:
import serializer.tosa_serializer as ts # type: ignore
- validate_num_inputs(self.target, inputs, 1)
+ validate_num_inputs(self.target, inputs, 2)
validate_valid_dtype(
self.target, inputs, [ts.DType.INT8, ts.DType.INT16], output.tosa_spec
)
@@ -84,12 +45,12 @@ def define_node(
if inputs[0].dtype == ts.DType.INT16:
validate_valid_dtype(self.target, output, ts.DType.INT32, output.tosa_spec)
- if node.name not in self._exported_program.state_dict.keys(): # type: ignore[union-attr]
+ if inputs[1].name not in self._exported_program.state_dict.keys(): # type: ignore[union-attr]
raise RuntimeError(
f"Did not find key {node.name} in state_dict {self._exported_program.state_dict.keys()}."
)
- table = self._exported_program.state_dict[node.name]
+ table = self._exported_program.state_dict[inputs[1].name] # type: ignore[union-attr]
table_tensor_name = node.name + "_table"
tosa_graph.addConst(
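
For reference, the int8 TOSA TABLE op is a 256-entry lookup indexed by the input value offset by 128 (int16 uses a larger, interpolated table). A sketch of the int8 semantics, using numpy for illustration:

import numpy as np

def table_lookup_int8(x_q, table):
    table = np.asarray(table)
    assert table.shape == (256,)
    return table[x_q.astype(np.int32) + 128]   # map [-128, 127] onto [0, 255]
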
diff --git a/backends/arm/operators/op_tanh.py b/backends/arm/operators/op_tanh.py
index 4804af9b382..0d149397eb6 100644
--- a/backends/arm/operators/op_tanh.py
+++ b/backends/arm/operators/op_tanh.py
@@ -21,34 +21,6 @@
from torch.fx import Node
-@register_node_visitor
-class TanhVisitor_0_80_MI(NodeVisitor):
- target = "aten.tanh.default"
-
- # BI case should be handled by op_table
- tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
- validate_valid_dtype(
- self.target, [*inputs, output], ts.DType.FP32, output.tosa_spec
- )
-
- tosa_graph.addOperator(ts.TosaOp.Op().TANH, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class TanhVisitor(NodeVisitor):
target = "aten.tanh.default"
diff --git a/backends/arm/operators/op_to_copy.py b/backends/arm/operators/op_to_copy.py
index 5dde6828f72..9758a018b87 100644
--- a/backends/arm/operators/op_to_copy.py
+++ b/backends/arm/operators/op_to_copy.py
@@ -18,35 +18,6 @@
from executorch.backends.arm.tosa_mapping import TosaArg
-@register_node_visitor
-class ToCopyVisitor_0_80(NodeVisitor):
- """
- Implement the type cast functionality of _to_copy.
-
- Other features like setting of the memory_format or moving a tensor to a
- different device are not supported.
-
- Also note that the node should not be quantized.
- """
-
- target = "aten._to_copy.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
-
- tosa_graph.addOperator(ts.TosaOp.Op().CAST, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class ToCopyVisitor(NodeVisitor):
"""
@@ -60,7 +31,7 @@ class ToCopyVisitor(NodeVisitor):
target = "aten._to_copy.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def define_node(
self,
diff --git a/backends/arm/operators/op_to_dim_order_copy.py b/backends/arm/operators/op_to_dim_order_copy.py
index d68bee88a64..74bf1a5ad14 100644
--- a/backends/arm/operators/op_to_dim_order_copy.py
+++ b/backends/arm/operators/op_to_dim_order_copy.py
@@ -18,35 +18,6 @@
from executorch.backends.arm.tosa_mapping import TosaArg
-@register_node_visitor
-class ToDimOrderCopyVisitor_0_80(NodeVisitor):
- """
- Implement the type cast functionality of _to_dim_order_copy.
-
- Other features like setting of the dim_order or moving a tensor to a
- different device are not supported.
-
- Also note that the node should not be quantized.
- """
-
- target = "dim_order_ops._to_dim_order_copy.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 1)
-
- tosa_graph.addOperator(ts.TosaOp.Op().CAST, [inputs[0].name], [output.name])
-
-
@register_node_visitor
class ToDimOrderCopyVisitor(NodeVisitor):
"""
@@ -60,7 +31,7 @@ class ToDimOrderCopyVisitor(NodeVisitor):
target = "dim_order_ops._to_dim_order_copy.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def define_node(
self,
diff --git a/backends/arm/operators/op_transpose.py b/backends/arm/operators/op_transpose.py
index 2198e05abb7..91614874d23 100644
--- a/backends/arm/operators/op_transpose.py
+++ b/backends/arm/operators/op_transpose.py
@@ -21,56 +21,17 @@
from executorch.backends.arm.tosa_mapping import TosaArg
-@register_node_visitor
-class TransposeVisitor_0_80(NodeVisitor):
- """
- This node visitor targets the _transpose op defined in the
- passthrough_to_tosa library. Used when switching between tosa_dim_orders.
- Inserts a TOSA TRANSPOSE.
- """
-
- target = "_transpose.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- output_rank = len(output.shape)
- perms = [dim % output_rank for dim in inputs[1].special]
- attr = ts.TosaSerializerAttribute()
- attr.TransposeAttribute(perms)
- tosa_graph.addOperator(
- ts.TosaOp.Op().TRANSPOSE, [inputs[0].name], [output.name], attr
- )
-
-
@register_node_visitor
class TransposeVisitor(NodeVisitor):
"""
- This node visitor targets the _transpose op defined in the
- passthrough_to_tosa library. Used when switching between tosa_dim_orders.
+ This node visitor targets the tosa::TRANSPOSE op defined in the
+ TOSA backend dialect. Used when switching between tosa_dim_orders.
Inserts a TOSA TRANSPOSE.
"""
- target = "_transpose.default"
+ target = "tosa.TRANSPOSE.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def define_node(
self,
diff --git a/backends/arm/operators/op_upsample_bilinear2d.py b/backends/arm/operators/op_upsample_bilinear2d.py
index c7edee9d882..26927bfcfa2 100644
--- a/backends/arm/operators/op_upsample_bilinear2d.py
+++ b/backends/arm/operators/op_upsample_bilinear2d.py
@@ -18,113 +18,15 @@
validate_valid_dtype,
)
from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import build_rescale, build_rescale_v0_80
+from executorch.backends.arm.tosa_quant_utils import build_rescale
from executorch.backends.arm.tosa_utils import get_resize_parameters, tosa_shape
-@register_node_visitor
-class UpsampleBilinear2dVisitor_0_80(NodeVisitor):
- target = "aten.upsample_bilinear2d.vec"
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
-
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
- from tosa_tools.v0_80.tosa.ResizeMode import ResizeMode # type: ignore
-
- validate_num_inputs(self.target, inputs, 4)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- if inputs[0].shape is None or output.shape is None:
- raise ValueError("Only static shapes are supported")
-
- input_dtype = inputs[0].dtype
-
- # tosa_shape output is NHWC, take HW
- input_size_yx = tuple([inputs[0].shape[dim] for dim in inputs[0].dim_order])[
- 1:3
- ]
- output_size_yx = tuple([output.shape[dim] for dim in output.dim_order])[1:3]
-
- # Get align_corners value from the node arguments.
- align_corners = bool(node.args[2])
- scale_n_yx, scale_d_yx, offset_yx, border_yx = get_resize_parameters(
- input_size_yx,
- output_size_yx,
- ResizeMode.NEAREST,
- align_corners=align_corners,
- )
-
- def in_int16_range(x):
- return torch.all(x >= -(2**15)) and torch.all(x <= 2**15 - 1)
-
- if not in_int16_range(scale_n_yx):
- raise ValueError("scale_n_yx is out of the int16 range")
- if not in_int16_range(scale_d_yx):
- raise ValueError("scale_d_yx is out of the int16 range")
- if not in_int16_range(border_yx):
- raise ValueError("border_yx is out of the int16 range")
-
- attr = ts.TosaSerializerAttribute()
- attr.ResizeAttribute(
- scale=[scale_n_yx[0], scale_d_yx[0], scale_n_yx[1], scale_d_yx[1]],
- offset=offset_yx.tolist(),
- border=border_yx.tolist(),
- mode=ResizeMode.BILINEAR,
- )
-
- if input_dtype == output.dtype == ts.DType.FP32:
- tosa_graph.addOperator(
- ts.TosaOp.Op().RESIZE, [inputs[0].name], [output.name], attr
- )
- return
- elif input_dtype == output.dtype == ts.DType.INT8:
- intermediate = tosa_graph.addIntermediate(
- tosa_shape(output.shape, output.dim_order), ts.DType.INT32
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().RESIZE, [inputs[0].name], [intermediate.name], attr
- )
-
- final_output_scale = float(1 / (scale_n_yx[0] * scale_n_yx[1]))
-
- build_rescale_v0_80(
- tosa_fb=tosa_graph,
- scale=[final_output_scale],
- input_node=intermediate,
- output_name=output.name,
- output_type=ts.DType.INT8,
- input_zp=[0],
- output_zp=[0],
- is_double_round=False,
- )
- else:
- raise ValueError(
- "Input/output dtype not in {float32, int8}: {input_dtype=} {output.dtype=}"
- )
-
-
@register_node_visitor
class UpsampleBilinear2dVisitor(NodeVisitor):
target = "aten.upsample_bilinear2d.vec"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
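
get_resize_parameters expresses the scaling of each axis as an integer ratio scale_n/scale_d (plus offset and border), all of which must fit in int16, hence the range checks. A simplified sketch of the ratio under the usual align_corners convention (the real helper also derives offset/border and normalises the fraction):

from fractions import Fraction

def resize_scale(in_size, out_size, align_corners=False):
    if align_corners and in_size > 1 and out_size > 1:
        ratio = Fraction(out_size - 1, in_size - 1)
    else:
        ratio = Fraction(out_size, in_size)
    return ratio.numerator, ratio.denominator   # scale_n, scale_d

# resize_scale(8, 16) -> (2, 1); resize_scale(8, 15, align_corners=True) -> (2, 1)
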
diff --git a/backends/arm/operators/op_upsample_nearest2d.py b/backends/arm/operators/op_upsample_nearest2d.py
index 1c53a6c3c3c..46dcc0605e6 100644
--- a/backends/arm/operators/op_upsample_nearest2d.py
+++ b/backends/arm/operators/op_upsample_nearest2d.py
@@ -20,76 +20,14 @@
from executorch.backends.arm.tosa_mapping import TosaArg
from executorch.backends.arm.tosa_utils import get_resize_parameters
-from tosa_tools.v0_80.tosa.ResizeMode import ResizeMode # type: ignore
-
-
-@register_node_visitor
-class UpsampleNearest2dVisitor_0_80(NodeVisitor):
- target = "aten.upsample_nearest2d.vec"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 3)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
- output.tosa_spec,
- )
-
- # tosa_shape output is NHWC, take HW
- input_size_yx = tuple([inputs[0].shape[dim] for dim in inputs[0].dim_order])[
- 1:3
- ]
- output_size_yx = tuple([output.shape[dim] for dim in output.dim_order])[1:3]
-
- # Align corners shouldn't make a difference for nearest upsampling. We set to False so
- # half pixel centers are used for resize parameter logic.
- scale_n_yx, scale_d_yx, offset_yx, border_yx = get_resize_parameters(
- input_size_yx, output_size_yx, ResizeMode.NEAREST, align_corners=False
- )
-
- def in_int16_range(x):
- return torch.all(x >= -(2**15)) and torch.all(x <= 2**15 - 1)
-
- if not in_int16_range(scale_n_yx):
- raise ValueError("scale_n_yx is out of the int16 range")
- if not in_int16_range(scale_d_yx):
- raise ValueError("scale_d_yx is out of the int16 range")
- if not in_int16_range(border_yx):
- raise ValueError("border_yx is out of the int16 range")
-
- attr = ts.TosaSerializerAttribute()
- attr.ResizeAttribute(
- scale=[scale_n_yx[0], scale_d_yx[0], scale_n_yx[1], scale_d_yx[1]],
- offset=offset_yx.tolist(),
- border=border_yx.tolist(),
- mode=ResizeMode.NEAREST,
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().RESIZE, [inputs[0].name], [output.name], attr
- )
+from tosa.ResizeMode import ResizeMode # type: ignore
@register_node_visitor
class UpsampleNearest2dVisitor(NodeVisitor):
target = "aten.upsample_nearest2d.vec"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
diff --git a/backends/arm/operators/op_view.py b/backends/arm/operators/op_view.py
index 3a34a830d22..1e8c06b691f 100644
--- a/backends/arm/operators/op_view.py
+++ b/backends/arm/operators/op_view.py
@@ -21,47 +21,11 @@
from executorch.backends.arm.tosa_utils import tosa_shape
-@register_node_visitor
-class ViewVisitor_0_80(NodeVisitor):
- target = "aten.view_copy.default"
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [inputs[0], output], ts)
- validate_valid_dtype(
- self.target,
- [inputs[0], output],
- [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32, ts.DType.BOOL],
- output.tosa_spec,
- )
-
- attr = ts.TosaSerializerAttribute()
- new_shape = tosa_shape(inputs[1].special, output.dim_order)
- attr.ReshapeAttribute(new_shape)
- tosa_graph = cast(ts.TosaSerializer, tosa_graph)
- tosa_graph.addOperator(
- ts.TosaOp.Op().RESHAPE, [inputs[0].name], [output.name], attr
- )
-
-
@register_node_visitor
class ViewVisitor(NodeVisitor):
target = "aten.view_copy.default"
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
diff --git a/backends/arm/operators/op_where.py b/backends/arm/operators/op_where.py
index 402acaaf492..e6a87be6387 100644
--- a/backends/arm/operators/op_where.py
+++ b/backends/arm/operators/op_where.py
@@ -20,92 +20,6 @@
from torch.fx import Node
-@register_node_visitor
-class WhereVisitor_0_80_BI(NodeVisitor):
- target = "aten.where.self"
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+BI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def _add_node_to_tosa_graph(
- self,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- supported_dtypes: Sequence,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- validate_num_inputs(self.target, inputs, 3)
- # Not first input, which is condition tensor.
- validate_same_dtype(self.target, inputs[1:], ts)
- validate_valid_dtype(self.target, inputs[0], ts.DType.BOOL, output.tosa_spec)
- validate_valid_dtype(
- self.target,
- [*inputs[1:], output],
- supported_dtypes,
- output.tosa_spec,
- )
-
- tosa_graph.addOperator(
- ts.TosaOp.Op().SELECT,
- [inputs[0].name, inputs[1].name, inputs[2].name],
- [output.name],
- None,
- )
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- bi_supported_dtypes = [
- ts.DType.INT8,
- ts.DType.INT16,
- ts.DType.INT32,
- ts.DType.BOOL,
- ]
- self._add_node_to_tosa_graph(tosa_graph, inputs, output, bi_supported_dtypes)
-
-
-@register_node_visitor
-class WhereVisitor_0_80_MI(WhereVisitor_0_80_BI):
-
- tosa_specs = [
- TosaSpecification.create_from_string("TOSA-0.80+MI"),
- ]
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- mi_supported_dtypes = [
- ts.DType.FP16,
- ts.DType.FP32,
- ts.DType.INT8,
- ts.DType.INT16,
- ts.DType.INT32,
- ts.DType.BOOL,
- ]
- self._add_node_to_tosa_graph(tosa_graph, inputs, output, mi_supported_dtypes)
-
-
@register_node_visitor
class WhereVisitor_INT(NodeVisitor):
target = "aten.where.self"
diff --git a/backends/arm/operators/operator_validation_utils.py b/backends/arm/operators/operator_validation_utils.py
index fde76f31c7a..cc8317497b8 100644
--- a/backends/arm/operators/operator_validation_utils.py
+++ b/backends/arm/operators/operator_validation_utils.py
@@ -6,7 +6,7 @@
from math import ceil, floor
from typing import Any, List, Optional
-from executorch.backends.arm.operators.node_visitor import NodeVisitor
+import serializer.tosa_serializer as ts
def validate_num_inputs(op_name: str, inputs: List[Any], expected: int | List[int]):
@@ -158,10 +158,6 @@ def validate_valid_dtype(
)
"""
- if tosa_spec in NodeVisitor.tosa_specs_0_80:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts
- else:
- import serializer.tosa_serializer as ts
if not tensors:
raise ValueError(
diff --git a/backends/arm/operators/ops_binary.py b/backends/arm/operators/ops_binary.py
index 9c0c15364fc..dc9bd446a34 100644
--- a/backends/arm/operators/ops_binary.py
+++ b/backends/arm/operators/ops_binary.py
@@ -22,62 +22,12 @@
from executorch.backends.arm.tosa_mapping import TosaArg
-def binary_operator_factory_0_80(bw_target: str, tosa_op):
- """Creates and registers NodeVisitors for operators that have two inputs and map directly to a TOSA op."""
-
- class BinaryOperator_0_80(NodeVisitor):
- target = bw_target
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore # noqa: F401
-
- validate_num_inputs(self.target, inputs, 2)
- validate_same_dtype(self.target, [*inputs, output], ts)
-
- if self.target in [
- "aten.bitwise_and.Tensor",
- "aten.bitwise_xor.Tensor",
- "aten.bitwise_or.Tensor",
- "aten.bitwise_left_shift.Tensor",
- ]:
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.INT8, ts.DType.INT16, ts.DType.INT32],
- output.tosa_spec,
- )
- if self.target in [
- "aten.logical_and.default",
- "aten.logical_xor.defaul",
- "aten.logical_or.default",
- ]:
- validate_valid_dtype(
- self.target,
- [*inputs, output],
- [ts.DType.BOOL],
- output.tosa_spec,
- )
-
- tosa_graph.addOperator(
- tosa_op, [inputs[0].name, inputs[1].name], [output.name]
- )
-
- register_node_visitor(BinaryOperator_0_80)
-
-
def binary_operator_factory(bw_target: str, tosa_op):
"""Creates and registers NodeVisitors for operators that have two inputs and map directly to a TOSA op."""
class BinaryOperator(NodeVisitor):
target = bw_target
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def define_node(
self,
@@ -122,18 +72,6 @@ def define_node(
register_node_visitor(BinaryOperator)
-import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
-binary_operator_factory_0_80("aten.bitwise_and.Tensor", ts.TosaOp.Op().BITWISE_AND)
-binary_operator_factory_0_80("aten.bitwise_xor.Tensor", ts.TosaOp.Op().BITWISE_XOR)
-binary_operator_factory_0_80("aten.bitwise_or.Tensor", ts.TosaOp.Op().BITWISE_OR)
-binary_operator_factory_0_80("aten.logical_and.default", ts.TosaOp.Op().LOGICAL_AND)
-binary_operator_factory_0_80("aten.logical_xor.default", ts.TosaOp.Op().LOGICAL_XOR)
-binary_operator_factory_0_80("aten.logical_or.default", ts.TosaOp.Op().LOGICAL_OR)
-binary_operator_factory_0_80(
- "aten.bitwise_left_shift.Tensor", ts.TosaOp.Op().LOGICAL_LEFT_SHIFT
-)
-
import serializer.tosa_serializer as ts # type: ignore
binary_operator_factory("aten.bitwise_and.Tensor", ts.TosaOp.Op().BITWISE_AND)
diff --git a/backends/arm/operators/ops_identity.py b/backends/arm/operators/ops_identity.py
index ad5ee0c956d..238b033f8eb 100644
--- a/backends/arm/operators/ops_identity.py
+++ b/backends/arm/operators/ops_identity.py
@@ -21,41 +21,6 @@
from executorch.backends.arm.tosa_mapping import TosaArg
-def identity_operator_factory_v0_80(identity_target: str):
- """
- Creates and registers NodeVisitors for operators that map directly
- to a TOSA IDENTITY op.
- """
-
- class IdentityOperatorVisitor(NodeVisitor):
- target = identity_target
-
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
-
- # Simply add an identityOp
- tosa_graph.addOperator(
- ts.TosaOp.Op().IDENTITY, [inputs[0].name], [output.name]
- )
-
- register_node_visitor(IdentityOperatorVisitor)
-
-
-identity_operator_factory_v0_80("getitem")
-identity_operator_factory_v0_80("aten.alias_copy.default")
-
-
def identity_operator_factory(identity_target: str):
"""
Creates and registers NodeVisitors for operators that map directly
@@ -65,7 +30,7 @@ def identity_operator_factory(identity_target: str):
class IdentityOperatorVisitor(NodeVisitor):
target = identity_target
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def define_node(
self,
diff --git a/backends/arm/operators/ops_unary.py b/backends/arm/operators/ops_unary.py
index 3345619a68e..48092e13968 100644
--- a/backends/arm/operators/ops_unary.py
+++ b/backends/arm/operators/ops_unary.py
@@ -21,44 +21,6 @@
from executorch.backends.arm.tosa_mapping import TosaArg
-def unary_operator_factory_0_80(unary_target: str, tosa_op):
- "Creates and registers NodeVisitors for operations that have one input and map directly into a TOSA op."
-
- # Some TOSA unary operators only support float
- fp_only_ops = ["aten.floor.default"]
-
- class UnaryOperator_0_80(NodeVisitor):
- target = unary_target
- tosa_specs = NodeVisitor.tosa_specs_0_80
-
- def __init__(self, *args):
- super().__init__(*args)
-
- def define_node(
- self,
- node: torch.fx.Node,
- tosa_graph: Any,
- inputs: List[TosaArg],
- output: TosaArg,
- ) -> None:
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore # noqa: F401
-
- validate_num_inputs(self.target, inputs, 1)
- validate_same_dtype(self.target, [*inputs, output], ts)
-
- if self.target in fp_only_ops:
- validate_valid_dtype(
- self.target,
- inputs[0],
- ts.DType.FP32,
- output.tosa_spec,
- )
-
- tosa_graph.addOperator(tosa_op, [inputs[0].name], [output.name])
-
- register_node_visitor(UnaryOperator_0_80)
-
-
def unary_operator_factory(unary_target: str, tosa_op):
"Creates and registers NodeVisitors for operations that have one input and map directly into a TOSA op."
@@ -67,7 +29,7 @@ def unary_operator_factory(unary_target: str, tosa_op):
class UnaryOperator(NodeVisitor):
target = unary_target
- tosa_specs = NodeVisitor.tosa_specs_1_00
+ tosa_specs = NodeVisitor.tosa_specs
def __init__(self, *args):
super().__init__(*args)
@@ -97,12 +59,6 @@ def define_node(
register_node_visitor(UnaryOperator)
-import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
-unary_operator_factory_0_80("aten.ceil.default", ts.TosaOp.Op().CEIL)
-unary_operator_factory_0_80("aten.floor.default", ts.TosaOp.Op().FLOOR)
-unary_operator_factory_0_80("aten.logical_not.default", ts.TosaOp.Op().LOGICAL_NOT)
-
import serializer.tosa_serializer as ts # type: ignore
unary_operator_factory("aten.ceil.default", ts.TosaOp.Op().CEIL)
diff --git a/backends/arm/process_node.py b/backends/arm/process_node.py
index 0994079c4ab..ee8eb08592a 100644
--- a/backends/arm/process_node.py
+++ b/backends/arm/process_node.py
@@ -8,16 +8,13 @@
from typing import Any, cast, Dict
import numpy as np
+import serializer.tosa_serializer as ts
import torch
import torch.fx
from executorch.backends.arm.operators.node_visitor import NodeVisitor
from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_specification import (
- Tosa_0_80,
- Tosa_1_00,
- TosaSpecification,
-)
-from executorch.backends.arm.tosa_utils import getNodeArgs, tosa_shape
+from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_utils import tosa_shape
from torch._export.utils import (
get_buffer,
get_lifted_tensor_constant,
@@ -36,7 +33,10 @@ def process_call_function(
tosa_spec: TosaSpecification,
):
# Unpack arguments and convert
- inputs = getNodeArgs(node, tosa_spec)
+ try:
+ inputs = [TosaArg(arg, tosa_spec) for arg in node.args]
+ except ValueError as e:
+ raise ValueError(f"Failed processing args to op:\n{node}") from e
# Convert output (this node itself)
try:
@@ -85,13 +85,6 @@ def process_inputs(
"Is the original torch function supported?"
) from e
- if isinstance(tosa_spec, Tosa_0_80):
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
- elif isinstance(tosa_spec, Tosa_1_00):
- import serializer.tosa_serializer as ts
- else:
- raise ValueError(f"Unsupported TOSA spec: {tosa_spec}")
-
input_shape = tosa_arg.shape
input_dim_order = tosa_arg.dim_order
tensor = ts.TosaSerializerTensor(
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index 734ddec4359..9fa15568cc4 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -14,18 +14,17 @@
from __future__ import annotations
import functools
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
import torch
from executorch.backends.arm._passes import ArmPassManager
from executorch.backends.arm.quantizer import QuantizationConfig
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import get_tosa_spec, TosaSpecification
from .arm_quantizer_utils import is_annotated, mark_node_as_annotated
from .quantization_annotator import annotate_graph
from executorch.backends.arm.arm_backend import (
- get_tosa_spec,
is_ethosu,
is_vgf,
) # usort: skip
@@ -102,18 +101,20 @@ def get_symmetric_quantization_config(
weight_observer_or_fake_quant_ctr: ObserverOrFakeQuantizeConstructor = (
MinMaxObserver
)
+
# Determine the right observer/fake-quant constructor
if is_qat:
- # Set plain fake-quant with true min/max
- weight_observer_or_fake_quant_ctr = FakeQuantize
+ if is_per_channel:
+ weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver
+ else:
+ # Set plain fake-quant with true min/max
+ weight_observer_or_fake_quant_ctr = FakeQuantize
else:
# PTQ: set min/max observer
weight_observer_or_fake_quant_ctr = (
PerChannelMinMaxObserver if is_per_channel else MinMaxObserver
)
- extra_args = {"eps": 2**-12}
-
weight_quantization_spec = QuantizationSpec(
dtype=torch.int8,
quant_min=weight_qmin,
@@ -218,9 +219,35 @@ def not_module_type_or_name_filter(n: Node) -> bool:
class TOSAQuantizer(Quantizer):
- def __init__(self, tosa_spec: TosaSpecification) -> None:
+ def __init__(
+ self, compile_spec_or_tosa_spec: Union[TosaSpecification, List[CompileSpec]]
+ ) -> None:
+
super().__init__()
- self.tosa_spec = tosa_spec
+ if isinstance(compile_spec_or_tosa_spec, TosaSpecification):
+ self.tosa_spec = compile_spec_or_tosa_spec
+ self.compile_spec = None
+ elif isinstance(compile_spec_or_tosa_spec, list):
+ self.compile_spec = compile_spec_or_tosa_spec
+ # find entry that is 'tosa_spec'
+ for cs in compile_spec_or_tosa_spec:
+ if cs.key == "tosa_spec":
+ spec_val = (
+ cs.value.decode() if isinstance(cs.value, bytes) else cs.value
+ )
+ self.tosa_spec = TosaSpecification.create_from_string(spec_val)
+ break
+ else:
+ raise ValueError(
+ "compile_spec list did not contain a 'tosa_spec' entry"
+ )
+ else:
+ raise TypeError(
+ f"TOSAQuantizer constructor expects "
+ f"a TosaSpecification or compile_spec list, "
+ f"got {type(compile_spec_or_tosa_spec)}"
+ )
+
self.global_config: Optional[QuantizationConfig] = None
self.io_config: Optional[QuantizationConfig] = None
self.module_type_config: Dict[Callable, Optional[QuantizationConfig]] = {}
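
With this change the quantizer accepts either constructor form; a hypothetical usage sketch (compile_spec stands in for a List[CompileSpec] that contains a 'tosa_spec' entry):

spec = TosaSpecification.create_from_string("TOSA-1.0+INT")
quantizer = TOSAQuantizer(spec)             # from a TosaSpecification
# quantizer = TOSAQuantizer(compile_spec)   # or from a compile-spec list
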
diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py
index 5c9528debbe..838dd44733e 100644
--- a/backends/arm/quantizer/arm_quantizer_utils.py
+++ b/backends/arm/quantizer/arm_quantizer_utils.py
@@ -11,11 +11,9 @@
# Utility functions for TOSAQuantizer
#
-from typing import cast, Sequence
+from typing import cast
-import torch
-from torch._subclasses import FakeTensor
-from torch.fx import GraphModule, Node
+from torch.fx import Node
from torchao.quantization.pt2e.quantizer import QuantizationAnnotation
from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
@@ -45,62 +43,3 @@ def mark_node_as_annotated(node: Node) -> None:
if Q_ANNOTATION_KEY not in node.meta:
node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation()
node.meta[Q_ANNOTATION_KEY]._annotated = True
-
-
-def is_ok_for_quantization(node: Node, gm: GraphModule):
- """Check if an node can be quantized. The node can not be quantized if:
- - The node does not output a float tensor or,
- - The node outputs a large scalar.
- """
- return not (is_non_float_tensor(node) or is_large_scalar(node, gm))
-
-
-def get_node_target(module: torch.nn.Module | GraphModule, target_str: str):
- targets = target_str.split(".")
- for target in targets[:-1]:
- module = module.get_submodule(target)
- return getattr(module, targets[-1])
-
-
-def is_large_scalar(node: Node, gm: GraphModule):
- """Check if input is a large scalar value. So that we can skip quantization for the node
- since histc op (in HistogramObserver) only works for values up to certain upper bound
- """
- if node.op == "get_attr" and isinstance(node.target, str):
- tensor = get_node_target(gm, node.target)
- # torch.histc works until this upper bound
- HISTC_UPPER_BOUND = 3.4028235e15
- return tensor.numel() == 1 and abs(tensor.item()) > HISTC_UPPER_BOUND
- return False
-
-
-def is_non_float_tensor(node: Node) -> bool:
- """Check if the output of a node has a data type other than `torch.float32`.
-
- If the output is not `torch.float32`, quantization cannot be performed, as
- observers only work with floating-point tensors.
-
- Args:
- node (Node): The node to check the output(s) for.
-
- Returns:
- bool: `True` if the data type is not float32, otherwise `False`.
-
- Note:
- - If `node.meta["val"]` is a `list`, the function returns `True` if **any**
- element is **not** an instance of `FakeTensor` or does **not** have
- `torch.float32` as its data type.
- - If node.meta["val"] is missing or is not an instance of `FakeTensor`, the
- function returns True.
- """
- if "val" in node.meta and isinstance(node.meta["val"], Sequence):
- return any(
- not isinstance(fake_tensor, FakeTensor)
- or fake_tensor.dtype != torch.float32
- for fake_tensor in node.meta["val"]
- )
-
- if "val" not in node.meta or not isinstance(node.meta["val"], FakeTensor):
- return True
-
- return node.meta["val"].dtype != torch.float32
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
index 80ea569f249..55cf08298bb 100644
--- a/backends/arm/quantizer/quantization_annotator.py
+++ b/backends/arm/quantizer/quantization_annotator.py
@@ -6,13 +6,14 @@
import logging
import operator
from dataclasses import dataclass
-from typing import Callable, List, Optional
+from typing import Callable, List, Optional, Sequence
import torch
import torch.fx
import torch.nn.functional as F
+from executorch.backends.arm.common.debug import get_node_debug_info
from executorch.backends.arm.quantizer import QuantizationConfig
-from executorch.backends.arm.tosa_utils import get_node_debug_info
+from torch._subclasses import FakeTensor
from torch.fx import Node
from torchao.quantization.pt2e.quantizer import (
@@ -24,7 +25,6 @@
from .arm_quantizer_utils import (
is_annotated,
- is_ok_for_quantization,
is_output_annotated,
mark_node_as_annotated,
)
@@ -78,9 +78,16 @@ def _is_ok_for_quantization(
"""
# Check output
if quant_properties.quant_output is not None:
- if not is_ok_for_quantization(node, gm): # type: ignore[attr-defined]
+ if _is_non_float_tensor(node):
logger.debug(
- f"Could not quantize node due to output: "
+ "Could not quantize non float tensor for the following output node: "
+ f"{get_node_debug_info(node, gm)}"
+ )
+
+ return False
+ elif _is_large_scalar(node, gm):
+ logger.debug(
+ "Could not quantize large scalar node for the following output node: "
f"{get_node_debug_info(node, gm)}"
)
@@ -99,10 +106,18 @@ def _is_ok_for_quantization(
raise TypeError(
f"n_arg must be a Node instance, got {type(n_arg).__name__!r}"
)
- if not is_ok_for_quantization(n_arg, gm): # type: ignore[attr-defined]
+
+ if _is_non_float_tensor(n_arg):
+ logger.debug(
+ "Could not quantize non float tensor for the following input "
+ f"node: {get_node_debug_info(node, gm)}"
+ )
+
+ return False
+ elif _is_large_scalar(n_arg, gm):
logger.debug(
- f'could not quantize node due to input "{node}": '
- f"{get_node_debug_info(node, gm)}"
+ "Could not quantize large scalar node for the following input "
+ f"node: {get_node_debug_info(node, gm)}"
)
return False
@@ -110,6 +125,58 @@ def _is_ok_for_quantization(
return True
+def _get_node_target(module: torch.nn.Module | torch.fx.GraphModule, target_str: str):
+ targets = target_str.split(".")
+ for target in targets[:-1]:
+ module = module.get_submodule(target)
+ return getattr(module, targets[-1])
+
+
+def _is_large_scalar(node: Node, gm: torch.fx.GraphModule):
+ """Check if input is a large scalar value. So that we can skip quantization for the
+ node since histc op (in HistogramObserver) only works for values up to certain upper
+ bound.
+ """
+ if node.op == "get_attr" and isinstance(node.target, str):
+ tensor = _get_node_target(gm, node.target)
+ # torch.histc works until this upper bound
+ HISTC_UPPER_BOUND = 3.4028235e15
+ return tensor.numel() == 1 and abs(tensor.item()) > HISTC_UPPER_BOUND
+ return False
+
+
+def _is_non_float_tensor(node: Node) -> bool:
+ """Check if the output of a node has a data type other than `torch.float32`.
+
+ If the output is not `torch.float32`, quantization cannot be performed, as
+ observers only work with floating-point tensors.
+
+ Args:
+ node (Node): The node to check the output(s) for.
+
+ Returns:
+ bool: `True` if the data type is not float32, otherwise `False`.
+
+ Note:
+ - If `node.meta["val"]` is a `list`, the function returns `True` if **any**
+ element is **not** an instance of `FakeTensor` or does **not** have
+ `torch.float32` as its data type.
+ - If node.meta["val"] is missing or is not an instance of `FakeTensor`, the
+ function returns True.
+ """
+ if "val" in node.meta and isinstance(node.meta["val"], Sequence):
+ return any(
+ not isinstance(fake_tensor, FakeTensor)
+ or fake_tensor.dtype != torch.float32
+ for fake_tensor in node.meta["val"]
+ )
+
+ if "val" not in node.meta or not isinstance(node.meta["val"], FakeTensor):
+ return True
+
+ return node.meta["val"].dtype != torch.float32
+
+
def _annotate_input(node: Node, quant_property: _QuantProperty):
if is_annotated(node):
raise RuntimeError(
@@ -198,6 +265,9 @@ def _match_pattern(
torch.ops.aten.ceil.default,
torch.ops.aten.erf.default,
torch.ops.aten.exp.default,
+ torch.ops.aten.elu.default,
+ torch.ops.aten.expm1.default,
torch.ops.aten.floor.default,
torch.ops.aten.log.default,
torch.ops.aten.reciprocal.default,
@@ -219,6 +289,10 @@ def _match_pattern(
torch.ops.aten.sign.default,
torch.ops.aten.asin.default,
torch.ops.aten.atanh.default,
+ torch.ops.aten.asinh.default,
+ torch.ops.aten.cosh.default,
+ torch.ops.aten.acos.default,
+ torch.ops.aten.cumsum.default,
]
_one_to_one_shared_input_qspec = [
@@ -267,6 +341,10 @@ def _match_pattern(
torch.ops.aten.unflatten.int,
torch.ops.aten.index_select.default,
torch.ops.aten.index.Tensor,
+ # The neg operator flips the range but keeps the magnitude the same.
+ # That is why we force it to use the same qparams and avoid a
+ # dequant -> neg -> requant chain.
+ torch.ops.aten.neg.default,
]
_one_to_one_shared_input_or_input_act_qspec = [
@@ -468,9 +546,6 @@ def any_or_hardtanh_min_zero(n: Node):
)
]
quant_properties.quant_output = _QuantProperty(0, shared_qspec) # type: ignore[arg-type]
- elif node.target in (torch.ops.aten.neg.default,):
- quant_properties.quant_inputs = [_QuantProperty(0, input_act_qspec)]
- quant_properties.quant_output = _QuantProperty(0, input_act_qspec)
elif node.target in _one_to_one:
quant_properties.quant_inputs = [_QuantProperty(0, input_act_qspec)]
quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
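
The rationale for moving neg into the shared-qspec list: with symmetric int8 quantization (zero point 0), negating the integer value already equals negating the dequantized value, so input and output can share qparams and no requantization is needed. A one-line check:

def dequant(q, scale, zero_point=0):
    return (q - zero_point) * scale

assert dequant(-5, 0.1) == -dequant(5, 0.1)   # holds whenever zero_point == 0
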
diff --git a/backends/arm/quantizer/quantization_config.py b/backends/arm/quantizer/quantization_config.py
index 8f31f019332..d5c3aab1060 100644
--- a/backends/arm/quantizer/quantization_config.py
+++ b/backends/arm/quantizer/quantization_config.py
@@ -13,7 +13,6 @@
from torchao.quantization.pt2e.quantizer import (
DerivedQuantizationSpec,
- FixedQParamsQuantizationSpec,
QuantizationSpec,
)
@@ -122,21 +121,3 @@ def _derive_qparams_fn(
"Only float dtype for bias is supported for bias right now"
)
return self.bias
-
- def get_fixed_qspec(
- self,
- scale: float,
- zp: int,
- dtype: torch.dtype = torch.int8,
- quant_min: int = -128,
- quant_max: int = 127,
- ) -> FixedQParamsQuantizationSpec:
- """Returns a new FixedQParamsQuantizationSpec with the given parameters."""
- return FixedQParamsQuantizationSpec(
- dtype=dtype,
- qscheme=torch.per_tensor_affine,
- scale=scale,
- zero_point=zp,
- quant_min=quant_min,
- quant_max=quant_max,
- )
diff --git a/backends/arm/runtime/EthosUBackend.cpp b/backends/arm/runtime/EthosUBackend.cpp
index d29c32b02f3..c91ad4021c4 100644
--- a/backends/arm/runtime/EthosUBackend.cpp
+++ b/backends/arm/runtime/EthosUBackend.cpp
@@ -70,6 +70,7 @@ using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::Result;
+using executorch::runtime::Span;
#define ETHOSU_NUM_BASE_ADDRS 3
@@ -140,7 +141,7 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
Error execute(
BackendExecutionContext& context,
DelegateHandle* input_handle,
- EValue** args) const override {
+ Span<EValue*> args) const override {
#if defined(ET_EVENT_TRACER_ENABLED)
EventTracer* event_tracer = context.event_tracer();
EventTracerEntry event_tracer_local_scope;
@@ -191,8 +192,9 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
// Use a temporary allocator for the intermediate tensors of the
// computation. The allocator is released in runtime/executor/method.cpp at
// the end of the execution of the Ethos-U custom delegate
- char* ethosu_scratch =
- static_cast<char*>(temp_allocator->allocate(handles.scratch_data_size));
+ // The Ethos-U driver requires 16-byte alignment.
+ char* ethosu_scratch = static_cast<char*>(
+ temp_allocator->allocate(handles.scratch_data_size, 16UL));
if (ethosu_scratch == nullptr) {
ET_LOG(
Error,
diff --git a/backends/arm/runtime/VGFBackend.cpp b/backends/arm/runtime/VGFBackend.cpp
index ea4f4286eb9..0f79033d990 100644
--- a/backends/arm/runtime/VGFBackend.cpp
+++ b/backends/arm/runtime/VGFBackend.cpp
@@ -25,6 +25,7 @@ using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::Result;
+using executorch::runtime::Span;
// We use the platform and runtime environment provided by the Vulkan delegate
#include
@@ -152,7 +153,7 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface {
Error execute(
ET_UNUSED BackendExecutionContext& context,
DelegateHandle* handle,
- EValue** args) const override {
+ Span<EValue*> args) const override {
VgfRepr* repr = static_cast<VgfRepr*>(handle);
// Copy all inputs from EValue to VkDeviceMemory
@@ -264,15 +265,60 @@ VkResult vkml_allocate_basics(
.engineVersion = 0,
.apiVersion = VK_API_VERSION_1_3,
};
+
+ std::vector<const char*> requested_extensions;
+ VkInstanceCreateFlags instance_flags = 0;
+
+#ifdef __APPLE__
+ instance_flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
+
+ uint32_t extension_count = 0;
+ result = vkEnumerateInstanceExtensionProperties(
+ nullptr, &extension_count, nullptr);
+
+ if (result != VK_SUCCESS) {
+ ET_LOG(Error, "Failed to enumerate instance extensions");
+ return result;
+ }
+
+ std::vector<VkExtensionProperties> extension_properties(extension_count);
+ result = vkEnumerateInstanceExtensionProperties(
+ nullptr, &extension_count, extension_properties.data());
+
+ if (result != VK_SUCCESS) {
+ ET_LOG(Error, "Failed to enumerate instance extensions");
+ return result;
+ }
+
+ if (std::any_of(
+ extension_properties.begin(),
+ extension_properties.end(),
+ [](const auto& extension) {
+ return strcmp(
+ VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
+ extension.extensionName) == 0;
+ })) {
+ requested_extensions.push_back(
+ VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
+ }
+
+ if (requested_extensions.empty()) {
+ ET_LOG(Error, "VK_KHR_portability_enumeration not found");
+ }
+
+#endif
+
VkInstanceCreateInfo instance_info{
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.pNext = nullptr,
- .flags = 0,
+ .flags = instance_flags,
.pApplicationInfo = &app_info,
- 0,
- nullptr,
- 0,
- nullptr};
+ .enabledLayerCount = 0,
+ .ppEnabledLayerNames = nullptr,
+ .enabledExtensionCount =
+ static_cast<uint32_t>(requested_extensions.size()),
+ .ppEnabledExtensionNames = requested_extensions.data(),
+ };
result = vkCreateInstance(&instance_info, nullptr, instance);
if (result != VK_SUCCESS) {
ET_LOG(Error, "Failed to create VkInstance");
diff --git a/backends/arm/runtime/VGFSetup.cpp b/backends/arm/runtime/VGFSetup.cpp
index 18c9dbc9727..eb802017c68 100644
--- a/backends/arm/runtime/VGFSetup.cpp
+++ b/backends/arm/runtime/VGFSetup.cpp
@@ -517,14 +517,30 @@ bool VgfRepr::process_vgf(const char* vgf_data, ArrayRef specs) {
return false;
}
+ std::vector<VkDescriptorPoolSize> poolSizes;
+ poolSizes.reserve(layout_bindings.size());
+ for (const auto& b : layout_bindings) {
+ bool found = false;
+ for (size_t idx = 0; idx < poolSizes.size(); ++idx) {
+ if (poolSizes[idx].type == b.descriptorType) {
+ poolSizes[idx].descriptorCount += b.descriptorCount;
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ poolSizes.push_back({b.descriptorType, b.descriptorCount});
+ }
+ }
+
// Create descriptor pool and descriptors for pipeline
const VkDescriptorPoolCreateInfo descriptor_pool_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.maxSets = static_cast<uint32_t>(set_count),
- .poolSizeCount = 0,
- .pPoolSizes = nullptr,
+ .poolSizeCount = static_cast<uint32_t>(poolSizes.size()),
+ .pPoolSizes = poolSizes.data(),
};
result = vkCreateDescriptorPool(
vk_device, &descriptor_pool_info, nullptr, &vk_descriptor_pool);
diff --git a/backends/arm/scripts/build_executor_runner.sh b/backends/arm/scripts/build_executor_runner.sh
index 974c5ca1ff7..8482e2a0113 100755
--- a/backends/arm/scripts/build_executor_runner.sh
+++ b/backends/arm/scripts/build_executor_runner.sh
@@ -25,6 +25,7 @@ output_folder_set=false
output_folder="."
et_build_root="${et_root_dir}/arm_test"
ethosu_tools_dir=${et_root_dir}/examples/arm/ethos-u-scratch
+select_ops_list=""
build_bundleio_flags=" -DET_BUNDLE_IO=OFF "
build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF "
@@ -32,7 +33,7 @@ build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF "
help() {
echo "Usage: $(basename $0) [options]"
echo "Options:"
- echo " --pte= pte file (genrated by the aot_arm_compier from the model to include in the elf"
+ echo " --pte=|semihosting pte file (generated by the aot_arm_compier from the model to include in the elf), or semihosting to supply pte at runtime."
echo " --target= Target to build and run for Default: ${target}"
echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}"
echo " --bundleio Support both pte and Bundle IO bpte using Devtools BundelIO with Input/RefOutput included"
@@ -46,7 +47,10 @@ help() {
echo " --output= Output folder Default: /_.pte"
echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}"
echo " --ethosu_tools_dir= Path to your Ethos-U tools dir if you not using default: ${ethosu_tools_dir}"
- echo " --toolchain= Toolchain can be specified (e.g. bare metal as arm-none-eabi-gcc or zephyr as arm-zephyr-eabi-gcc"
+  echo "  --toolchain=<toolchain>       Toolchain can be specified (e.g. bare metal as arm-none-eabi-gcc or zephyr as arm-zephyr-eabi-gcc). Default: ${toolchain}"
+  echo "  --select_ops_list=<op list>   Comma-separated list of portable (non-delegated) kernels to include. Default: ${select_ops_list}"
+  echo "                                NOTE: Use this when select_ops_model cannot be used, e.g. for semihosting or bundleio."
+  echo "                                See https://docs.pytorch.org/executorch/stable/kernel-library-selective-build.html for more information."
exit 0
}
@@ -65,6 +69,7 @@ for arg in "$@"; do
--et_build_root=*) et_build_root="${arg#*=}";;
--ethosu_tools_dir=*) ethosu_tools_dir="${arg#*=}";;
--toolchain=*) toolchain="${arg#*=}";;
+ --select_ops_list=*) select_ops_list="${arg#*=}";;
*)
;;
esac
@@ -75,7 +80,7 @@ if [[ ${toolchain} == "arm-none-eabi-gcc" ]]; then
elif [[ ${toolchain} == "arm-zephyr-eabi-gcc" ]]; then
toolchain_cmake=${et_root_dir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
else
- echo "Error: Invalid toolchain selection, provided: ${tolchain}"
+ echo "Error: Invalid toolchain selection, provided: ${toolchain}"
echo " Valid options are {arm-none-eabi-gcc, arm-zephyr-eabi-gcc}"
exit 1;
fi
@@ -88,18 +93,24 @@ toolchain_cmake=$(realpath ${toolchain_cmake})
source ${setup_path_script}
-pte_file=$(realpath ${pte_file})
+if [[ ${pte_file} == "semihosting" ]]; then
+ extra_build_flags="${extra_build_flags} -DSEMIHOSTING=ON"
+else
+ pte_file=$(realpath ${pte_file})
+ extra_build_flags="${extra_build_flags} -DET_PTE_FILE_PATH:PATH='${pte_file}'"
+fi
ethosu_tools_dir=$(realpath ${ethosu_tools_dir})
ethos_u_root_dir="$ethosu_tools_dir/ethos-u"
mkdir -p "${ethos_u_root_dir}"
ethosu_tools_dir=$(realpath ${ethos_u_root_dir})
et_build_dir=${et_build_root}/cmake-out
+mkdir -p ${et_build_dir}
et_build_dir=$(realpath ${et_build_dir})
if [ "$output_folder_set" = false ] ; then
# remove file ending
- output_folder=${pte_file%.*}
+ output_folder=${pte_file%.*}/cmake-out
fi
if [[ ${system_config} == "" ]]
@@ -129,7 +140,7 @@ else
target_cpu=cortex-m85
fi
echo "--------------------------------------------------------------------------------"
-echo "Build Arm ${toolchain/-gcc/} executor_runner for ${target} with ${pte_file} using ${system_config} ${memory_mode} ${extra_build_flags} to '${output_folder}/cmake-out'"
+echo "Build Arm ${toolchain/-gcc/} executor_runner for ${target} with ${pte_file} using ${system_config} ${memory_mode} ${extra_build_flags} to '${output_folder}'"
echo "--------------------------------------------------------------------------------"
cd ${et_root_dir}/examples/arm/executor_runner
@@ -149,7 +160,6 @@ cmake \
-DTARGET_CPU=${target_cpu} \
-DET_DIR_PATH:PATH=${et_root_dir} \
-DET_BUILD_DIR_PATH:PATH=${et_build_dir} \
- -DET_PTE_FILE_PATH:PATH="${pte_file}" \
-DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \
-DETHOSU_TARGET_NPU_CONFIG=${target} \
${build_bundleio_flags} \
@@ -157,15 +167,16 @@ cmake \
-DPYTHON_EXECUTABLE=$(which python3) \
-DSYSTEM_CONFIG=${system_config} \
-DMEMORY_MODE=${memory_mode} \
+ -DEXECUTORCH_SELECT_OPS_LIST="${select_ops_list}" \
${extra_build_flags} \
- -B ${output_folder}/cmake-out
+ -B ${output_folder}
echo "[${BASH_SOURCE[0]}] Configured CMAKE"
-cmake --build ${output_folder}/cmake-out -j$(nproc) -- arm_executor_runner
+cmake --build ${output_folder} -j$(nproc) -- arm_executor_runner
echo "[${BASH_SOURCE[0]}] Generated ${toolchain} elf file:"
-find ${output_folder}/cmake-out -name "arm_executor_runner"
-echo "executable_text: $(find ${output_folder}/cmake-out -name arm_executor_runner -exec ${toolchain/-gcc/-size} {} \; | grep -v filename | awk '{print $1}') bytes"
-echo "executable_data: $(find ${output_folder}/cmake-out -name arm_executor_runner -exec ${toolchain/-gcc/-size} {} \; | grep -v filename | awk '{print $2}') bytes"
-echo "executable_bss: $(find ${output_folder}/cmake-out -name arm_executor_runner -exec ${toolchain/-gcc/-size} {} \; | grep -v filename | awk '{print $3}') bytes"
+find ${output_folder} -name "arm_executor_runner"
+echo "executable_text: $(find ${output_folder} -name arm_executor_runner -exec ${toolchain/-gcc/-size} {} \; | grep -v filename | awk '{print $1}') bytes"
+echo "executable_data: $(find ${output_folder} -name arm_executor_runner -exec ${toolchain/-gcc/-size} {} \; | grep -v filename | awk '{print $2}') bytes"
+echo "executable_bss: $(find ${output_folder} -name arm_executor_runner -exec ${toolchain/-gcc/-size} {} \; | grep -v filename | awk '{print $3}') bytes"
diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh
index c66eeea4ca9..84c675ddb4a 100755
--- a/backends/arm/scripts/build_executorch.sh
+++ b/backends/arm/scripts/build_executorch.sh
@@ -19,8 +19,8 @@ _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly ins
et_build_root="${et_root_dir}/arm_test"
build_type="Release"
-build_devtools=false
-build_with_etdump=false
+build_devtools=OFF
+build_with_etdump=OFF
help() {
echo "Usage: $(basename $0) [options]"
@@ -29,7 +29,7 @@ help() {
echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}"
echo " --devtools Build Devtools libs"
echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log"
- echo " --toolchain= Toolchain can be specified (e.g. bare metal as arm-none-eabi-gcc or zephyr as arm-zephyr-eabi-gcc"
+  echo "  --toolchain=<toolchain>       Toolchain can be specified (e.g. bare metal as arm-none-eabi-gcc or zephyr as arm-zephyr-eabi-gcc). Default: ${toolchain}"
exit 0
}
@@ -38,8 +38,8 @@ for arg in "$@"; do
-h|--help) help ;;
--et_build_root=*) et_build_root="${arg#*=}";;
--build_type=*) build_type="${arg#*=}";;
- --devtools) build_devtools=true ;;
- --etdump) build_with_etdump=true ;;
+ --devtools) build_devtools=ON ;;
+ --etdump) build_with_etdump=ON ;;
--toolchain=*) toolchain="${arg#*=}";;
*)
;;
@@ -48,10 +48,10 @@ done
if [[ ${toolchain} == "arm-none-eabi-gcc" ]]; then
toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/${toolchain}.cmake
-elif [[ ${toolchain} == "arm-zephyr-eabi-gcc" ]]; then
+elif [[ ${toolchain} == "arm-zephyr-eabi-gcc" ]]; then
toolchain_cmake=${et_root_dir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
else
- echo "Error: Invalid toolchain selection, provided: ${tolchain}"
+ echo "Error: Invalid toolchain selection, provided: ${toolchain}"
echo " Valid options are {arm-none-eabi-gcc, arm-zephyr-eabi-gcc}"
exit 1;
fi
@@ -74,40 +74,12 @@ cd "${et_root_dir}"
echo "Build ExecuTorch target libs ${build_type} into '${et_build_dir}'" ;
echo "--------------------------------------------------------------------------------" )
-build_devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=OFF "
-if [ "$build_devtools" = true ] ; then
- build_devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=ON "
-fi
-
-build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF "
-if [ "$build_with_etdump" = true ] ; then
- # Add DevTools flags use in the Target build below
- build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \
- -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
- -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \
- -DFLATCC_ALLOW_WERROR=OFF "
-fi
-
-echo "Building with Devtools: ${build_devtools_flags} ${build_with_etdump_flags}"
-
-
# Build
-cmake \
- -DCMAKE_INSTALL_PREFIX=${et_build_dir} \
- -DCMAKE_BUILD_TYPE=${build_type} \
- -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" \
- -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
- -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \
- -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
- -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
- -DEXECUTORCH_BUILD_CORTEX_M=ON \
- -DEXECUTORCH_ENABLE_LOGGING=ON \
- ${build_devtools_flags} \
- ${build_with_etdump_flags} \
- -B"${et_build_dir}" \
- "${et_root_dir}"
-
-echo "[$(basename $0)] Configured CMAKE"
+cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \
+      -DCMAKE_BUILD_TYPE=${build_type} \
+      -DEXECUTORCH_BUILD_DEVTOOLS=$build_devtools \
+      -DEXECUTORCH_BUILD_ARM_ETDUMP=$build_with_etdump \
+      --preset arm-baremetal -B${et_build_dir}
cmake --build ${et_build_dir} -j$(nproc) --target install --config ${build_type} --
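
For reference, a typical invocation of the simplified script; the flag values
are illustrative, and option handling is now delegated to the arm-baremetal
CMake preset rather than assembled flag-by-flag in shell:

    backends/arm/scripts/build_executorch.sh \
        --build_type=Release \
        --etdump \
        --toolchain=arm-none-eabi-gcc
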
diff --git a/backends/arm/scripts/build_portable_kernels.sh b/backends/arm/scripts/build_portable_kernels.sh
index 0d06b59dd03..cfa008c80d5 100755
--- a/backends/arm/scripts/build_portable_kernels.sh
+++ b/backends/arm/scripts/build_portable_kernels.sh
@@ -4,92 +4,4 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
-# Optional parameter:
-# --build_type= "Release" | "Debug" | "RelWithDebInfo"
-# --etdump build with devtools-etdump support
-
-set -eu
-
-script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
-et_root_dir=$(cd ${script_dir}/../../.. && pwd)
-et_root_dir=$(realpath ${et_root_dir})
-toolchain=arm-none-eabi-gcc
-setup_path_script=${et_root_dir}/examples/arm/ethos-u-scratch/setup_path.sh
-_setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly install necessary tools."
-
-
-et_build_root="${et_root_dir}/arm_test"
-build_type="Release"
-portable_kernels="aten::_softmax.out"
-
-help() {
- echo "Usage: $(basename $0) [options]"
- echo "Options:"
- echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}"
- echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}"
- echo " --portable_kernels= Comma separated list of portable (non delagated) kernels to include Default: ${portable_kernels}"
- echo " --toolchain= Toolchain can be specified (e.g. bare metal as arm-none-eabi-gcc or zephyr as arm-zephyr-eabi-gcc"
- exit 0
-}
-
-for arg in "$@"; do
- case $arg in
- -h|--help) help ;;
- --et_build_root=*) et_build_root="${arg#*=}";;
- --build_type=*) build_type="${arg#*=}";;
- --portable_kernels=*) portable_kernels="${arg#*=}";;
- --toolchain=*) toolchain="${arg#*=}";;
- *)
- ;;
- esac
-done
-
-if [[ ${toolchain} == "arm-none-eabi-gcc" ]]; then
- toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/${toolchain}.cmake
-elif [[ ${toolchain} == "arm-zephyr-eabi-gcc" ]]; then
- toolchain_cmake=${et_root_dir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
-else
- echo "Error: Invalid toolchain selection, provided: ${tolchain}"
- echo " Valid options are {arm-none-eabi-gcc, arm-zephyr-eabi-gcc}"
- exit 1;
-fi
-toolchain_cmake=$(realpath ${toolchain_cmake})
-
-# Source the tools
-# This should be prepared by the setup.sh
-[[ -f ${setup_path_script} ]] \
- || { echo "Missing ${setup_path_script}. ${_setup_msg}"; exit 1; }
-
-source ${setup_path_script}
-
-et_build_dir=${et_build_root}/cmake-out
-
-cd "${et_root_dir}"
-
-echo "--------------------------------------------------------------------------------" ;
-echo "Build ExecuTorch Libraries ${build_type} portable kernels: ${portable_kernels} into '${et_build_dir}'" ;
-echo "--------------------------------------------------------------------------------"
-
-if ! [[ $portable_kernels =~ ^((^|,)aten::[a-zA-Z0-9_]+\.[a-zA-Z0-9_]*out)*$ ]]; then
- echo " ERROR: specified argument --portable_kernels=${portable_kernels}"
- echo " is in the wrong format please use \"aten::.out,aten::.out,...\""
- echo " e.g. \"aten::_softmax.out,aten::add.out\""
- exit 1
-fi
-
-set -x
-
-cmake \
- -DCMAKE_INSTALL_PREFIX=${et_build_dir} \
- -DCMAKE_BUILD_TYPE=${build_type} \
- -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" \
- -DEXECUTORCH_SELECT_OPS_LIST=${portable_kernels} \
- -B"${et_build_dir}/examples/arm" \
- "${et_root_dir}/examples/arm"
-
-cmake --build "${et_build_dir}/examples/arm" -j$(nproc) --config ${build_type} --
-
-set +x
-
-echo "[$(basename $0)] Generated static libraries for ExecuTorch:"
-find "${et_build_dir}/examples/arm" -name "*.a" -exec ls -al {} \;
+echo "DEPRECATED: build_portable_kernels.sh is deprecated and will be removed. The kernel registration library is now built directly with the arm_executor_runner."
diff --git a/backends/arm/scripts/corstone_utils.cmake b/backends/arm/scripts/corstone_utils.cmake
new file mode 100644
index 00000000000..8253f3985ca
--- /dev/null
+++ b/backends/arm/scripts/corstone_utils.cmake
@@ -0,0 +1,463 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+function(fetch_ethos_u_content ETHOS_SDK_PATH ET_DIR_PATH)
+ message(STATUS "Fetching Ethos-U content into ${ETHOS_SDK_PATH}")
+
+ file(MAKE_DIRECTORY ${ETHOS_SDK_PATH}/../ethos_u)
+ include(FetchContent)
+ set(ethos_u_base_tag "25.05")
+ FetchContent_Declare(
+ ethos_u
+ GIT_REPOSITORY
+ https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u.git
+ GIT_TAG ${ethos_u_base_tag}
+ SOURCE_DIR
+ ${ETHOS_SDK_PATH}
+ BINARY_DIR
+ ${ETHOS_SDK_PATH}
+ SUBBUILD_DIR
+ ${ETHOS_SDK_PATH}/../ethos_u-subbuild
+ SOURCE_SUBDIR
+ none
+ )
+ FetchContent_MakeAvailable(ethos_u)
+ # Patch manifest to remove unused projects.
+ set(patch_dir "${ET_DIR_PATH}/examples/arm/ethos-u-setup")
+ set(ethos_u_base_rev "24950bd4381b6c51db0349a229f8ba86b8e1093f")
+ execute_process(
+ COMMAND
+ bash -c
+ "pwd && source backends/arm/scripts/utils.sh && patch_repo ${ETHOS_SDK_PATH} ${ethos_u_base_rev} ${patch_dir}"
+ WORKING_DIRECTORY ${ET_DIR_PATH} COMMAND_ECHO STDOUT
+ )
+ # Get ethos_u externals only if core_platform folder does not already exist.
+ if(NOT EXISTS "${ETHOS_SDK_PATH}/core_platform")
+ execute_process(
+ COMMAND ${PYTHON_EXECUTABLE} fetch_externals.py -c
+ ${ethos_u_base_tag}.json fetch
+ WORKING_DIRECTORY ${ETHOS_SDK_PATH} COMMAND_ECHO STDOUT
+ )
+ endif()
+ # Patch core_software to remove unused projects.
+ set(core_software_base_rev "55904c3da73c876c6d6c58290938ae217a8b94bd")
+ execute_process(
+ COMMAND
+ bash -c
+ "pwd && source backends/arm/scripts/utils.sh && patch_repo ${ETHOS_SDK_PATH}/core_software ${core_software_base_rev} ${patch_dir}"
+ WORKING_DIRECTORY ${ET_DIR_PATH} COMMAND_ECHO STDOUT
+ )
+ # Always patch the core_platform repo since this is fast enough.
+ set(core_platform_base_rev "1916a9c984819c35b19c9e5c4c80d47e4e866420")
+ execute_process(
+ COMMAND
+ bash -c
+ "pwd && source backends/arm/scripts/utils.sh && patch_repo ${ETHOS_SDK_PATH}/core_platform ${core_platform_base_rev} ${patch_dir}"
+ WORKING_DIRECTORY ${ET_DIR_PATH} COMMAND_ECHO STDOUT
+ )
+endfunction()
+
+function(add_corstone_subdirectory SYSTEM_CONFIG ETHOS_SDK_PATH)
+ if(SYSTEM_CONFIG MATCHES "Ethos_U55")
+ add_subdirectory(
+ ${ETHOS_SDK_PATH}/core_platform/targets/corstone-300 target
+ )
+ elseif(SYSTEM_CONFIG MATCHES "Ethos_U85")
+ add_subdirectory(
+ ${ETHOS_SDK_PATH}/core_platform/targets/corstone-320 target
+ )
+ else()
+ message(FATAL_ERROR "Unsupported SYSTEM_CONFIG ${SYSTEM_CONFIG}.")
+ endif()
+ if(MEMORY_MODE MATCHES "Dedicated_Sram")
+ target_compile_definitions(
+ ethosu_target_common INTERFACE ETHOSU_MODEL=1 ETHOSU_ARENA=1
+ )
+ elseif(MEMORY_MODE MATCHES "Shared_Sram" OR MEMORY_MODE MATCHES "Sram_Only")
+ target_compile_definitions(
+ ethosu_target_common INTERFACE ETHOSU_MODEL=1 ETHOSU_ARENA=0
+ )
+ else()
+ message(
+ FATAL_ERROR
+        "Unsupported MEMORY_MODE ${MEMORY_MODE}. MEMORY_MODE can be Shared_Sram, Sram_Only or Dedicated_Sram (applicable only to the Ethos-U85)."
+ )
+ endif()
+endfunction()
+
+function(configure_timing_adapters SYSTEM_CONFIG MEMORY_MODE)
+ if(SYSTEM_CONFIG MATCHES "Ethos_U55_High_End_Embedded")
+ set(TARGET_BOARD
+ "corstone-300"
+ PARENT_SCOPE
+ )
+ if(MEMORY_MODE MATCHES "Shared_Sram")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # Configure NPU architecture timing adapters. These are just
+                  # example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=8
+ ETHOSU_TA_MAXW_0=8
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=32
+ ETHOSU_TA_WLATENCY_0=32
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # Flash
+ ETHOSU_TA_MAXR_1=2
+ ETHOSU_TA_MAXW_1=0
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=64
+ ETHOSU_TA_WLATENCY_1=0
+ ETHOSU_TA_PULSE_ON_1=320
+ ETHOSU_TA_PULSE_OFF_1=80
+ ETHOSU_TA_BWCAP_1=50
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+ elseif(MEMORY_MODE MATCHES "Sram_Only")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # These are just example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=8
+ ETHOSU_TA_MAXW_0=8
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=32
+ ETHOSU_TA_WLATENCY_0=32
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # Set the second Timing Adapter to SRAM latency & bandwidth
+ ETHOSU_TA_MAXR_1=8
+ ETHOSU_TA_MAXW_1=8
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=32
+ ETHOSU_TA_WLATENCY_1=32
+ ETHOSU_TA_PULSE_ON_1=3999
+ ETHOSU_TA_PULSE_OFF_1=1
+ ETHOSU_TA_BWCAP_1=4000
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+
+ else()
+ message(
+ FATAL_ERROR
+ "Unsupported memory_mode ${MEMORY_MODE} for the Ethos-U55. The Ethos-U55 supports only Shared_Sram and Sram_Only."
+ )
+ endif()
+ elseif(SYSTEM_CONFIG MATCHES "Ethos_U55_Deep_Embedded")
+ add_subdirectory(
+ ${ETHOS_SDK_PATH}/core_platform/targets/corstone-300 target
+ )
+ set(TARGET_BOARD
+ "corstone-300"
+ PARENT_SCOPE
+ )
+ if(MEMORY_MODE MATCHES "Shared_Sram")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # Configure NPU architecture timing adapters. These are just
+                  # example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=4
+ ETHOSU_TA_MAXW_0=4
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=8
+ ETHOSU_TA_WLATENCY_0=8
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # Flash
+ ETHOSU_TA_MAXR_1=2
+ ETHOSU_TA_MAXW_1=0
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=32
+ ETHOSU_TA_WLATENCY_1=0
+ ETHOSU_TA_PULSE_ON_1=360
+ ETHOSU_TA_PULSE_OFF_1=40
+ ETHOSU_TA_BWCAP_1=25
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+ elseif(MEMORY_MODE MATCHES "Sram_Only")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # Configure NPU architecture timing adapters. These are just
+                  # example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=4
+ ETHOSU_TA_MAXW_0=4
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=8
+ ETHOSU_TA_WLATENCY_0=8
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # Set the second Timing Adapter to SRAM latency & bandwidth
+ ETHOSU_TA_MAXR_1=4
+ ETHOSU_TA_MAXW_1=4
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=8
+ ETHOSU_TA_WLATENCY_1=8
+ ETHOSU_TA_PULSE_ON_1=3999
+ ETHOSU_TA_PULSE_OFF_1=1
+ ETHOSU_TA_BWCAP_1=4000
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+ else()
+ message(
+ FATAL_ERROR
+ "Unsupported memory_mode ${MEMORY_MODE} for the Ethos-U55. The Ethos-U55 supports only Shared_Sram and Sram_Only."
+ )
+ endif()
+ elseif(SYSTEM_CONFIG MATCHES "Ethos_U85_SYS_DRAM_Low")
+ add_subdirectory(
+ ${ETHOS_SDK_PATH}/core_platform/targets/corstone-320 target
+ )
+ set(TARGET_BOARD
+ "corstone-320"
+ PARENT_SCOPE
+ )
+ if(MEMORY_MODE MATCHES "Dedicated_Sram")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # Configure NPU architecture timing adapters. These are just
+                  # example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=8
+ ETHOSU_TA_MAXW_0=8
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=16
+ ETHOSU_TA_WLATENCY_0=16
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # DRAM
+ ETHOSU_TA_MAXR_1=24
+ ETHOSU_TA_MAXW_1=12
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=250
+ ETHOSU_TA_WLATENCY_1=125
+ ETHOSU_TA_PULSE_ON_1=4000
+ ETHOSU_TA_PULSE_OFF_1=1000
+ ETHOSU_TA_BWCAP_1=2344
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+ elseif(MEMORY_MODE MATCHES "Sram_Only")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # Configure NPU architecture timing adapters. These are just
+                  # example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=8
+ ETHOSU_TA_MAXW_0=8
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=16
+ ETHOSU_TA_WLATENCY_0=16
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # Set the second Timing Adapter to SRAM latency & bandwidth
+ ETHOSU_TA_MAXR_1=8
+ ETHOSU_TA_MAXW_1=8
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=16
+ ETHOSU_TA_WLATENCY_1=16
+ ETHOSU_TA_PULSE_ON_1=3999
+ ETHOSU_TA_PULSE_OFF_1=1
+ ETHOSU_TA_BWCAP_1=4000
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+ endif()
+ elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Mid"
+ OR SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_High"
+ )
+ set(TARGET_BOARD
+ "corstone-320"
+ PARENT_SCOPE
+ )
+ if(MEMORY_MODE MATCHES "Dedicated_Sram")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # Configure NPU architecture timing adapters. These are just
+                  # example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=8
+ ETHOSU_TA_MAXW_0=8
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=32
+ ETHOSU_TA_WLATENCY_0=32
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # DRAM
+ ETHOSU_TA_MAXR_1=64
+ ETHOSU_TA_MAXW_1=32
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=500
+ ETHOSU_TA_WLATENCY_1=250
+ ETHOSU_TA_PULSE_ON_1=4000
+ ETHOSU_TA_PULSE_OFF_1=1000
+ ETHOSU_TA_BWCAP_1=3750
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+ elseif(MEMORY_MODE MATCHES "Sram_Only")
+ target_compile_definitions(
+ ethosu_target_common
+        INTERFACE # Configure NPU architecture timing adapters. These are just
+                  # example numbers; make them match your hardware.
+                  # SRAM
+ ETHOSU_TA_MAXR_0=8
+ ETHOSU_TA_MAXW_0=8
+ ETHOSU_TA_MAXRW_0=0
+ ETHOSU_TA_RLATENCY_0=32
+ ETHOSU_TA_WLATENCY_0=32
+ ETHOSU_TA_PULSE_ON_0=3999
+ ETHOSU_TA_PULSE_OFF_0=1
+ ETHOSU_TA_BWCAP_0=4000
+ ETHOSU_TA_PERFCTRL_0=0
+ ETHOSU_TA_PERFCNT_0=0
+ ETHOSU_TA_MODE_0=1
+ ETHOSU_TA_HISTBIN_0=0
+ ETHOSU_TA_HISTCNT_0=0
+ # Set the second Timing Adapter to SRAM latency & bandwidth
+ ETHOSU_TA_MAXR_1=8
+ ETHOSU_TA_MAXW_1=8
+ ETHOSU_TA_MAXRW_1=0
+ ETHOSU_TA_RLATENCY_1=32
+ ETHOSU_TA_WLATENCY_1=32
+ ETHOSU_TA_PULSE_ON_1=3999
+ ETHOSU_TA_PULSE_OFF_1=1
+ ETHOSU_TA_BWCAP_1=4000
+ ETHOSU_TA_PERFCTRL_1=0
+ ETHOSU_TA_PERFCNT_1=0
+ ETHOSU_TA_MODE_1=1
+ ETHOSU_TA_HISTBIN_1=0
+ ETHOSU_TA_HISTCNT_1=0
+ )
+ endif()
+ else()
+ message(FATAL_ERROR "Unsupported SYSTEM_CONFIG: ${SYSTEM_CONFIG}")
+ endif()
+
+ # The REGIONCFG registers of the Ethos-U control whether the NPU reads/writes
+ # data through the SRAM or the external memory. By default, the Ethos-U driver
+ # provides REGIONCFG configuration for Shared Sram memory mode. For Sram_Only
+ # and Dedicated_Sram memory modes, we need to change the settings for optimal
+ # performance.
+ #
+  # Currently, the convention used by Vela and the Ethos-U driver is that the
+  # NPU uses:
+  #   Region 0 for traffic of the read-only data (weights & biases)
+  #   Region 1 for traffic of the intermediate read/write buffers required
+  #            for the computation
+  #   Region 2 for traffic of the cache in Dedicated_Sram memory mode
+  #            (not applicable in Sram_Only or Shared_Sram)
+  #
+  # NOTE: The above convention is determined by the Vela compiler and the
+  # Ethos-U driver and can change in the future.
+  #
+  # Common definitions: for Ethos-U55/U65/U85, region configs are set as:
+  #   0 or 1 = AXI0 (Ethos-U55 or Ethos-U65) or AXI_SRAM (Ethos-U85)
+  #   2 or 3 = AXI1 (Ethos-U55 or Ethos-U65) or AXI_EXT (Ethos-U85)
+  #
+  # When we compile a model for Sram_Only, the traffic for Region 0 and Region 1
+  # should pass via the SRAM (hence regioncfg = 1). When we compile a model for
+  # Dedicated_Sram, the traffic for Region 0 and Region 1 should pass via the
+  # external memory (3) and the traffic for Region 2 should pass via the SRAM (0).
+ #
+
+ if(MEMORY_MODE MATCHES "Sram_Only")
+ target_compile_definitions(
+ ethosu_core_driver
+ PRIVATE NPU_QCONFIG=1
+ NPU_REGIONCFG_0=1
+ NPU_REGIONCFG_1=0
+ NPU_REGIONCFG_2=0
+ NPU_REGIONCFG_3=0
+ NPU_REGIONCFG_4=0
+ NPU_REGIONCFG_5=0
+ NPU_REGIONCFG_6=0
+ NPU_REGIONCFG_7=0
+ )
+ elseif(MEMORY_MODE MATCHES "Dedicated_Sram")
+ target_compile_definitions(
+ ethosu_core_driver
+ PRIVATE NPU_QCONFIG=3
+ NPU_REGIONCFG_0=3
+ NPU_REGIONCFG_1=3
+ NPU_REGIONCFG_2=0
+ NPU_REGIONCFG_3=0
+ NPU_REGIONCFG_4=0
+ NPU_REGIONCFG_5=0
+ NPU_REGIONCFG_6=0
+ NPU_REGIONCFG_7=0
+ )
+ endif()
+
+endfunction()
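
A minimal configure sketch for a project that includes these helpers; the
values are illustrative and mirror the SYSTEM_CONFIG/MEMORY_MODE combinations
validated above:

    cmake -DSYSTEM_CONFIG=Ethos_U85_SYS_DRAM_Mid \
          -DMEMORY_MODE=Dedicated_Sram \
          -DETHOS_SDK_PATH:PATH=examples/arm/ethos-u-scratch/ethos-u \
          -B cmake-out examples/arm/executor_runner
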
diff --git a/backends/arm/scripts/install_reference_model.sh b/backends/arm/scripts/install_reference_model.sh
index 4d2d8cf4954..2e77b061565 100755
--- a/backends/arm/scripts/install_reference_model.sh
+++ b/backends/arm/scripts/install_reference_model.sh
@@ -6,14 +6,10 @@
set -euo pipefail
-# Installation script to manage transition to 1.0
+# Installation script for TOSA reference model
-# TOSA reference model
tosa_reference_model_url="https://git.gitlab.arm.com/tosa/tosa-reference-model.git"
-tosa_reference_model_0_80_branch="v0.80"
-tosa_reference_model_0_80_rev="70ed0b40fa831387e36abdb4f7fb9670a3464f5a"
-tosa_serialization_lib_0_80_rev="v0.80.1"
-tosa_reference_model_1_0_rev="1e6e4526df3391e1d6bc41562596bb18b3153bf3"
+tosa_reference_model_1_0_rev="8aa2896be5b0625a7cde57abb2308da0d426198d" #2025.07.0
script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
@@ -31,23 +27,6 @@ function setup_tosa_reference_model() {
mkdir -p "$work_dir"
pushd "$work_dir" || exit 1
- # Install a patched version of TOSA reference model v0.80.1 to make it co-exist with 1.0 during the transition period
- if [[ ! -d "reference_model" ]]; then
- git clone --recurse-submodules --branch ${tosa_reference_model_0_80_branch} "$tosa_reference_model_url" reference_model
- fi
-
- patches_dir=${script_dir}/../third-party/reference_model/patches/v0.80
- patch_repo reference_model ${tosa_reference_model_0_80_rev} ${patches_dir}
- patch_repo reference_model/thirdparty/serialization_lib ${tosa_serialization_lib_0_80_rev} ${patches_dir}
-
- pushd reference_model
- rm -rf build
- # reference_model flatbuffers version clashes with Vela.
- # go with Vela's since it newer.
- # Vela's flatbuffer requirement is expected to loosen, then remove this. MLETORCH-565
- CMAKE_POLICY_VERSION_MINIMUM=3.5 pip install . --no-dependencies flatbuffers
- popd
-
# Install the 1.0 branch from upstream
CMAKE_POLICY_VERSION_MINIMUM=3.5 BUILD_PYBIND=1 pip install "tosa-tools@git+${tosa_reference_model_url}@${tosa_reference_model_1_0_rev}" ml_dtypes==0.5.1 --no-dependencies flatbuffers
}
diff --git a/backends/arm/scripts/mlsdk_utils.sh b/backends/arm/scripts/mlsdk_utils.sh
index ed6d78c900a..10018b7ccdc 100755
--- a/backends/arm/scripts/mlsdk_utils.sh
+++ b/backends/arm/scripts/mlsdk_utils.sh
@@ -6,8 +6,7 @@
set -euo pipefail
-# TODO
-mlsdk_manifest_url=""
+mlsdk_manifest_url="https://github.com/arm/ai-ml-sdk-manifest.git"
script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
@@ -55,8 +54,9 @@ function download_ai_mlsdk_manifest() {
function setup_model_converter() {
local work_dir="$1"
local manifest_dir="$2"
- local enable_vgf_lib="$3"
- local enable_emulation_layer="$4"
+ local enable_model_converter="$3"
+ local enable_vgf_lib="$4"
+ local enable_emulation_layer="$5"
if [[ -z "$work_dir" ]]; then
echo "Error: work_dir parameter is required."
@@ -76,29 +76,34 @@ function setup_model_converter() {
pushd "$manifest_dir"
# model-converter
- # TODO: Remove macOS patch after mlsdk fully supports macOS
- if [[ "$(uname)" == "Darwin" ]]; then
+ if [[ "${enable_model_converter}" -eq 1 ]]; then
+ # TODO: Remove this workaround once MLSDK has full Darwin support
+        # Do not indent the sed command; the whitespace is significant for the patch to work.
+ if [[ "$(uname)" == "Darwin" ]]; then
sed -i '' '/^ *print(f"Unsupported host platform/ i\
if system == "Darwin":\
- # Use default Apple toolchain (Clang) on macOS\
return True\
\
' sw/model-converter/scripts/build.py
+ fi
+ python sw/model-converter/scripts/build.py -j$(nproc)
fi
- python sw/model-converter/scripts/build.py -j$(nproc)
# libvgf
if [[ "${enable_vgf_lib}" -eq 1 ]]; then
- # TODO: Remove macOS patch after mlsdk fully supports macOS
+ # TODO: Remove this workaround once MLSDK has full Darwin support
+        # Do not indent the sed command; the whitespace is significant for the patch to work.
if [[ "$(uname)" == "Darwin" ]]; then
sed -i '' '/^ *print(f"ERROR: Unsupported host platform/ i\
if system == "Darwin":\
- # Use default Apple toolchain (Clang) on macOS\
return True\
\
' sw/vgf-lib/scripts/build.py
fi
- python sw/vgf-lib/scripts/build.py -j$(nproc)
+ pushd sw/vgf-lib
+ python scripts/build.py -j$(nproc)
+ cmake --install build --prefix deploy
+ popd
fi
# emu layer
@@ -110,7 +115,9 @@ function setup_model_converter() {
-DSPIRV_HEADERS_PATH=../../dependencies/SPIRV-Headers \
-DSPIRV_TOOLS_PATH=../../dependencies/SPIRV-Tools \
-DVULKAN_HEADERS_PATH=../../dependencies/Vulkan-Headers
+
cmake --build build
+ cmake --install build --prefix deploy
popd
fi
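
Existing call sites must be updated for the widened signature; a hedged sketch
of the new five-argument form, with placeholder directories:

    # work_dir  manifest_dir  enable_model_converter  enable_vgf_lib  enable_emulation_layer
    setup_model_converter "$HOME/mlsdk-work" ai-ml-sdk-manifest 1 1 0
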
diff --git a/backends/arm/scripts/parse_test_names.py b/backends/arm/scripts/parse_test_names.py
index e865723722e..9ceb5d73d23 100644
--- a/backends/arm/scripts/parse_test_names.py
+++ b/backends/arm/scripts/parse_test_names.py
@@ -8,6 +8,7 @@
CUSTOM_EDGE_OPS = [
"linspace.default",
"eye.default",
+ "expm1.default",
"vector_norm.default",
"hardsigmoid.default",
"hardswish.default",
@@ -18,6 +19,8 @@
"bitwise_right_shift.Tensor",
"bitwise_left_shift.Tensor",
"native_group_norm.default",
+ "silu.default",
+ "sdpa.default",
"unbind.int",
"unflatten.int",
"_native_batch_norm_legit_no_training.default",
@@ -26,7 +29,7 @@
ALL_EDGE_OPS = SAMPLE_INPUT.keys() | CUSTOM_EDGE_OPS
# Add all targets and TOSA profiles we support here.
-TARGETS = ["tosa_MI", "tosa_BI", "u55_BI", "u85_BI", "vgf_INT", "vgf_FP"]
+TARGETS = ["tosa_FP", "tosa_INT", "u55_INT", "u85_INT", "vgf_INT", "vgf_FP"]
def get_op_name_map():
@@ -68,8 +71,8 @@ def parse_test_name(
where OP must match a key in op_name_map and TARGET one string in TARGETS. The
"not_delegated" suffix indicates that the test tests that the op is not delegated.
- Examples of valid names: "test_mm_u55_BI_not_delegated" and
- "test_add_scalar_tosa_MI_two_inputs".
+ Examples of valid names: "test_mm_u55_INT_not_delegated" and
+ "test_add_scalar_tosa_FP_two_inputs".
Returns a tuple (OP, TARGET, IS_DELEGATED) if valid.
"""
diff --git a/backends/arm/scripts/run_vkml.sh b/backends/arm/scripts/run_vkml.sh
new file mode 100755
index 00000000000..ebbdb7e415f
--- /dev/null
+++ b/backends/arm/scripts/run_vkml.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Optional parameters:
+#   --model=<model file>   .pte model file to run
+#   --build_path=<folder>  build folder containing the executor_runner
+
+set -eu
+set -o pipefail
+
+script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+et_root_dir=$(cd ${script_dir}/../../.. && pwd)
+et_root_dir=$(realpath ${et_root_dir})
+setup_path_script=${et_root_dir}/examples/arm/ethos-u-scratch/setup_path.sh
+_setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly install necessary tools."
+
+
+model=""
+build_path="cmake-out"
+converter="model-converter"
+
+help() {
+ echo "Usage: $(basename $0) [options]"
+ echo "Options:"
+  echo "  --model=<model file>       .pte model file to run"
+  echo "  --build_path=<folder>      Build folder containing the executor_runner. Default: ${build_path}"
+ exit 0
+}
+
+for arg in "$@"; do
+ case $arg in
+ -h|--help) help ;;
+ --model=*) model="${arg#*=}";;
+ --build_path=*) build_path="${arg#*=}";;
+ *)
+ ;;
+ esac
+done
+
+if [[ -z ${model} ]]; then echo "Model name needs to be provided"; exit 1; fi
+
+
+# Source the tools
+# This should be prepared by the setup.sh
+[[ -f ${setup_path_script} ]] \
+ || { echo "Missing ${setup_path_script}. ${_setup_msg}"; exit 1; }
+
+source ${setup_path_script}
+
+# basic checks before we get started
+hash ${converter} \
+ || { echo "Could not find ${converter} on PATH, ${_setup_msg}"; exit 1; }
+
+
+
+runner="${build_path}/executor_runner"
+
+echo "--------------------------------------------------------------------------------"
+echo "Running ${model} with ${runner}"
+echo "WARNING: The VK_ML layer driver will not provide accurate performance information"
+echo "--------------------------------------------------------------------------------"
+
+# Check if stdbuf is installed and, if so, use stdbuf -oL together with tee below
+# so the output reaches the console immediately instead of being buffered.
+
+if hash stdbuf 2>/dev/null; then
+ nobuf="stdbuf -oL"
+else
+ nobuf=""
+fi
+
+log_file=$(mktemp)
+
+
+${nobuf} ${runner} -model_path ${model} | tee ${log_file}
+echo "[${BASH_SOURCE[0]}] execution complete, $?"
+
+# Most of these can happen for bare-metal or Linux executor_runner runs.
+echo "Checking for problems in log:"
+! grep -E "^(F|E|\\[critical\\]|Hard fault.|Info: Simulation is stopping. Reason: CPU time has been exceeded.).*$" ${log_file}
+if [ $? != 0 ]; then
+ echo "Found ERROR"
+ rm "${log_file}"
+ exit 1
+fi
+echo "No problems found!"
+rm "${log_file}"
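
A typical invocation of the new script; the model path is a placeholder and
the build folder is expected to already contain an executor_runner binary:

    backends/arm/scripts/run_vkml.sh --model=add.pte --build_path=cmake-out
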
diff --git a/backends/arm/test/TARGETS b/backends/arm/test/TARGETS
index 3c29719e1cc..9443547879d 100644
--- a/backends/arm/test/TARGETS
+++ b/backends/arm/test/TARGETS
@@ -41,7 +41,7 @@ python_library(
deps = [
":common",
"//executorch/backends/xnnpack/test/tester:tester",
- "//executorch/backends/arm:arm_partitioner",
+ "//executorch/backends/arm:ethosu_partitioner",
"//executorch/backends/arm/quantizer:lib",
"//executorch/backends/arm:tosa_mapping",
"//executorch/devtools/backend_debug:delegation_info",
diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py
index 8354e36aef2..b01dec4d371 100644
--- a/backends/arm/test/common.py
+++ b/backends/arm/test/common.py
@@ -19,6 +19,7 @@
corstone300_installed,
corstone320_installed,
model_converter_installed,
+ vkml_emulation_layer_installed,
)
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.exir.backend.compile_spec_schema import CompileSpec
@@ -33,7 +34,7 @@ def get_time_formatted_path(path: str, log_prefix: str) -> str:
log_prefix: The name of the test.
Example output:
- './my_log_folder/test_BI_artifact_28-Nov-14:14:38.log'
+ './my_log_folder/test_INT_artifact_28-Nov-14:14:38.log'
"""
return str(
Path(path) / f"{log_prefix}_{datetime.now().strftime('%d-%b-%H:%M:%S')}.log"
@@ -48,12 +49,12 @@ def maybe_get_tosa_collate_path() -> str | None:
tosa_test_base = os.environ.get("TOSA_TESTCASES_BASE_PATH")
if tosa_test_base:
current_test = os.environ.get("PYTEST_CURRENT_TEST")
- # '::test_collate_tosa_BI_tests[randn] (call)'
+ # '::test_collate_tosa_INT_tests[randn] (call)'
test_name = current_test.split("::")[1].split(" ")[0] # type: ignore[union-attr]
- if "BI" in test_name:
- tosa_test_base = os.path.join(tosa_test_base, "tosa-bi")
- elif "MI" in test_name:
- tosa_test_base = os.path.join(tosa_test_base, "tosa-mi")
+ if "INT" in test_name:
+ tosa_test_base = os.path.join(tosa_test_base, "tosa-int")
+ elif "FP" in test_name:
+ tosa_test_base = os.path.join(tosa_test_base, "tosa-fp")
else:
tosa_test_base = os.path.join(tosa_test_base, "other")
return os.path.join(tosa_test_base, test_name)
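
The renamed collate folders can be exercised from the shell; a sketch assuming
the Arm test suite is runnable via pytest from the repository root:

    export TOSA_TESTCASES_BASE_PATH=test_collate_tosa_tests
    pytest backends/arm/test/misc/test_debug_feats.py -k test_collate_tosa_INT_tests
    unset TOSA_TESTCASES_BASE_PATH
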
@@ -90,39 +91,6 @@ def get_tosa_compile_spec_unbuilt(
return compile_spec_builder
-def get_vgf_compile_spec(
- tosa_spec: str | TosaSpecification,
- compiler_flags: Optional[str] = "",
- custom_path=None,
-) -> list[CompileSpec]:
- """
- Default compile spec for VGF tests.
- """
- return get_vgf_compile_spec_unbuilt(tosa_spec, compiler_flags, custom_path).build()
-
-
-def get_vgf_compile_spec_unbuilt(
- tosa_spec: str | TosaSpecification,
- compiler_flags: Optional[str] = "",
- custom_path=None,
-) -> ArmCompileSpecBuilder:
- """Get the ArmCompileSpecBuilder for the default VGF tests, to modify
- the compile spec before calling .build() to finalize it.
- """
- if not custom_path:
- custom_path = maybe_get_tosa_collate_path()
-
- if custom_path is not None:
- os.makedirs(custom_path, exist_ok=True)
- compile_spec_builder = (
- ArmCompileSpecBuilder()
- .vgf_compile_spec(tosa_spec, compiler_flags)
- .dump_intermediate_artifacts_to(custom_path)
- )
-
- return compile_spec_builder
-
-
def get_u55_compile_spec(
macs: int = 128,
system_config: str = "Ethos_U55_High_End_Embedded",
@@ -165,6 +133,17 @@ def get_u85_compile_spec(
).build()
+def get_vgf_compile_spec(
+ tosa_spec: str | TosaSpecification,
+ compiler_flags: Optional[str] = "",
+ custom_path=None,
+) -> list[CompileSpec]:
+ """
+ Default compile spec for VGF tests.
+ """
+ return get_vgf_compile_spec_unbuilt(tosa_spec, compiler_flags, custom_path).build()
+
+
def get_u55_compile_spec_unbuilt(
macs: int,
system_config: str,
@@ -228,6 +207,33 @@ def get_u85_compile_spec_unbuilt(
return compile_spec # type: ignore[return-value]
+def get_vgf_compile_spec_unbuilt(
+ tosa_spec: str | TosaSpecification,
+ compiler_flags: Optional[str] = "",
+ custom_path=None,
+) -> ArmCompileSpecBuilder:
+ """Get the ArmCompileSpecBuilder for the default VGF tests, to modify
+ the compile spec before calling .build() to finalize it.
+ """
+ if "FP" in repr(tosa_spec):
+ artifact_path = custom_path or tempfile.mkdtemp(prefix="arm_vgf_fp_")
+ elif "INT" in repr(tosa_spec):
+ artifact_path = custom_path or tempfile.mkdtemp(prefix="arm_vgf_int_")
+ else:
+ raise ValueError(f"Unsupported vgf compile_spec: {repr(tosa_spec)}")
+
+ if not os.path.exists(artifact_path):
+ os.makedirs(artifact_path, exist_ok=True)
+
+ compile_spec_builder = (
+ ArmCompileSpecBuilder()
+ .vgf_compile_spec(tosa_spec, compiler_flags)
+ .dump_intermediate_artifacts_to(artifact_path)
+ )
+
+ return compile_spec_builder
+
+
XfailIfNoCorstone300 = pytest.mark.xfail(
condition=not (
corstone300_installed() and arm_executor_runner_exists("corstone-300")
@@ -251,7 +257,14 @@ def get_u85_compile_spec_unbuilt(
raises=FileNotFoundError,
reason="Did not find model-converter on path",
)
-"""Xfails a test if model-converter is not installed"""
+"""Skips a test if model-converter is not installed"""
+
+XfailfNoVKMLEmulationLayer = pytest.mark.xfail(
+ condition=not (vkml_emulation_layer_installed()),
+ raises=TypeError,
+    reason="VKML environment is not set up properly or the executor_runner path is misused",
+)
+"""Xfails a test if VKML Emulation Layer is not installed"""
xfail_type = str | tuple[str, type[Exception]]
diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py
index 71eb5782967..6fc9e7e5adc 100644
--- a/backends/arm/test/conftest.py
+++ b/backends/arm/test/conftest.py
@@ -33,17 +33,6 @@ def pytest_configure(config):
if config.option.arm_run_tosa_version:
pytest._test_options["tosa_version"] = config.option.arm_run_tosa_version
- # Not all deployments of ET have the TOSA reference model available.
- # Make sure we don't try to use it if it's not available.
- try:
- if pytest._test_options["tosa_version"] == "0.80":
- import tosa_tools.v0_80.tosa_reference_model as tosa_reference_model
- else:
- import tosa_tools.tosa_ref_model as tosa_reference_model
- except ImportError:
- pytest._test_options["tosa_ref_model"] = False # type: ignore[attr-defined]
- tosa_reference_model = None # noqa
-
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
diff --git a/backends/arm/test/misc/test_bn_relu_folding_qat.py b/backends/arm/test/misc/test_bn_relu_folding_qat.py
index bf7bc4227ad..c88c38e869d 100644
--- a/backends/arm/test/misc/test_bn_relu_folding_qat.py
+++ b/backends/arm/test/misc/test_bn_relu_folding_qat.py
@@ -12,7 +12,7 @@
TOSAQuantizer,
)
from executorch.backends.arm.test import common, conftest
-from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineBI
+from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineINT
from executorch.backends.xnnpack.test.tester.tester import Quantize
from torch import nn
@@ -40,17 +40,20 @@ def forward(self, x: torch.Tensor):
models = {
- "conv_bn_relu": ConvModule(batch_norm=True),
- "conv_relu": ConvModule(batch_norm=False),
+ # name : (model, is_per_channel)
+ "conv_bn_relu_per_channel": (ConvModule(batch_norm=True), True),
+ "conv_relu_per_channel": (ConvModule(batch_norm=False), True),
+ "conv_bn_relu_per_tensor": (ConvModule(batch_norm=True), False),
+ "conv_relu_per_tensor": (ConvModule(batch_norm=False), False),
}
-@common.parametrize("model", models)
-def test_qat_tosa_BI(model: torch.nn.Module):
- pipeline = TosaPipelineBI[input_t1](model, model.test_data, [], [], qtol=1)
+@common.parametrize("test_data", models)
+def test_qat_tosa_INT(test_data):
+ model, per_channel = test_data
+ pipeline = TosaPipelineINT[input_t1](model, model.test_data, [], [], qtol=1)
tosa_version = conftest.get_option("tosa_version")
tosa_profiles = {
- "0.80": common.TosaSpecification.create_from_string("TOSA-0.80+BI"),
"1.0": common.TosaSpecification.create_from_string("TOSA-1.0+INT"),
}
tosa_spec = tosa_profiles[tosa_version]
@@ -60,7 +63,7 @@ def test_qat_tosa_BI(model: torch.nn.Module):
Quantize(
quantizer=quantizer,
quantization_config=get_symmetric_quantization_config(
- is_qat=True, is_per_channel=False
+ is_qat=True, is_per_channel=per_channel
),
is_qat=True,
),
diff --git a/backends/arm/test/misc/test_custom_partition.py b/backends/arm/test/misc/test_custom_partition.py
index c2889f17ce3..6cdd63af7c9 100644
--- a/backends/arm/test/misc/test_custom_partition.py
+++ b/backends/arm/test/misc/test_custom_partition.py
@@ -8,7 +8,7 @@
import torch
from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineMI
+from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineFP
from executorch.exir.backend.operator_support import (
DontPartition,
DontPartitionModule,
@@ -50,7 +50,7 @@ def test_single_reject(caplog, test_data: input_t1):
caplog.set_level(logging.INFO)
module = CustomPartitioning()
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
check = DontPartition(exir_ops.edge.aten.sigmoid.default)
pipeline.change_args("to_edge_transform_and_lower", additional_checks=[check])
pipeline.change_args(
@@ -68,7 +68,7 @@ def test_single_reject(caplog, test_data: input_t1):
@common.parametrize("test_data", CustomPartitioning.inputs)
def test_multiple_reject(test_data: input_t1):
module = CustomPartitioning()
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
check = DontPartition(
exir_ops.edge.aten.sigmoid.default, exir_ops.edge.aten.mul.Tensor
)
@@ -90,7 +90,7 @@ def test_torch_op_reject(caplog, test_data: input_t1):
module = CustomPartitioning()
check = DontPartition(torch.ops.aten.sigmoid.default)
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
pipeline.change_args("to_edge_transform_and_lower", additional_checks=[check])
pipeline.change_args(
"check_count.exir", {"torch.ops.higher_order.executorch_call_delegate": 2}
@@ -108,7 +108,7 @@ def test_torch_op_reject(caplog, test_data: input_t1):
def test_string_op_reject(test_data: input_t1):
module = CustomPartitioning()
check = DontPartition("aten.sigmoid.default")
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
pipeline.change_args("to_edge_transform_and_lower", additional_checks=[check])
pipeline.change_args(
"check_count.exir", {"torch.ops.higher_order.executorch_call_delegate": 2}
@@ -127,7 +127,7 @@ def test_name_reject(caplog, test_data: input_t1):
module = CustomPartitioning()
check = DontPartitionName("mul", "sigmoid", exact=False)
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
pipeline.change_args("to_edge_transform_and_lower", additional_checks=[check])
pipeline.change_args(
"check_count.exir",
@@ -142,7 +142,7 @@ def test_name_reject(caplog, test_data: input_t1):
def test_module_reject(test_data: input_t1):
module = NestedModule()
check = DontPartitionModule(module_name="CustomPartitioning")
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
pipeline.change_args("to_edge_transform_and_lower", additional_checks=[check])
pipeline.change_args(
"check_count.exir",
@@ -158,7 +158,7 @@ def test_inexact_module_reject(caplog, test_data: input_t1):
module = NestedModule()
check = DontPartitionModule(module_name="Custom", exact=False)
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
pipeline.change_args("to_edge_transform_and_lower", additional_checks=[check])
pipeline.change_args(
"check_count.exir",
@@ -173,7 +173,7 @@ def test_inexact_module_reject(caplog, test_data: input_t1):
def test_module_instance_reject(test_data: input_t1):
module = NestedModule()
check = DontPartitionModule(instance_name="nested")
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], exir_op=[])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], exir_op=[])
pipeline.change_args("to_edge_transform_and_lower", additional_checks=[check])
pipeline.change_args(
"check_count.exir",
diff --git a/backends/arm/test/misc/test_debug_feats.py b/backends/arm/test/misc/test_debug_feats.py
index 8da394c9e5d..288d5b41615 100644
--- a/backends/arm/test/misc/test_debug_feats.py
+++ b/backends/arm/test/misc/test_debug_feats.py
@@ -12,11 +12,11 @@
import pytest
import torch
-from executorch.backends.arm.test import common, conftest
+from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -45,18 +45,18 @@ def forward(self, x):
"""Tests dumping the partition artifact in ArmTester. Both to file and to stdout."""
-def _tosa_MI_pipeline(module: torch.nn.Module, test_data: input_t1, dump_file=None):
+def _tosa_FP_pipeline(module: torch.nn.Module, test_data: input_t1, dump_file=None):
- pipeline = TosaPipelineMI[input_t1](module, test_data, [], [])
+ pipeline = TosaPipelineFP[input_t1](module, test_data, [], [])
pipeline.dump_artifact("to_edge_transform_and_lower")
pipeline.dump_artifact("to_edge_transform_and_lower", suffix=dump_file)
pipeline.pop_stage("run_method_and_compare_outputs")
pipeline.run()
-def _tosa_BI_pipeline(module: torch.nn.Module, test_data: input_t1, dump_file=None):
+def _tosa_INT_pipeline(module: torch.nn.Module, test_data: input_t1, dump_file=None):
- pipeline = TosaPipelineBI[input_t1](module, test_data, [], [])
+ pipeline = TosaPipelineINT[input_t1](module, test_data, [], [])
pipeline.dump_artifact("to_edge_transform_and_lower")
pipeline.dump_artifact("to_edge_transform_and_lower", suffix=dump_file)
pipeline.pop_stage("run_method_and_compare_outputs")
@@ -71,12 +71,12 @@ def _is_tosa_marker_in_file(tmp_file):
@common.parametrize("test_data", Linear.inputs)
-def test_MI_artifact(test_data: input_t1):
+def test_FP_artifact(test_data: input_t1):
model = Linear()
tmp_file = common.get_time_formatted_path(
- tempfile.mkdtemp(), test_MI_artifact.__name__
+ tempfile.mkdtemp(), test_FP_artifact.__name__
)
- _tosa_MI_pipeline(model, test_data, dump_file=tmp_file)
+ _tosa_FP_pipeline(model, test_data, dump_file=tmp_file)
assert os.path.exists(tmp_file), f"File {tmp_file} was not created"
if _is_tosa_marker_in_file(tmp_file):
return # Implicit pass test
@@ -84,12 +84,12 @@ def test_MI_artifact(test_data: input_t1):
@common.parametrize("test_data", Linear.inputs)
-def test_BI_artifact(test_data: input_t1):
+def test_INT_artifact(test_data: input_t1):
model = Linear()
tmp_file = common.get_time_formatted_path(
- tempfile.mkdtemp(), test_BI_artifact.__name__
+ tempfile.mkdtemp(), test_INT_artifact.__name__
)
- _tosa_BI_pipeline(model, test_data, dump_file=tmp_file)
+ _tosa_INT_pipeline(model, test_data, dump_file=tmp_file)
assert os.path.exists(tmp_file), f"File {tmp_file} was not created"
if _is_tosa_marker_in_file(tmp_file):
return # Implicit pass test
@@ -101,7 +101,7 @@ def test_BI_artifact(test_data: input_t1):
@common.parametrize("test_data", Linear.inputs)
def test_numerical_diff_print(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Linear(),
test_data,
[],
@@ -125,7 +125,7 @@ def test_numerical_diff_print(test_data: input_t1):
@common.parametrize("test_data", Linear.inputs)
def test_dump_ops_and_dtypes(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Linear(), test_data, [], [])
+ pipeline = TosaPipelineINT[input_t1](Linear(), test_data, [], [])
pipeline.pop_stage("run_method_and_compare_outputs")
pipeline.add_stage_after("quantize", pipeline.tester.dump_dtype_distribution)
pipeline.add_stage_after("quantize", pipeline.tester.dump_operator_distribution)
@@ -143,7 +143,7 @@ def test_dump_ops_and_dtypes(test_data: input_t1):
@common.parametrize("test_data", Linear.inputs)
def test_dump_ops_and_dtypes_parseable(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Linear(), test_data, [], [])
+ pipeline = TosaPipelineINT[input_t1](Linear(), test_data, [], [])
pipeline.pop_stage("run_method_and_compare_outputs")
pipeline.add_stage_after("quantize", pipeline.tester.dump_dtype_distribution, False)
pipeline.add_stage_after(
@@ -167,24 +167,21 @@ def test_dump_ops_and_dtypes_parseable(test_data: input_t1):
@common.parametrize("test_data", Linear.inputs)
-def test_collate_tosa_BI_tests(test_data: input_t1):
+def test_collate_tosa_INT_tests(test_data: input_t1):
# Set the environment variable to trigger the collation of TOSA tests
os.environ["TOSA_TESTCASES_BASE_PATH"] = "test_collate_tosa_tests"
# Clear out the directory
- pipeline = TosaPipelineBI[input_t1](Linear(), test_data, [], [])
+ pipeline = TosaPipelineINT[input_t1](Linear(), test_data, [], [])
pipeline.pop_stage("run_method_and_compare_outputs")
pipeline.run()
test_collate_dir = (
- "test_collate_tosa_tests/tosa-bi/test_collate_tosa_BI_tests[randn]"
+ "test_collate_tosa_tests/tosa-int/test_collate_tosa_INT_tests[randn]"
)
# test that the output directory is created and contains the expected files
assert os.path.exists(test_collate_dir)
- tosa_version = conftest.get_option("tosa_version")
for file in os.listdir(test_collate_dir):
- file_name_prefix = f"TOSA-{tosa_version}+" + (
- "INT" if tosa_version == "1.0" else "BI"
- )
+ file_name_prefix = "TOSA-1.0+INT"
assert file.endswith((f"{file_name_prefix}.json", f"{file_name_prefix}.tosa"))
os.environ.pop("TOSA_TESTCASES_BASE_PATH")
@@ -193,7 +190,7 @@ def test_collate_tosa_BI_tests(test_data: input_t1):
@common.parametrize("test_data", Linear.inputs)
def test_dump_tosa_ops(caplog, test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Linear(), test_data, [], [])
+ pipeline = TosaPipelineINT[input_t1](Linear(), test_data, [], [])
pipeline.pop_stage("run_method_and_compare_outputs")
pipeline.dump_operator_distribution("to_edge_transform_and_lower")
pipeline.run()
@@ -211,7 +208,7 @@ def forward(self, x):
@common.parametrize("test_data", Add.inputs)
def test_fail_dump_tosa_ops(caplog, test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
Add(), test_data, [], [], use_to_edge_transform_and_lower=True, run_on_fvp=False
)
pipeline.dump_operator_distribution("to_edge_transform_and_lower")
diff --git a/backends/arm/test/misc/test_dim_order_guards.py b/backends/arm/test/misc/test_dim_order_guards.py
index 44c9e707324..b291aaa52cf 100644
--- a/backends/arm/test/misc/test_dim_order_guards.py
+++ b/backends/arm/test/misc/test_dim_order_guards.py
@@ -12,8 +12,8 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -34,9 +34,9 @@ def forward(self, x):
@common.parametrize("test_data", Conv2D.inputs)
-def test_tosa_MI_pipeline(test_data: input_t1):
+def test_tosa_FP_pipeline(test_data: input_t1):
module = Conv2D()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
module,
test_data,
[],
@@ -51,9 +51,9 @@ def test_tosa_MI_pipeline(test_data: input_t1):
@common.parametrize("test_data", Conv2D.inputs)
-def test_tosa_BI_pipeline(test_data: input_t1):
+def test_tosa_INT_pipeline(test_data: input_t1):
module = Conv2D()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
module,
test_data,
[],
diff --git a/backends/arm/test/misc/test_extract_io_params_tosa.py b/backends/arm/test/misc/test_extract_io_params_tosa.py
new file mode 100644
index 00000000000..2afa3876081
--- /dev/null
+++ b/backends/arm/test/misc/test_extract_io_params_tosa.py
@@ -0,0 +1,92 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+
+import pytest
+import torch
+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+from executorch.backends.arm.quantizer import VgfQuantizer
+from executorch.backends.arm.quantizer.arm_quantizer import (
+ get_symmetric_quantization_config,
+ TOSAQuantizer,
+)
+
+from executorch.backends.arm.test.common import SkipIfNoModelConverter
+from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
+from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.vgf_partitioner import VgfPartitioner
+from executorch.exir import to_edge_transform_and_lower
+from executorch.exir.passes.quantize_io_pass import extract_io_quant_params
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
+
+
+class SimpleAdd(torch.nn.Module):
+ def forward(self, x, y):
+ return x + y
+
+
+@pytest.mark.parametrize(
+ "builder_method, quantizer_cls, partitioner_cls",
+ [
+ ("tosa_compile_spec", TOSAQuantizer, TOSAPartitioner),
+ pytest.param(
+ "vgf_compile_spec",
+ VgfQuantizer,
+ VgfPartitioner,
+ marks=SkipIfNoModelConverter,
+ id="VGF",
+ ),
+ ],
+)
+def test_roundtrip_extracts_io_params(builder_method, quantizer_cls, partitioner_cls):
+ """
+ Validates that IO quantization parameters round-trip for both flows.
+ """
+ example_inputs = (
+ torch.ones(1, 5),
+ torch.full((1, 5), 2.0),
+ )
+ mod = SimpleAdd().eval()
+
+ base_spec = TosaSpecification.create_from_string("TOSA-1.0+INT")
+ compile_spec = getattr(ArmCompileSpecBuilder(), builder_method)(
+ tosa_spec=base_spec
+ ).build()
+
+ quantizer = quantizer_cls(compile_spec)
+ operator_config = get_symmetric_quantization_config(is_qat=True)
+ quantizer.set_global(operator_config)
+
+ exported = torch.export.export(mod, copy.deepcopy(example_inputs), strict=True)
+ prepared = prepare_pt2e(exported.module(), quantizer)
+ _ = prepared(*example_inputs)
+
+ converted = convert_pt2e(prepared)
+ final_export = torch.export.export(converted, example_inputs, strict=True)
+ partitioner = partitioner_cls(compile_spec)
+ edge_prog = to_edge_transform_and_lower(final_export, partitioner=[partitioner])
+
+ # Extract IO quantization parameters
+ q = extract_io_quant_params(
+ edge_prog,
+ input_idxs=(0, 1),
+ output_idxs=(0,),
+ )
+
+ assert "inputs" in q
+ assert "outputs" in q
+ assert len(q["inputs"]) == 2
+ assert len(q["outputs"]) == 1
+
+ for name, params in q["inputs"].items():
+ assert isinstance(name, str)
+ assert isinstance(params["scale"], float)
+ assert isinstance(params["zero_point"], int)
+
+ out_name, out_params = next(iter(q["outputs"].items()))
+ assert isinstance(out_name, str)
+ assert isinstance(out_params["scale"], float)
+ assert isinstance(out_params["zero_point"], int)
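
The assertions above pin down the shape of `extract_io_quant_params`'s return value: a dict whose
"inputs" and "outputs" entries map tensor names to {"scale": float, "zero_point": int}. As a hedged
sketch of how such parameters are typically consumed (the helper names below are illustrative, not
part of the executorch API), standard affine int8 quantization would apply them like this:

    import torch

    # Illustrative only: quantize a float input before feeding a lowered
    # program, and dequantize its raw int8 output, using extracted params.
    def quantize_input(x: torch.Tensor, scale: float, zero_point: int) -> torch.Tensor:
        return torch.clamp(torch.round(x / scale) + zero_point, -128, 127).to(torch.int8)

    def dequantize_output(q: torch.Tensor, scale: float, zero_point: int) -> torch.Tensor:
        return (q.to(torch.float32) - zero_point) * scale
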
diff --git a/backends/arm/test/misc/test_lifted_tensor.py b/backends/arm/test/misc/test_lifted_tensor.py
index c17d93765e5..2e45a36d12a 100644
--- a/backends/arm/test/misc/test_lifted_tensor.py
+++ b/backends/arm/test/misc/test_lifted_tensor.py
@@ -9,8 +9,8 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
from executorch.backends.test.harness.stages import StageType
@@ -60,11 +60,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", LiftedTensor.test_data)
-def test_partition_lifted_tensor_tosa_MI(test_data: input_t1):
+def test_partition_lifted_tensor_tosa_FP(test_data: input_t1):
op = test_data[0]
data = test_data[1:]
module = LiftedTensor(op)
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
module,
*data,
[],
@@ -81,11 +81,11 @@ def test_partition_lifted_tensor_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", LiftedTensor.test_data)
-def test_partition_lifted_tensor_tosa_BI(test_data: input_t1):
+def test_partition_lifted_tensor_tosa_INT(test_data: input_t1):
op = test_data[0]
data = test_data[1:]
module = LiftedTensor(op)
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
module,
*data,
[],
@@ -102,11 +102,11 @@ def test_partition_lifted_tensor_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", LiftedScalarTensor.test_data)
-def test_partition_lifted_scalar_tensor_tosa_MI(test_data: input_t1):
+def test_partition_lifted_scalar_tensor_tosa_FP(test_data: input_t1):
op = test_data[0]
data = test_data[1:]
module = LiftedScalarTensor(op, data[-1])
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
module,
data[0],
[],
@@ -117,11 +117,11 @@ def test_partition_lifted_scalar_tensor_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", LiftedScalarTensor.test_data)
-def test_partition_lifted_scalar_tensor_tosa_BI(test_data: input_t1):
+def test_partition_lifted_scalar_tensor_tosa_INT(test_data: input_t1):
op = test_data[0]
data = test_data[1:]
module = LiftedScalarTensor(op, data[-1])
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
module,
data[0],
[],
diff --git a/backends/arm/test/misc/test_multiple_delegates.py b/backends/arm/test/misc/test_multiple_delegates.py
index 0b0122bf65e..f716bc45385 100644
--- a/backends/arm/test/misc/test_multiple_delegates.py
+++ b/backends/arm/test/misc/test_multiple_delegates.py
@@ -8,8 +8,8 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -28,8 +28,8 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
@common.parametrize("test_data", MultipleDelegatesModule.inputs)
-def test_tosa_MI_pipeline(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](MultipleDelegatesModule(), test_data, [], [])
+def test_tosa_FP_pipeline(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](MultipleDelegatesModule(), test_data, [], [])
pipeline.change_args(
"check_count.exir", {"torch.ops.higher_order.executorch_call_delegate": 2}
)
@@ -37,8 +37,8 @@ def test_tosa_MI_pipeline(test_data: input_t1):
@common.parametrize("test_data", MultipleDelegatesModule.inputs)
-def test_tosa_BI_pipeline(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_tosa_INT_pipeline(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
MultipleDelegatesModule(), test_data, [], [], qtol=1
)
pipeline.change_args(
diff --git a/backends/arm/test/misc/test_multiple_outputs.py b/backends/arm/test/misc/test_multiple_outputs.py
index abb6bb1bf30..45398437238 100644
--- a/backends/arm/test/misc/test_multiple_outputs.py
+++ b/backends/arm/test/misc/test_multiple_outputs.py
@@ -9,10 +9,10 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -29,14 +29,14 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
@common.parametrize("test_data", MultipleOutputsModule.inputs)
-def test_tosa_MI_pipeline(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](MultipleOutputsModule(), test_data, [], [])
+def test_tosa_FP_pipeline(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](MultipleOutputsModule(), test_data, [], [])
pipeline.run()
@common.parametrize("test_data", MultipleOutputsModule.inputs)
-def test_tosa_BI_pipeline(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_tosa_INT_pipeline(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
MultipleOutputsModule(), test_data, [], [], qtol=1
)
pipeline.run()
@@ -45,7 +45,7 @@ def test_tosa_BI_pipeline(test_data: input_t1):
@common.parametrize("test_data", MultipleOutputsModule.inputs)
@common.XfailIfNoCorstone300
def test_U55_pipeline(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
MultipleOutputsModule(), test_data, [], [], qtol=1
)
pipeline.run()
@@ -54,7 +54,7 @@ def test_U55_pipeline(test_data: input_t1):
@common.parametrize("test_data", MultipleOutputsModule.inputs)
@common.XfailIfNoCorstone320
def test_U85_pipeline(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
MultipleOutputsModule(), test_data, [], [], qtol=1
)
pipeline.run()
diff --git a/backends/arm/test/misc/test_non_persistent_buffers.py b/backends/arm/test/misc/test_non_persistent_buffers.py
index 1b9456ae470..c563ba07208 100644
--- a/backends/arm/test/misc/test_non_persistent_buffers.py
+++ b/backends/arm/test/misc/test_non_persistent_buffers.py
@@ -8,8 +8,8 @@
from executorch.backends.arm.test.common import parametrize
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -32,18 +32,18 @@ def forward(self, x):
@parametrize("test_data", test_input)
-def test_non_persistent_buffer_MI(test_data: input_t):
+def test_non_persistent_buffer_FP(test_data: input_t):
"""
Test validates Arm backend handling of non-persistent buffers
and ensures that there are no asserts or errors when they are used.
"""
- TosaPipelineMI[input_t](NonPersistentBuffer(), test_data, "").run()
+ TosaPipelineFP[input_t](NonPersistentBuffer(), test_data, "").run()
@parametrize("test_data", test_input)
-def test_non_persistent_buffer_BI(test_data: input_t):
+def test_non_persistent_buffer_INT(test_data: input_t):
"""
Test validates Arm backend handling of non-persistent buffers
and ensures that there are no asserts or errors when they are used.
"""
- TosaPipelineBI[input_t](NonPersistentBuffer(), test_data, "").run()
+ TosaPipelineINT[input_t](NonPersistentBuffer(), test_data, "").run()
diff --git a/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
index 49efbbb4a9c..1aaa2950337 100644
--- a/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
+++ b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
@@ -14,8 +14,8 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
input_t1 = Tuple[torch.Tensor]
@@ -83,8 +83,8 @@ def forward(self, x: torch.Tensor):
# Softplus is decomposed which messes up the quantization. This test tests that CheckProperQuantization does not
# partition nodes where quantization is not as expected.
@common.parametrize("test_data", test_data)
-def test_softplus_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_softplus_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
SoftplusModule(),
test_data=test_data,
aten_op=softplus_aten_op,
@@ -96,8 +96,8 @@ def test_softplus_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", test_data)
-def test_softplus_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_softplus_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
SoftplusModule(),
test_data=test_data,
aten_op=softplus_aten_op,
@@ -115,16 +115,16 @@ def test_softplus_tosa_BI(test_data: input_t1):
# Since GELU will not be quantized by TosaQuantizer, the Dropout's input will not be quantized either.
-# If so, the Dropout should not be partitioned by TosaPartitioner for TOSA BI profile. This test tests that the
-# partitioner indeed does not partition the Dropout (clone) for TOSA BI.
+# If so, the Dropout should not be partitioned by TosaPartitioner for the TOSA INT profile. This test verifies
+# that the partitioner does not partition the Dropout (clone) for TOSA INT.
@common.parametrize(
"test_data",
test_data,
{"3d_rand": "MLETORCH-909: Partition test to not rely on unsupported ops"},
strict=False,
)
-def test_linear_residaul_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_linear_residual_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
LinearResidualModule(),
test_data=test_data,
aten_op=linear_residual_aten_op,
@@ -156,8 +156,8 @@ def test_linear_residaul_tosa_MI(test_data: input_t1):
{"3d_rand": "MLETORCH-855: Issue with Quantization folding."},
strict=False,
)
-def test_linear_residual_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_linear_residual_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
LinearResidualModule(),
test_data=test_data,
aten_op=linear_residual_aten_op,
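
The comments in this file rely on softplus being decomposed before quantization annotations are
applied, so the quantizer never sees a single softplus node to wrap in Q/DQ pairs. A minimal sketch
of the underlying identity (for the default beta=1, below the overflow threshold):

    import torch

    # softplus(x) = log(1 + exp(x)); after decomposition the graph holds
    # exp/log-style nodes without the Q/DQ pattern CheckProperQuantization
    # expects around a properly quantized op.
    x = torch.linspace(-5.0, 5.0, steps=11)
    decomposed = torch.log1p(torch.exp(x))
    assert torch.allclose(torch.nn.functional.softplus(x), decomposed, atol=1e-6)
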
diff --git a/backends/arm/test/misc/test_tosa_spec.py b/backends/arm/test/misc/test_tosa_spec.py
index 19136c514fb..a2f5f7d85ee 100644
--- a/backends/arm/test/misc/test_tosa_spec.py
+++ b/backends/arm/test/misc/test_tosa_spec.py
@@ -5,10 +5,8 @@
import unittest
-from executorch.backends.arm.arm_backend import get_tosa_spec
-
from executorch.backends.arm.tosa_specification import (
- Tosa_0_80,
+ get_tosa_spec,
Tosa_1_00,
TosaSpecification,
)
@@ -16,12 +14,7 @@
from executorch.exir.backend.compile_spec_schema import CompileSpec
from parameterized import parameterized # type: ignore[import-untyped]
-test_valid_0_80_strings = [
- "TOSA-0.80+BI",
- "TOSA-0.80+MI+8k",
- "TOSA-0.80+BI+u55",
-]
-test_valid_1_0_strings = [
+test_valid_strings = [
"TOSA-1.0.0+INT+FP+fft",
"TOSA-1.0.0+FP+bf16+fft",
"TOSA-1.0.0+INT+int4+cf",
@@ -36,34 +29,25 @@
"TOSA-1.0+FP+INT+fft+int4+cf+8k",
]
-test_valid_1_0_extensions = {
+test_valid_extensions = {
"INT": ["int16", "int4", "var", "cf"],
"FP": ["bf16", "fp8e4m3", "fp8e5m2", "fft", "var", "cf"],
}
test_invalid_strings = [
- "TOSA-0.80+bi",
- "TOSA-0.80",
- "TOSA-0.80+8k",
- "TOSA-0.80+BI+MI",
- "TOSA-0.80+BI+U55",
"TOSA-1.0.0+fft",
"TOSA-1.0.0+fp+bf16+fft",
"TOSA-1.0.0+INT+INT4+cf",
- "TOSA-1.0.0+BI",
"TOSA-1.0.0+FP+FP+INT",
"TOSA-1.0.0+FP+CF+bf16",
"TOSA-1.0.0+BF16+fft+int4+cf+INT",
]
test_compile_specs = [
- ([CompileSpec("tosa_spec", "TOSA-0.80+BI".encode())],),
- ([CompileSpec("tosa_spec", "TOSA-0.80+BI+u55".encode())],),
([CompileSpec("tosa_spec", "TOSA-1.0.0+INT".encode())],),
]
test_compile_specs_no_version = [
- ([CompileSpec("other_key", "TOSA-0.80+BI".encode())],),
([CompileSpec("other_key", "some_value".encode())],),
]
@@ -71,14 +55,8 @@
class TestTosaSpecification(unittest.TestCase):
"""Tests the TOSA specification class"""
- @parameterized.expand(test_valid_0_80_strings) # type: ignore[misc]
- def test_version_string_0_80(self, version_string: str):
- tosa_spec = TosaSpecification.create_from_string(version_string)
- assert isinstance(tosa_spec, Tosa_0_80)
- assert tosa_spec.profile in ["BI", "MI"]
-
- @parameterized.expand(test_valid_1_0_strings) # type: ignore[misc]
- def test_version_string_1_0(self, version_string: str):
+ @parameterized.expand(test_valid_strings) # type: ignore[misc]
+ def test_version_string(self, version_string: str):
tosa_spec = TosaSpecification.create_from_string(version_string)
assert isinstance(tosa_spec, Tosa_1_00)
assert [profile in ["INT", "FP"] for profile in tosa_spec.profiles].count(
@@ -86,9 +64,7 @@ def test_version_string_1_0(self, version_string: str):
) > 0
for profile in tosa_spec.profiles:
- assert [
- e in test_valid_1_0_extensions[profile] for e in tosa_spec.extensions
- ]
+        assert all(e in test_valid_extensions[profile] for e in tosa_spec.extensions)
@parameterized.expand(test_invalid_strings) # type: ignore[misc]
def test_invalid_version_strings(self, version_string: str):
@@ -111,14 +87,8 @@ def test_create_from_invalid_compilespec(self, compile_specs: list[CompileSpec])
assert tosa_spec is None
- @parameterized.expand(test_valid_0_80_strings)
- def test_correct_string_representation_0_80(self, version_string: str):
- tosa_spec = TosaSpecification.create_from_string(version_string)
- assert isinstance(tosa_spec, Tosa_0_80)
- assert f"{tosa_spec}" == version_string
-
- @parameterized.expand(test_valid_1_0_strings)
- def test_correct_string_representation_1_0(self, version_string: str):
+ @parameterized.expand(test_valid_strings)
+ def test_correct_string_representation(self, version_string: str):
tosa_spec = TosaSpecification.create_from_string(version_string)
assert isinstance(tosa_spec, Tosa_1_00)
assert f"{tosa_spec}" == version_string
diff --git a/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py b/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py
index 72e23d506c5..9561e2132ee 100644
--- a/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py
+++ b/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py
@@ -83,7 +83,7 @@ def test_CLIPTextModelWithProjection_tosa_MI(self):
# MLETORCH-867, MLETORCH-1059
# Failures: "Fatal Python error: Aborted, Dependency cycles, KeyError in CastInt64BuffersToInt32Pass")
@unittest.expectedFailure
- def test_CLIPTextModelWithProjection_tosa_BI(self):
+ def test_CLIPTextModelWithProjection_tosa_INT(self):
text_encoder_model, text_encoder_model_inputs = self.prepare_model_and_inputs()
with torch.no_grad():
(
diff --git a/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py b/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py
index fc8ab9b484b..880dc17166d 100644
--- a/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py
+++ b/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py
@@ -89,7 +89,7 @@ def forward(self, *args, **kwargs):
return sd35_transformer2D_model, sd35_transformer2D_model_inputs
- def test_SD3Transformer2DModel_tosa_MI(self):
+ def test_SD3Transformer2DModel_tosa_FP(self):
sd35_transformer2D_model, sd35_transformer2D_model_inputs = (
self.prepare_model_and_inputs()
)
@@ -106,12 +106,12 @@ def test_SD3Transformer2DModel_tosa_MI(self):
.to_executorch()
.run_method_and_compare_outputs(
inputs=sd35_transformer2D_model_inputs,
- rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with MI and BI
+ rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT
atol=4.0,
)
)
- def test_SD3Transformer2DModel_tosa_BI(self):
+ def test_SD3Transformer2DModel_tosa_INT(self):
sd35_transformer2D_model, sd35_transformer2D_model_inputs = (
self.prepare_model_and_inputs()
)
@@ -129,7 +129,7 @@ def test_SD3Transformer2DModel_tosa_BI(self):
.to_executorch()
.run_method_and_compare_outputs(
inputs=sd35_transformer2D_model_inputs,
- qtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with MI and BI
+ qtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT
rtol=1.0,
atol=4.0,
)
diff --git a/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py b/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py
index 565db22492c..aba58379a92 100644
--- a/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py
+++ b/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py
@@ -86,7 +86,7 @@ def test_T5EncoderModel_tosa_MI(self):
)
)
- def test_T5EncoderModel_tosa_BI(self):
+ def test_T5EncoderModel_tosa_INT(self):
t5_encoder_model, t5_encoder_model_inputs = self.prepare_model_and_inputs()
with torch.no_grad():
(
diff --git a/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py b/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py
index d2c48e2adba..cab4ca53d9c 100644
--- a/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py
+++ b/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py
@@ -59,7 +59,7 @@ def test_AutoencoderKL_tosa_MI(self):
)
)
- def test_AutoencoderKL_tosa_BI(self):
+ def test_AutoencoderKL_tosa_INT(self):
auto_encoder_model, auto_encoder_model_inputs = self.prepare_model_and_inputs()
with torch.no_grad():
(
@@ -75,6 +75,6 @@ def test_AutoencoderKL_tosa_BI(self):
.to_executorch()
.run_method_and_compare_outputs(
inputs=auto_encoder_model_inputs,
- atol=1.0, # TODO: MLETORCH-990 Reduce tolerance of vae(AutoencoderKL) with BI
+ atol=1.0, # TODO: MLETORCH-990 Reduce tolerance of vae(AutoencoderKL) with INT
)
)
diff --git a/backends/arm/test/models/test_conformer.py b/backends/arm/test/models/test_conformer.py
index e6db624f256..6a66b25d27d 100644
--- a/backends/arm/test/models/test_conformer.py
+++ b/backends/arm/test/models/test_conformer.py
@@ -11,10 +11,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from torchaudio.models import Conformer
@@ -49,8 +50,8 @@ class TestConformer:
conformer = conformer.eval()
-def test_conformer_tosa_MI():
- pipeline = TosaPipelineMI[input_t](
+def test_conformer_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
TestConformer.conformer,
TestConformer.model_example_inputs,
aten_op=TestConformer.aten_ops,
@@ -60,8 +61,8 @@ def test_conformer_tosa_MI():
pipeline.run()
-def test_conformer_tosa_BI():
- pipeline = TosaPipelineBI[input_t](
+def test_conformer_tosa_INT():
+ pipeline = TosaPipelineINT[input_t](
TestConformer.conformer,
TestConformer.model_example_inputs,
aten_op=TestConformer.aten_ops,
@@ -84,8 +85,8 @@ def test_conformer_tosa_BI():
@pytest.mark.xfail(
reason="TODO(MLETORCH-635): Expected failure under FVP option, but test passed."
)
-def test_conformer_u55_BI():
- pipeline = EthosU55PipelineBI[input_t](
+def test_conformer_u55_INT():
+ pipeline = EthosU55PipelineINT[input_t](
TestConformer.conformer,
TestConformer.model_example_inputs,
aten_ops=TestConformer.aten_ops,
@@ -106,8 +107,8 @@ def test_conformer_u55_BI():
@common.XfailIfNoCorstone320
@pytest.mark.xfail(reason="All IO needs to have the same data type (MLETORCH-635)")
-def test_conformer_u85_BI():
- pipeline = EthosU85PipelineBI[input_t](
+def test_conformer_u85_INT():
+ pipeline = EthosU85PipelineINT[input_t](
TestConformer.conformer,
TestConformer.model_example_inputs,
aten_ops=TestConformer.aten_ops,
@@ -124,3 +125,40 @@ def test_conformer_u85_BI():
atol=5.0,
)
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_conformer_vgf_INT():
+ pipeline = VgfPipeline[input_t](
+ TestConformer.conformer,
+ TestConformer.model_example_inputs,
+ aten_op=TestConformer.aten_ops,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.pop_stage("check_count.exir")
+
+ # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs",
+ # get_test_inputs(
+ # TestConformer.dim, TestConformer.lengths, TestConformer.num_examples
+ # ),
+ # rtol=1.0,
+ # atol=3.0,
+ # )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_conformer_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ TestConformer.conformer,
+ TestConformer.model_example_inputs,
+ aten_op=TestConformer.aten_ops,
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
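
The VGF variants above lean on two pipeline helpers that recur throughout this diff: `pop_stage`
drops a named stage (here "check_count.exir", since delegate counts are not asserted for VGF yet),
and `change_args` overrides a stage's arguments. A hedged sketch of the pattern on a toy module
(the module, ops, and tolerance below are placeholders, not taken from any existing test):

    from typing import Tuple

    import torch
    from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineINT

    input_t = Tuple[torch.Tensor]

    class Double(torch.nn.Module):
        def forward(self, x):
            return x + x

    pipeline = TosaPipelineINT[input_t](
        Double(), (torch.ones(2),), "torch.ops.aten.add.Tensor", []
    )
    pipeline.pop_stage("check.aten")  # remove a stage by name
    pipeline.change_args("run_method_and_compare_outputs", qtol=1)  # override stage args
    pipeline.run()
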
diff --git a/backends/arm/test/models/test_deit_tiny_arm.py b/backends/arm/test/models/test_deit_tiny_arm.py
index a637db65dfd..22685a079bd 100644
--- a/backends/arm/test/models/test_deit_tiny_arm.py
+++ b/backends/arm/test/models/test_deit_tiny_arm.py
@@ -11,9 +11,12 @@
import torch
+from executorch.backends.arm.test import common
+
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
@@ -34,8 +37,8 @@
input_t = Tuple[torch.Tensor]
-def test_deit_tiny_tosa_MI():
- pipeline = TosaPipelineMI[input_t](
+def test_deit_tiny_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
deit_tiny,
model_inputs,
aten_op=[],
@@ -45,8 +48,8 @@ def test_deit_tiny_tosa_MI():
pipeline.run()
-def test_deit_tiny_tosa_BI():
- pipeline = TosaPipelineBI[input_t](
+def test_deit_tiny_tosa_INT():
+ pipeline = TosaPipelineINT[input_t](
deit_tiny,
model_inputs,
aten_op=[],
@@ -56,3 +59,31 @@ def test_deit_tiny_tosa_BI():
qtol=1,
)
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_deit_tiny_vgf_INT():
+ pipeline = VgfPipeline[input_t](
+ deit_tiny,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ atol=1.5,
+ qtol=1,
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_deit_tiny_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ deit_tiny,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
diff --git a/backends/arm/test/models/test_dl3_arm.py b/backends/arm/test/models/test_dl3_arm.py
index 2e7a3117865..2000ac34794 100644
--- a/backends/arm/test/models/test_dl3_arm.py
+++ b/backends/arm/test/models/test_dl3_arm.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from executorch.examples.models import deeplab_v3
@@ -31,8 +32,8 @@ class TestDl3:
dl3 = dl3.get_eager_model()
-def test_dl3_tosa_MI():
- pipeline = TosaPipelineMI[input_t](
+def test_dl3_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
TestDl3.dl3,
TestDl3.model_example_inputs,
aten_op=[],
@@ -44,8 +45,8 @@ def test_dl3_tosa_MI():
pipeline.run()
-def test_dl3_tosa_BI():
- pipeline = TosaPipelineBI[input_t](
+def test_dl3_tosa_INT():
+ pipeline = TosaPipelineINT[input_t](
TestDl3.dl3,
TestDl3.model_example_inputs,
aten_op=[],
@@ -59,8 +60,8 @@ def test_dl3_tosa_BI():
@common.XfailIfNoCorstone300
@pytest.mark.skip(reason="upsample_bilinear2d operator is not supported on U55")
-def test_dl3_u55_BI():
- pipeline = EthosU55PipelineBI[input_t](
+def test_dl3_u55_INT():
+ pipeline = EthosU55PipelineINT[input_t](
TestDl3.dl3,
TestDl3.model_example_inputs,
aten_ops=[],
@@ -75,8 +76,8 @@ def test_dl3_u55_BI():
@common.XfailIfNoCorstone320
@pytest.mark.skip(reason="Runs out of memory on U85")
-def test_dl3_u85_BI():
- pipeline = EthosU85PipelineBI[input_t](
+def test_dl3_u85_INT():
+ pipeline = EthosU85PipelineINT[input_t](
TestDl3.dl3,
TestDl3.model_example_inputs,
aten_ops=[],
@@ -87,3 +88,37 @@ def test_dl3_u85_BI():
"run_method_and_compare_outputs", rtol=1.0, atol=1.0
) # TODO: MLETORCH-1036 decrease tolerance
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_dl3_vgf_INT():
+ pipeline = VgfPipeline[input_t](
+ TestDl3.dl3,
+ TestDl3.model_example_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ )
+ # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs", rtol=1.0, atol=1.0
+ # )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_dl3_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ TestDl3.dl3,
+ TestDl3.model_example_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs", rtol=1.0, atol=1.0
+ # )
+ pipeline.run()
diff --git a/backends/arm/test/models/test_inception_v3_arm.py b/backends/arm/test/models/test_inception_v3_arm.py
new file mode 100644
index 00000000000..f69022de712
--- /dev/null
+++ b/backends/arm/test/models/test_inception_v3_arm.py
@@ -0,0 +1,121 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+from executorch.backends.arm.test import common
+import pytest
+
+import torch
+
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+from torchvision import models, transforms
+
+ic3 = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)
+ic3 = ic3.eval()
+
+# Normalization values referenced from here:
+# https://docs.pytorch.org/vision/main/models/generated/torchvision.models.quantization.inception_v3.html
+normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+model_inputs = (normalize(torch.rand(1, 3, 224, 224)),)
+input_t = Tuple[torch.Tensor]
+
+
+@pytest.mark.slow
+def test_ic3_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
+ ic3,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
+
+
+@pytest.mark.slow
+def test_ic3_tosa_INT():
+ pipeline = TosaPipelineINT[input_t](
+ ic3,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ use_to_edge_transform_and_lower=True,
+ atol=0.6,
+ qtol=1,
+ )
+ pipeline.run()
+
+
+@pytest.mark.slow
+@pytest.mark.skip(reason="Takes too long to run on CI")
+@common.XfailIfNoCorstone300
+def test_ic3_u55_INT():
+ pipeline = EthosU55PipelineINT[input_t](
+ ic3,
+ model_inputs,
+ aten_ops=[],
+ exir_ops=[],
+ run_on_fvp=True,
+ use_to_edge_transform_and_lower=True,
+ atol=0.6,
+ qtol=1,
+ )
+ pipeline.run()
+
+
+@pytest.mark.slow
+@pytest.mark.skip(reason="Takes too long to run on CI")
+@common.XfailIfNoCorstone320
+def test_ic3_u85_INT():
+ pipeline = EthosU85PipelineINT[input_t](
+ ic3,
+ model_inputs,
+ aten_ops=[],
+ exir_ops=[],
+ run_on_fvp=True,
+ use_to_edge_transform_and_lower=True,
+ atol=0.6,
+ qtol=1,
+ )
+ pipeline.run()
+
+
+@pytest.mark.slow
+@pytest.mark.skip(reason="Takes too long to run on CI")
+@common.SkipIfNoModelConverter
+def test_ic3_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ ic3,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
+
+
+@pytest.mark.slow
+@pytest.mark.skip(reason="Takes too long to run on CI")
+@common.SkipIfNoModelConverter
+def test_ic3_vgf_INT():
+ pipeline = VgfPipeline[input_t](
+ ic3,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py
index 84eec491c1e..7732943d5fb 100644
--- a/backends/arm/test/models/test_llama.py
+++ b/backends/arm/test/models/test_llama.py
@@ -17,10 +17,11 @@
import torch
from executorch.backends.arm._passes import InsertCastForOpsWithInt64InputPass
-from executorch.backends.arm.test import conftest
+from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from executorch.examples.models.llama.export_llama_lib import (
build_args_parser,
@@ -98,14 +99,14 @@ def prepare_model(self):
return llama_model, llama_inputs, llama_meta
-def test_llama_tosa_MI():
+def test_llama_tosa_FP():
llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
if llama_model is None or llama_inputs is None:
pytest.skip("Missing model and/or input files")
with torch.no_grad():
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
llama_model,
llama_inputs,
aten_op=[],
@@ -116,14 +117,14 @@ def test_llama_tosa_MI():
pipeline.run()
-def test_llama_tosa_BI():
+def test_llama_tosa_INT():
llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
if llama_model is None or llama_inputs is None:
pytest.skip("Missing model and/or input files")
with torch.no_grad():
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
llama_model,
llama_inputs,
aten_op=[],
@@ -131,3 +132,42 @@ def test_llama_tosa_BI():
use_to_edge_transform_and_lower=True,
)
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_llama_vgf_FP():
+ llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+ if llama_model is None or llama_inputs is None:
+ pytest.skip("Missing model and/or input files")
+
+ with torch.no_grad():
+ pipeline = VgfPipeline[input_t](
+ llama_model,
+ llama_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_llama_vgf_INT():
+ llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+ if llama_model is None or llama_inputs is None:
+ pytest.skip("Missing model and/or input files")
+
+ with torch.no_grad():
+ pipeline = VgfPipeline[input_t](
+ llama_model,
+ llama_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ transform_passes=[InsertCastForOpsWithInt64InputPass()],
+ )
+ pipeline.run()
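
`InsertCastForOpsWithInt64InputPass` is supplied only to the INT flow: exported LLM token ids are
int64 (torch.long), which the TOSA INT profile cannot carry, and - assuming the pass does what its
name states - it inserts casts down to int32 for such inputs. In plain torch the dtype change
amounts to:

    import torch

    tokens = torch.tensor([[101, 2023, 102]], dtype=torch.long)  # int64 ids
    tokens_i32 = tokens.to(torch.int32)  # what the inserted cast performs
    assert tokens_i32.dtype == torch.int32
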
diff --git a/backends/arm/test/models/test_lstm_arm.py b/backends/arm/test/models/test_lstm_arm.py
index 48d2e918ff6..1e63472f5f4 100644
--- a/backends/arm/test/models/test_lstm_arm.py
+++ b/backends/arm/test/models/test_lstm_arm.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from torch.nn.quantizable.modules import rnn
@@ -42,8 +43,8 @@ class TestLSTM:
model_example_inputs = get_test_inputs()
-def test_lstm_tosa_MI():
- pipeline = TosaPipelineMI[input_t](
+def test_lstm_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
TestLSTM.lstm,
TestLSTM.model_example_inputs,
aten_op=[],
@@ -54,8 +55,8 @@ def test_lstm_tosa_MI():
pipeline.run()
-def test_lstm_tosa_BI():
- pipeline = TosaPipelineBI[input_t](
+def test_lstm_tosa_INT():
+ pipeline = TosaPipelineINT[input_t](
TestLSTM.lstm,
TestLSTM.model_example_inputs,
aten_op=[],
@@ -69,8 +70,8 @@ def test_lstm_tosa_BI():
@common.XfailIfNoCorstone300
-def test_lstm_u55_BI():
- pipeline = EthosU55PipelineBI[input_t](
+def test_lstm_u55_INT():
+ pipeline = EthosU55PipelineINT[input_t](
TestLSTM.lstm,
TestLSTM.model_example_inputs,
aten_ops=[],
@@ -85,8 +86,8 @@ def test_lstm_u55_BI():
@common.XfailIfNoCorstone320
-def test_lstm_u85_BI():
- pipeline = EthosU85PipelineBI[input_t](
+def test_lstm_u85_INT():
+ pipeline = EthosU85PipelineINT[input_t](
TestLSTM.lstm,
TestLSTM.model_example_inputs,
aten_ops=[],
@@ -98,3 +99,37 @@ def test_lstm_u85_BI():
"run_method_and_compare_outputs", get_test_inputs(), atol=3e-1, qtol=1.0
)
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_lstm_vgf_INT():
+ pipeline = VgfPipeline[input_t](
+ TestLSTM.lstm,
+ TestLSTM.model_example_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ )
+ # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs", get_test_inputs(), atol=3e-1, qtol=1.0
+ # )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_lstm_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ TestLSTM.lstm,
+ TestLSTM.model_example_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs", get_test_inputs(), atol=3e-1, qtol=1.0
+ # )
+ pipeline.run()
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
index a1f9bc0633d..d4e3bbc8e28 100644
--- a/backends/arm/test/models/test_mobilenet_v2_arm.py
+++ b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -12,10 +12,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from torchvision import models, transforms # type: ignore[import-untyped]
@@ -38,16 +39,16 @@
}
-def test_mv2_tosa_MI():
- pipeline = TosaPipelineMI[input_t](
+def test_mv2_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
mv2, model_inputs, aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True
)
pipeline.run()
@common.parametrize("per_channel_quantization", quant_test_data)
-def test_mv2_tosa_BI(per_channel_quantization):
- pipeline = TosaPipelineBI[input_t](
+def test_mv2_tosa_INT(per_channel_quantization):
+ pipeline = TosaPipelineINT[input_t](
mv2,
model_inputs,
aten_op=[],
@@ -63,8 +64,8 @@ def test_mv2_tosa_BI(per_channel_quantization):
@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
-def test_mv2_u55_BI(per_channel_quantization):
- pipeline = EthosU55PipelineBI[input_t](
+def test_mv2_u55_INT(per_channel_quantization):
+ pipeline = EthosU55PipelineINT[input_t](
mv2,
model_inputs,
aten_ops=[],
@@ -81,8 +82,8 @@ def test_mv2_u55_BI(per_channel_quantization):
@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
-def test_mv2_u85_BI(per_channel_quantization):
- pipeline = EthosU85PipelineBI[input_t](
+def test_mv2_u85_INT(per_channel_quantization):
+ pipeline = EthosU85PipelineINT[input_t](
mv2,
model_inputs,
aten_ops=[],
@@ -94,3 +95,41 @@ def test_mv2_u85_BI(per_channel_quantization):
qtol=1,
)
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+@common.parametrize("per_channel_quantization", quant_test_data)
+def test_mv2_vgf_INT(per_channel_quantization):
+ pipeline = VgfPipeline[input_t](
+ mv2,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ per_channel_quantization=per_channel_quantization,
+ atol=0.25,
+ qtol=1,
+ )
+ # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs", get_test_inputs(), atol=3e-1, qtol=1.0
+ # )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_mv2_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ mv2,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs", get_test_inputs(), atol=3e-1, qtol=1.0
+ # ) # TODO: MLETORCH-1036 decrease tolerance
+ pipeline.run()
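
The `per_channel_quantization` toggle parametrized above switches weight quantization between one
scale for the whole tensor and one scale per output channel. A minimal illustration in plain torch
of why per-channel is usually tighter (illustrative only, not the quantizer's actual code):

    import torch

    w = torch.randn(8, 4)  # conv/linear weight: [out_channels, in_channels]
    # Per-tensor: a single symmetric int8 scale shared by every channel.
    scale_tensor = w.abs().max() / 127.0
    # Per-channel: one scale per output channel, adapted to each row's range.
    scale_channel = w.abs().amax(dim=1, keepdim=True) / 127.0
    q_tensor = torch.clamp(torch.round(w / scale_tensor), -127, 127)
    q_channel = torch.clamp(torch.round(w / scale_channel), -127, 127)
    err_tensor = (w - q_tensor * scale_tensor).abs().mean()
    err_channel = (w - q_channel * scale_channel).abs().mean()  # typically smaller
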
diff --git a/backends/arm/test/models/test_mobilenet_v3_arm.py b/backends/arm/test/models/test_mobilenet_v3_arm.py
index f80b94bad2e..0dcbd9757ac 100644
--- a/backends/arm/test/models/test_mobilenet_v3_arm.py
+++ b/backends/arm/test/models/test_mobilenet_v3_arm.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from torchvision import models, transforms
@@ -31,16 +32,16 @@
@pytest.mark.slow
-def test_mv3_tosa_MI():
- pipeline = TosaPipelineMI[input_t](
+def test_mv3_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
mv3, model_inputs, aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True
)
pipeline.run()
@pytest.mark.slow
-def test_mv3_tosa_BI():
- pipeline = TosaPipelineBI[input_t](
+def test_mv3_tosa_INT():
+ pipeline = TosaPipelineINT[input_t](
mv3,
model_inputs,
aten_op=[],
@@ -54,8 +55,8 @@ def test_mv3_tosa_BI():
@pytest.mark.slow
@common.XfailIfNoCorstone300
-def test_mv3_u55_BI():
- pipeline = EthosU55PipelineBI[input_t](
+def test_mv3_u55_INT():
+ pipeline = EthosU55PipelineINT[input_t](
mv3,
model_inputs,
aten_ops=[],
@@ -70,8 +71,8 @@ def test_mv3_u55_BI():
@pytest.mark.slow
@common.XfailIfNoCorstone320
-def test_mv3_u85_BI():
- pipeline = EthosU85PipelineBI[input_t](
+def test_mv3_u85_INT():
+ pipeline = EthosU85PipelineINT[input_t](
mv3,
model_inputs,
aten_ops=[],
@@ -82,3 +83,32 @@ def test_mv3_u85_BI():
qtol=1,
)
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+@pytest.mark.slow
+def test_mv3_vgf_INT():
+ pipeline = VgfPipeline[input_t](
+ mv3,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ atol=0.5,
+ qtol=1,
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_mv3_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ mv3,
+ model_inputs,
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
diff --git a/backends/arm/test/models/test_nn_functional.py b/backends/arm/test/models/test_nn_functional.py
index 7c5c98cdcb3..651f9585459 100644
--- a/backends/arm/test/models/test_nn_functional.py
+++ b/backends/arm/test/models/test_nn_functional.py
@@ -22,8 +22,8 @@
import torch
from executorch.backends.arm.test.common import parametrize
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -85,9 +85,9 @@ def forward(self, *args):
"affine_grid": "Int64 input. Partition handling fails since arange int64 output is split between 2 partitions.",
},
)
-def test_nn_functional_MI(test_data):
+def test_nn_functional_FP(test_data):
module, inputs = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
module, inputs, "", use_to_edge_transform_and_lower=False
)
pipeline.pop_stage("check.aten")
@@ -111,9 +111,9 @@ def test_nn_functional_MI(test_data):
@parametrize("test_data", module_tests, x_fails, strict=False)
-def test_nn_functional_BI(test_data):
+def test_nn_functional_INT(test_data):
module, inputs = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
module, inputs, "", use_to_edge_transform_and_lower=True
)
pipeline.pop_stage("check.aten")
diff --git a/backends/arm/test/models/test_nn_modules.py b/backends/arm/test/models/test_nn_modules.py
index 43fe1f4b3f9..0daf035a7f1 100644
--- a/backends/arm/test/models/test_nn_modules.py
+++ b/backends/arm/test/models/test_nn_modules.py
@@ -20,8 +20,8 @@
import torch
from executorch.backends.arm.test.common import parametrize
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
example_input = torch.rand(1, 6, 16, 16)
@@ -57,9 +57,9 @@
"test_data",
test_parameters,
)
-def test_nn_Modules_MI(test_data):
+def test_nn_Modules_FP(test_data):
module, inputs = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
module, inputs, "", use_to_edge_transform_and_lower=True
)
pipeline.pop_stage("check.aten")
@@ -83,9 +83,9 @@ def test_nn_Modules_MI(test_data):
"Transformer": "AssertionError: Output 0 does not match reference output.",
},
)
-def test_nn_Modules_BI(test_data):
+def test_nn_Modules_INT(test_data):
module, inputs = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
module, inputs, "", use_to_edge_transform_and_lower=True
)
pipeline.pop_stage("check.aten")
diff --git a/backends/arm/test/models/test_torch_functions.py b/backends/arm/test/models/test_torch_functions.py
index c7fc1654caa..580438f6da8 100644
--- a/backends/arm/test/models/test_torch_functions.py
+++ b/backends/arm/test/models/test_torch_functions.py
@@ -23,8 +23,8 @@
import torch
from executorch.backends.arm.test.common import parametrize
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -104,9 +104,9 @@ def forward(self, *args):
"norm": "An error occurred when running the 'KeepDimsFalseToSqueezePass' pass after the following passes:",
},
)
-def test_torch_fns_MI(test_data):
+def test_torch_fns_FP(test_data):
module, inputs = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
module, inputs, "", use_to_edge_transform_and_lower=True
)
pipeline.pop_stage("check.aten")
@@ -133,9 +133,9 @@ def test_torch_fns_MI(test_data):
},
strict=False,
)
-def test_torch_fns_BI(test_data):
+def test_torch_fns_INT(test_data):
module, inputs = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
module, inputs, "", use_to_edge_transform_and_lower=True
)
pipeline.pop_stage("check.aten")
diff --git a/backends/arm/test/models/test_w2l_arm.py b/backends/arm/test/models/test_w2l_arm.py
index 1a755937482..32b25a18fd8 100644
--- a/backends/arm/test/models/test_w2l_arm.py
+++ b/backends/arm/test/models/test_w2l_arm.py
@@ -13,10 +13,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from torchaudio import models
@@ -46,8 +47,8 @@ class TestW2L(unittest.TestCase):
@pytest.mark.slow # about 3min on std laptop
-def test_w2l_tosa_MI():
- pipeline = TosaPipelineMI[input_t](
+def test_w2l_tosa_FP():
+ pipeline = TosaPipelineFP[input_t](
TestW2L.w2l,
TestW2L.model_example_inputs,
aten_op=[],
@@ -59,8 +60,8 @@ def test_w2l_tosa_MI():
@pytest.mark.slow # about 1min on std laptop
@pytest.mark.flaky
-def test_w2l_tosa_BI():
- pipeline = TosaPipelineBI[input_t](
+def test_w2l_tosa_INT():
+ pipeline = TosaPipelineINT[input_t](
TestW2L.w2l,
TestW2L.model_example_inputs,
aten_op=[],
@@ -76,8 +77,8 @@ def test_w2l_tosa_BI():
reason="MLETORCH-1009: Wav2Letter fails on U55 due to unsupported conditions",
strict=False,
)
-def test_w2l_u55_BI():
- pipeline = EthosU55PipelineBI[input_t](
+def test_w2l_u55_INT():
+ pipeline = EthosU55PipelineINT[input_t](
TestW2L.w2l,
TestW2L.model_example_inputs,
aten_ops=[],
@@ -91,8 +92,8 @@ def test_w2l_u55_BI():
@pytest.mark.slow
@common.XfailIfNoCorstone320
@pytest.mark.skip(reason="Intermittent timeout issue: MLETORCH-856")
-def test_w2l_u85_BI():
- pipeline = EthosU85PipelineBI[input_t](
+def test_w2l_u85_INT():
+ pipeline = EthosU85PipelineINT[input_t](
TestW2L.w2l,
TestW2L.model_example_inputs,
aten_ops=[],
@@ -101,3 +102,30 @@ def test_w2l_u85_BI():
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+@pytest.mark.slow
+def test_w2l_vgf_INT():
+ pipeline = VgfPipeline[input_t](
+ TestW2L.w2l,
+ TestW2L.model_example_inputs,
+ aten_op=[],
+ exir_op=TestW2L.all_operators,
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_w2l_vgf_FP():
+ pipeline = VgfPipeline[input_t](
+ TestW2L.w2l,
+ TestW2L.model_example_inputs,
+ aten_op=[],
+ exir_op=TestW2L.all_operators,
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_abs.py b/backends/arm/test/ops/test_abs.py
index ed7e616e946..4ebcf7393c1 100644
--- a/backends/arm/test/ops/test_abs.py
+++ b/backends/arm/test/ops/test_abs.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.abs.default"
@@ -39,21 +40,21 @@ def forward(self, x):
@common.parametrize("test_data", Abs.test_parameters)
-def test_abs_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](Abs(), test_data(), aten_op, exir_op)
+def test_abs_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](Abs(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Abs.test_parameters)
-def test_abs_tosa_BI(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](Abs(), test_data(), aten_op, exir_op)
+def test_abs_tosa_INT(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](Abs(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Abs.test_parameters)
@common.XfailIfNoCorstone300
-def test_abs_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_abs_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Abs(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
@@ -61,8 +62,30 @@ def test_abs_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", Abs.test_parameters)
@common.XfailIfNoCorstone320
-def test_abs_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_abs_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Abs(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
+
+
+@common.parametrize("test_data", Abs.test_parameters)
+@common.SkipIfNoModelConverter
+def test_abs_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Abs(), test_data(), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Abs.test_parameters)
+@common.SkipIfNoModelConverter
+def test_abs_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Abs(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_acos.py b/backends/arm/test/ops/test_acos.py
new file mode 100644
index 00000000000..102d979352e
--- /dev/null
+++ b/backends/arm/test/ops/test_acos.py
@@ -0,0 +1,119 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+input_t = Tuple[torch.Tensor]
+aten_op = "torch.ops.aten.acos.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten__acos_default"
+
+
+test_data_suite = {
+ "ones": lambda: torch.ones(1, 7, 10, 12),
+ "rand_in_range": lambda: (torch.rand(10, 10) - 0.5) * 2, # Uniform in [-1, 1)
+ "ramp_valid": lambda: torch.linspace(-1.0, 1.0, steps=160),
+ "edge_cases": lambda: torch.tensor([-1.0, 0.0, 1.0]),
+ "1d_tensor": lambda: torch.linspace(-1.0, 1.0, steps=10), # Shape: [10]
+ "2d_batch": lambda: torch.tensor(
+ [[-1.0, -0.5, 0.0, 0.5, 1.0], [0.9, -0.9, 0.3, -0.3, 0.0]]
+ ), # Shape: [2, 5]
+ "3d_batch": lambda: torch.rand(4, 5, 6) * 2 - 1, # Shape: [4, 5, 6] in [-1, 1)
+ "3d_mixed_shape": lambda: (torch.rand(7, 15, 2) - 0.5) * 2,
+ "4d_mixed": lambda: torch.linspace(-1, 1, steps=1 * 3 * 4 * 5).reshape(
+ 1, 3, 4, 5
+    ),  # Shape: [1, 3, 4, 5]
+ "4d_random": lambda: (torch.rand(1, 5, 10, 7) - 0.5) * 2,
+ "bool_casted": lambda: torch.ones(3, 3, dtype=torch.bool).to(
+ dtype=torch.float32
+ ), # All 1.0 (edge case)
+}
+
+
+class Acos(torch.nn.Module):
+
+ def forward(self, x: torch.Tensor):
+ return torch.acos(x)
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_acos_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t](
+ Acos(),
+ (test_data(),),
+ aten_op,
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_acos_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t](
+ Acos(),
+ (test_data(),),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone300
+def test_acos_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t](
+ Acos(),
+ (test_data(),),
+ aten_ops=aten_op,
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone320
+def test_acos_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t](
+ Acos(),
+ (test_data(),),
+ aten_ops=aten_op,
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_acos_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Acos(),
+ (test_data(),),
+ [],
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_acos_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Acos(),
+ (test_data(),),
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
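
Every suite above is constructed to stay inside acos's domain. A one-line check of why - outside
[-1, 1] the reference itself is NaN, so output comparisons would be meaningless:

    import torch

    x = torch.tensor([-1.0, 0.0, 1.0, 1.5])
    print(torch.acos(x))  # tensor([3.1416, 1.5708, 0.0000, nan])
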
diff --git a/backends/arm/test/ops/test_acosh.py b/backends/arm/test/ops/test_acosh.py
index 00742105b63..25ba2b1a83b 100644
--- a/backends/arm/test/ops/test_acosh.py
+++ b/backends/arm/test/ops/test_acosh.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = Tuple[torch.Tensor] # Input x
@@ -48,8 +49,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_acosh_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t](
+def test_acosh_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t](
Acosh(),
(test_data(),),
aten_op,
@@ -59,8 +60,8 @@ def test_acosh_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_acosh_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t](
+def test_acosh_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t](
Acosh(),
(test_data(),),
aten_op=[],
@@ -70,8 +71,8 @@ def test_acosh_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_acosh_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t](
+def test_acosh_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t](
Acosh(),
(test_data(),),
aten_ops=[],
@@ -81,8 +82,8 @@ def test_acosh_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite_xfails)
@pytest.mark.xfail(reason="Invalid inputs are currently not handled")
-def test_acosh_u55_BI_xfail(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t](
+def test_acosh_u55_INT_xfail(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t](
Acosh(),
(test_data(),),
aten_ops=[],
@@ -93,8 +94,8 @@ def test_acosh_u55_BI_xfail(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_acosh_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t](
+def test_acosh_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t](
Acosh(),
(test_data(),),
aten_ops=[],
@@ -104,11 +105,35 @@ def test_acosh_u85_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite_xfails)
@pytest.mark.xfail(reason="Invalid inputs are currently not handled")
-def test_acosh_u85_BI_xfail(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t](
+def test_acosh_u85_INT_xfail(test_data: Tuple):
+    pipeline = EthosU85PipelineINT[input_t](
Acosh(),
(test_data(),),
aten_ops=[],
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_acosh_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Acosh(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_acosh_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Acosh(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_adaptive_avg_pool2d.py b/backends/arm/test/ops/test_adaptive_avg_pool2d.py
index 7426ef78dca..4411ce7f746 100644
--- a/backends/arm/test/ops/test_adaptive_avg_pool2d.py
+++ b/backends/arm/test/ops/test_adaptive_avg_pool2d.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
exir_op = "executorch_exir_dialects_edge__ops_aten_avg_pool2d_default"
@@ -110,10 +111,10 @@ def forward(self, *args, **kwargs):
@common.parametrize("test_module", test_modules)
-def test_adaptive_avg_pool2d_tosa_MI(test_module):
+def test_adaptive_avg_pool2d_tosa_FP(test_module):
model, input_tensor = test_module()
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
model,
input_tensor,
aten_op=[],
@@ -123,10 +124,10 @@ def test_adaptive_avg_pool2d_tosa_MI(test_module):
@common.parametrize("test_module", test_modules)
-def test_adaptive_avg_pool2d_tosa_BI(test_module):
+def test_adaptive_avg_pool2d_tosa_INT(test_module):
model, input_tensor = test_module()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
input_tensor,
aten_op=[],
@@ -137,10 +138,10 @@ def test_adaptive_avg_pool2d_tosa_BI(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone300
-def test_adaptive_avg_pool2d_u55_BI(test_module):
+def test_adaptive_avg_pool2d_u55_INT(test_module):
model, input_tensor = test_module()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
input_tensor,
aten_ops=[],
@@ -151,13 +152,41 @@ def test_adaptive_avg_pool2d_u55_BI(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone320
-def test_adaptive_avg_pool2d_u85_BI(test_module):
+def test_adaptive_avg_pool2d_u85_INT(test_module):
model, input_tensor = test_module()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
input_tensor,
aten_ops=[],
exir_ops=exir_op,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_adaptive_avg_pool2d_vgf_FP(test_module):
+ model, input_tensor = test_module()
+ pipeline = VgfPipeline[input_t](
+ model,
+ input_tensor,
+ [],
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_adaptive_avg_pool2d_vgf_INT(test_module):
+ model, input_tensor = test_module()
+ pipeline = VgfPipeline[input_t](
+ model,
+ input_tensor,
+ [],
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
index 777603f0301..6bf3830d038 100644
--- a/backends/arm/test/ops/test_add.py
+++ b/backends/arm/test/ops/test_add.py
@@ -7,18 +7,18 @@
from typing import Tuple
+import pytest
import torch
-from executorch.backends.arm.arm_backend import get_tosa_spec
from executorch.backends.arm.quantizer import arm_quantizer
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
VgfPipeline,
)
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import get_tosa_spec, TosaSpecification
from executorch.backends.xnnpack.test.tester import Quantize
from torchao.quantization.pt2e import HistogramObserver
from torchao.quantization.pt2e.quantizer import QuantizationSpec
@@ -80,23 +80,22 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
@common.parametrize("test_data", Add.test_data)
-def test_add_tensor_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](Add(), test_data(), aten_op, exir_op)
+def test_add_tensor_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](Add(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Add.test_data)
-def test_add_tensor_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Add(), test_data(), aten_op, exir_op)
+def test_add_tensor_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](Add(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Add.test_data)
-def test_add_tensor_tosa_BI_i32(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Add(), test_data(), aten_op, exir_op)
+def test_add_tensor_tosa_INT_i32(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](Add(), test_data(), aten_op, exir_op)
tosa_version = conftest.get_option("tosa_version")
tosa_profiles = {
- "0.80": TosaSpecification.create_from_string("TOSA-0.80+BI"),
"1.0": TosaSpecification.create_from_string("TOSA-1.0+INT"),
}
# Create a quantizer with int8 quantization on the input and output but int32 on everything else.
@@ -129,8 +128,8 @@ def test_add_tensor_tosa_BI_i32(test_data: input_t1):
@common.parametrize("test_data", Add.test_data)
@common.XfailIfNoCorstone300
-def test_add_tensor_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_add_tensor_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
Add(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
@@ -138,41 +137,41 @@ def test_add_tensor_u55_BI(test_data: input_t1):
@common.parametrize("test_data", Add.test_data)
@common.XfailIfNoCorstone320
-def test_add_tensor_u85_BI(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_add_tensor_u85_INT(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
Add(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
@common.parametrize("test_data", Add2.test_data)
-def test_add_tensor_tosa_MI_2(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](Add2(), test_data(), aten_op, exir_op)
+def test_add_tensor_tosa_FP_2(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](Add2(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Add3.test_data)
-def test_add_tensor_tosa_MI_3(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](Add3(), test_data(), aten_op, exir_op)
+def test_add_tensor_tosa_FP_3(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](Add3(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Add3.test_data)
-def test_add_tensor_tosa_BI_3(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](Add3(), test_data(), aten_op, exir_op)
+def test_add_tensor_tosa_INT_3(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](Add3(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Add2.test_data)
-def test_add_tensor_tosa_BI_2(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](Add2(), test_data(), aten_op, exir_op)
+def test_add_tensor_tosa_INT_2(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](Add2(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Add2.test_data)
@common.XfailIfNoCorstone300
-def test_add_tensor_u55_BI_2(test_data: input_t2):
- pipeline = EthosU55PipelineBI[input_t2](
+def test_add_tensor_u55_INT_2(test_data: input_t2):
+ pipeline = EthosU55PipelineINT[input_t2](
Add2(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
@@ -180,8 +179,8 @@ def test_add_tensor_u55_BI_2(test_data: input_t2):
@common.parametrize("test_data", Add2.test_data)
@common.XfailIfNoCorstone320
-def test_add_tensor_u85_BI_2(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_add_tensor_u85_INT_2(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
Add2(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
@@ -189,9 +188,19 @@ def test_add_tensor_u85_BI_2(test_data: input_t2):
@common.parametrize("test_data", Add.test_data)
@common.SkipIfNoModelConverter
+@common.XfailfNoVKMLEmulationLayer
+@pytest.mark.xfail(
+ reason="VGF runtime is not yet fully supported for FP pipeline (MLETORCH-1234)",
+ strict=True,
+)
def test_add_tensor_vgf_FP(test_data: input_t1):
pipeline = VgfPipeline[input_t1](
- Add(), test_data(), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ Add(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ run_on_vulkan_runtime=True,
)
pipeline.run()
diff --git a/backends/arm/test/ops/test_addmm.py b/backends/arm/test/ops/test_addmm.py
index 7da5596ab00..cfe324ab0af 100644
--- a/backends/arm/test/ops/test_addmm.py
+++ b/backends/arm/test/ops/test_addmm.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.addmm.default"
@@ -112,8 +113,8 @@ def forward(
@common.parametrize("test_data", test_data_suite)
-def test_addmm_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_addmm_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Addmm(),
(*test_data,),
aten_op=aten_op,
@@ -123,8 +124,8 @@ def test_addmm_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_addmm_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_addmm_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Addmm(),
(*test_data,),
aten_op=[],
@@ -135,8 +136,8 @@ def test_addmm_tosa_BI(test_data: Tuple):
@common.XfailIfNoCorstone300
@common.parametrize("test_data", test_data_suite)
-def test_addmm_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_addmm_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Addmm(),
(*test_data,),
aten_ops=[],
@@ -147,11 +148,37 @@ def test_addmm_u55_BI(test_data: Tuple):
@common.XfailIfNoCorstone320
@common.parametrize("test_data", test_data_suite)
-def test_addmm_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_addmm_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Addmm(),
(*test_data,),
aten_ops=[],
exir_ops=exir_op,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_addmm_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Addmm(),
+ (*test_data,),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_addmm_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Addmm(),
+ (*test_data,),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_alias_copy.py b/backends/arm/test/ops/test_alias_copy.py
index 74e62275577..cf8caca02c4 100644
--- a/backends/arm/test/ops/test_alias_copy.py
+++ b/backends/arm/test/ops/test_alias_copy.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor]
@@ -44,8 +45,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", AliasCopy.test_data)
-def test_alias_tosa_MI(test_data: input_t1):
- TosaPipelineMI[input_t1](
+def test_alias_tosa_FP(test_data: input_t1):
+ TosaPipelineFP[input_t1](
AliasCopy(),
test_data(),
AliasCopy.aten_op,
@@ -54,8 +55,8 @@ def test_alias_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", AliasCopy.test_data)
-def test_alias_tosa_BI(test_data: input_t1):
- TosaPipelineBI[input_t1](
+def test_alias_tosa_INT(test_data: input_t1):
+ TosaPipelineINT[input_t1](
AliasCopy(),
test_data(),
AliasCopy.aten_op,
@@ -65,8 +66,8 @@ def test_alias_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", AliasCopy.test_data)
@common.XfailIfNoCorstone300
-def test_alias_u55_BI(test_data: input_t1):
- EthosU55PipelineBI[input_t1](
+def test_alias_u55_INT(test_data: input_t1):
+ EthosU55PipelineINT[input_t1](
AliasCopy(),
test_data(),
AliasCopy.aten_op,
@@ -76,10 +77,36 @@ def test_alias_u55_BI(test_data: input_t1):
@common.parametrize("test_data", AliasCopy.test_data)
@common.XfailIfNoCorstone320
-def test_alias_u85_BI(test_data: input_t1):
- EthosU85PipelineBI[input_t1](
+def test_alias_u85_INT(test_data: input_t1):
+ EthosU85PipelineINT[input_t1](
AliasCopy(),
test_data(),
AliasCopy.aten_op,
AliasCopy.exir_op,
).run()
+
+
+@common.parametrize("test_data", AliasCopy.test_data)
+@common.SkipIfNoModelConverter
+def test_alias_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AliasCopy(),
+ test_data(),
+ AliasCopy.aten_op,
+ AliasCopy.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AliasCopy.test_data)
+@common.SkipIfNoModelConverter
+def test_alias_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AliasCopy(),
+ test_data(),
+ AliasCopy.aten_op,
+ AliasCopy.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_amax.py b/backends/arm/test/ops/test_amax.py
index bde9174de0f..3600c34c94c 100644
--- a/backends/arm/test/ops/test_amax.py
+++ b/backends/arm/test/ops/test_amax.py
@@ -10,10 +10,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -69,20 +70,20 @@ def forward(self, x):
@common.parametrize("test_data", Amax.test_data)
-def test_amax_tosa_MI(test_data: Amax.input_t):
+def test_amax_tosa_FP(test_data: Amax.input_t):
data, dim, keep_dims = test_data()
- pipeline = TosaPipelineMI[Amax.input_t](Amax(dim, keep_dims), data, Amax.aten_op)
+ pipeline = TosaPipelineFP[Amax.input_t](Amax(dim, keep_dims), data, Amax.aten_op)
pipeline.run()
@common.parametrize("test_data", Amax.test_data)
-def test_amax_tosa_BI(test_data: Amax.input_t):
+def test_amax_tosa_INT(test_data: Amax.input_t):
data, dim, keep_dims = test_data()
- pipeline = TosaPipelineBI[Amax.input_t](Amax(dim, keep_dims), data, Amax.aten_op)
+ pipeline = TosaPipelineINT[Amax.input_t](Amax(dim, keep_dims), data, Amax.aten_op)
pipeline.run()
-def test_amax_u55_BI_not_delegated():
+def test_amax_u55_INT_not_delegated():
data, dim, keep_dims = Amax.test_data["rank_4_all_dim"]()
pipeline = OpNotSupportedPipeline[Amax.input_t](
Amax(dim, keep_dims),
@@ -99,9 +100,9 @@ def test_amax_u55_BI_not_delegated():
@common.parametrize("test_data", Amax.test_data, fvp_xfails, strict=False)
@common.XfailIfNoCorstone320
-def test_amax_u85_BI(test_data: Amax.input_t):
+def test_amax_u85_INT(test_data: Amax.input_t):
data, dim, keep_dims = test_data()
- pipeline = EthosU85PipelineBI[Amax.input_t](
+ pipeline = EthosU85PipelineINT[Amax.input_t](
Amax(dim, keep_dims),
data,
Amax.aten_op,
@@ -111,22 +112,22 @@ def test_amax_u85_BI(test_data: Amax.input_t):
@common.parametrize("test_data", Max.test_data)
-def test_max_dim_tosa_MI_to_amax(test_data: Max.input_t):
+def test_max_dim_tosa_FP_to_amax(test_data: Max.input_t):
data, dim = test_data()
- pipeline = TosaPipelineMI[Max.input_t](Max(dim), data, "torch.ops.aten.max")
+ pipeline = TosaPipelineFP[Max.input_t](Max(dim), data, "torch.ops.aten.max")
pipeline.run()
@common.parametrize("test_data", Max.test_data)
-def test_max_dim_tosa_BI_to_amax(test_data: Max.input_t):
+def test_max_dim_tosa_INT_to_amax(test_data: Max.input_t):
data, dim = test_data()
module = Max(dim)
- pipeline = TosaPipelineBI[Max.input_t](module, data, "torch.ops.aten.amax")
+ pipeline = TosaPipelineINT[Max.input_t](module, data, "torch.ops.aten.amax")
pipeline.run()
@pytest.mark.xfail(reason="MLETORCH-718 : Quantization of indices in arm_quantizer")
-def test_max_dim_tosa_BI_not_delegated():
+def test_max_dim_tosa_INT_not_delegated():
data, dim = Max.test_data()["rank_4_dim_3"]()
pipeline = OpNotSupportedPipeline[Max.input_t](
MaxWithIndex(dim), data, {}, quantize=True
@@ -134,7 +135,61 @@ def test_max_dim_tosa_BI_not_delegated():
pipeline.run()
-def test_max_dim_tosa_MI_not_delegated():
+def test_max_dim_tosa_FP_not_delegated():
data, dim = Max.test_data["rank_4_dim_3"]()
pipeline = OpNotSupportedPipeline[Max.input_t](MaxWithIndex(dim), data, {})
pipeline.run()
+
+
+@common.parametrize("test_data", Amax.test_data)
+@common.SkipIfNoModelConverter
+def test_amax_vgf_FP(test_data: Amax.input_t):
+ data, dim, keep_dims = test_data()
+ module = Amax(dim, keep_dims)
+ pipeline = VgfPipeline[Amax.input_t](
+ module,
+ data,
+ Amax.aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Amax.test_data)
+@common.SkipIfNoModelConverter
+def test_amax_vgf_INT(test_data: Amax.input_t):
+ data, dim, keep_dims = test_data()
+ module = Amax(dim, keep_dims)
+ pipeline = VgfPipeline[Amax.input_t](
+ module,
+ data,
+ Amax.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Max.test_data)
+@common.SkipIfNoModelConverter
+def test_max_dim_vgf_FP_to_amax(test_data: Max.input_t):
+ data, dim = test_data()
+ pipeline = VgfPipeline[Max.input_t](
+ Max(dim),
+ data,
+ "torch.ops.aten.max",
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Max.test_data)
+@common.SkipIfNoModelConverter
+def test_max_dim_vgf_INT_to_amax(test_data: Max.input_t):
+ data, dim = test_data()
+ pipeline = VgfPipeline[Max.input_t](
+ Max(dim),
+ data,
+ "torch.ops.aten.amax",
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_amin.py b/backends/arm/test/ops/test_amin.py
index 89c4b71e5af..3ae94fe3c6e 100644
--- a/backends/arm/test/ops/test_amin.py
+++ b/backends/arm/test/ops/test_amin.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -70,9 +71,9 @@ def forward(self, x):
@common.parametrize("test_data", Amin.test_data)
-def test_amin_tosa_MI(test_data: Amin.input_t):
+def test_amin_tosa_FP(test_data: Amin.input_t):
data, dim, keep_dims = test_data()
- pipeline = TosaPipelineMI[Amin.input_t](
+ pipeline = TosaPipelineFP[Amin.input_t](
Amin(dim, keep_dims),
data,
Amin.aten_op,
@@ -81,9 +82,9 @@ def test_amin_tosa_MI(test_data: Amin.input_t):
@common.parametrize("test_data", Amin.test_data)
-def test_amin_tosa_BI(test_data: Amin.input_t):
+def test_amin_tosa_INT(test_data: Amin.input_t):
data, dim, keep_dims = test_data()
- pipeline = TosaPipelineBI[Amin.input_t](
+ pipeline = TosaPipelineINT[Amin.input_t](
Amin(dim, keep_dims),
data,
Amin.aten_op,
@@ -91,7 +92,7 @@ def test_amin_tosa_BI(test_data: Amin.input_t):
pipeline.run()
-def test_amin_u55_BI_not_delegated():
+def test_amin_u55_INT_not_delegated():
data, dim, keep_dims = Amin.test_data["rank_4_all_dim"]()
pipeline = OpNotSupportedPipeline[Amin.input_t](
Amin(dim, keep_dims),
@@ -108,9 +109,9 @@ def test_amin_u55_BI_not_delegated():
@common.parametrize("test_data", Amin.test_data, fvp_xfails, strict=False)
@common.XfailIfNoCorstone320
-def test_amin_u85_BI(test_data: Amin.input_t):
+def test_amin_u85_INT(test_data: Amin.input_t):
data, dim, keep_dims = test_data()
- pipeline = EthosU85PipelineBI[Amin.input_t](
+ pipeline = EthosU85PipelineINT[Amin.input_t](
Amin(dim, keep_dims),
data,
Amin.aten_op,
@@ -120,22 +121,22 @@ def test_amin_u85_BI(test_data: Amin.input_t):
@common.parametrize("test_data", Min.test_data)
-def test_min_dim_tosa_MI_to_amin(test_data: Min.input_t):
+def test_min_dim_tosa_FP_to_amin(test_data: Min.input_t):
data, dim = test_data()
- pipeline = TosaPipelineMI[Min.input_t](Min(dim), data, "torch.ops.aten.min")
+ pipeline = TosaPipelineFP[Min.input_t](Min(dim), data, "torch.ops.aten.min")
pipeline.run()
@common.parametrize("test_data", Min.test_data)
-def test_min_dim_tosa_BI_to_amin(test_data: Min.input_t):
+def test_min_dim_tosa_INT_to_amin(test_data: Min.input_t):
data, dim = test_data()
module = Min(dim)
- pipeline = TosaPipelineBI[Min.input_t](module, data, "torch.ops.aten.amin")
+ pipeline = TosaPipelineINT[Min.input_t](module, data, "torch.ops.aten.amin")
pipeline.run()
@pytest.mark.xfail(reason="MLETORCH-718 : Quantization of indices in arm_quantizer")
-def test_min_dim_tosa_BI_not_delegated():
+def test_min_dim_tosa_INT_not_delegated():
data, dim = Min.test_data["rank_4_dim_3"]()
pipeline = OpNotSupportedPipeline[Min.input_t](
MinWithIndex(dim),
@@ -146,7 +147,56 @@ def test_min_dim_tosa_BI_not_delegated():
pipeline.run()
-def test_min_dim_tosa_MI_not_delegated():
+def test_min_dim_tosa_FP_not_delegated():
data, dim = Min.test_data["rank_4_dim_3"]()
pipeline = OpNotSupportedPipeline[Min.input_t](MinWithIndex(dim), data, {})
pipeline.run()
+
+
+@common.parametrize("test_data", Amin.test_data)
+@common.SkipIfNoModelConverter
+def test_amin_vgf_FP(test_data: Amin.input_t):
+ data, dim, keep_dims = test_data()
+ pipeline = VgfPipeline[Amin.input_t](
+ Amin(dim, keep_dims), data, Amin.aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Amin.test_data)
+@common.SkipIfNoModelConverter
+def test_amin_vgf_INT(test_data: Amin.input_t):
+ data, dim, keep_dims = test_data()
+ pipeline = VgfPipeline[Amin.input_t](
+ Amin(dim, keep_dims),
+ data,
+ Amin.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Min.test_data)
+@common.SkipIfNoModelConverter
+def test_min_dim_vgf_FP_to_amin(test_data: Min.input_t):
+ data, dim = test_data()
+ pipeline = VgfPipeline[Min.input_t](
+ Min(dim),
+ data,
+ "torch.ops.aten.min",
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Min.test_data)
+@common.SkipIfNoModelConverter
+def test_min_dim_vgf_INT_to_amin(test_data: Min.input_t):
+ data, dim = test_data()
+ pipeline = VgfPipeline[Min.input_t](
+ Min(dim),
+ data,
+ "torch.ops.aten.amin",
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_any.py b/backends/arm/test/ops/test_any.py
index 338c5f05cc6..ae738480048 100644
--- a/backends/arm/test/ops/test_any.py
+++ b/backends/arm/test/ops/test_any.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -122,9 +123,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data)
-def test_any_tosa_MI(test_data: input_t1):
+def test_any_tosa_FP(test_data: input_t1):
op, test_input = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
op,
test_input(),
op.aten_op,
@@ -137,9 +138,9 @@ def test_any_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", test_data)
-def test_any_tosa_BI(test_data: input_t1):
+def test_any_tosa_INT(test_data: input_t1):
op, test_input = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
op,
test_input(),
op.aten_op,
@@ -154,7 +155,7 @@ def test_any_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", test_data)
-def test_any_u55_BI(test_data: input_t1):
+def test_any_u55_INT(test_data: input_t1):
# Tests that we don't delegate these ops since they are not supported on U55.
op, test_input = test_data()
pipeline = OpNotSupportedPipeline[input_t1](
@@ -169,9 +170,9 @@ def test_any_u55_BI(test_data: input_t1):
@common.parametrize("test_data", test_data)
@common.XfailIfNoCorstone320
-def test_any_u85_BI(test_data: input_t1):
+def test_any_u85_INT(test_data: input_t1):
op, test_input = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
op,
test_input(),
op.aten_op,
@@ -184,3 +185,33 @@ def test_any_u85_BI(test_data: input_t1):
pipeline.pop_stage("quantize")
pipeline.pop_stage("check.quant_nodes")
pipeline.run()
+
+
+@common.parametrize("test_data", test_data)
+@common.SkipIfNoModelConverter
+def test_any_vgf_FP(test_data: input_t1):
+ op, data_fn = test_data()
+ pipeline = VgfPipeline[input_t1](
+ op,
+ data_fn(),
+ op.aten_op,
+ op.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data)
+@common.SkipIfNoModelConverter
+def test_any_vgf_INT(test_data: input_t1):
+ op, data_fn = test_data()
+ pipeline = VgfPipeline[input_t1](
+ op,
+ data_fn(),
+ op.aten_op,
+ op.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_arange.py b/backends/arm/test/ops/test_arange.py
index dc2a6cefa12..ede00768f52 100644
--- a/backends/arm/test/ops/test_arange.py
+++ b/backends/arm/test/ops/test_arange.py
@@ -10,10 +10,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = tuple[torch.Tensor]
@@ -53,9 +54,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", ArangeAdd.test_data)
-def test_arange_start_step_tosa_MI(test_data: test_data_t):
+def test_arange_start_step_tosa_FP(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
ArangeAdd(*init_data),
input_data(),
ArangeAdd.aten_op,
@@ -65,9 +66,9 @@ def test_arange_start_step_tosa_MI(test_data: test_data_t):
@common.parametrize("test_data", ArangeAdd.test_data_dtypes)
-def test_arange_start_step_tosa_MI_dtypes(test_data: test_data_t):
+def test_arange_start_step_tosa_FP_dtypes(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
ArangeAdd(*init_data),
input_data(),
ArangeAdd.aten_op,
@@ -77,9 +78,9 @@ def test_arange_start_step_tosa_MI_dtypes(test_data: test_data_t):
@common.parametrize("test_data", ArangeAdd.test_data)
-def test_arange_start_step_tosa_BI(test_data: test_data_t):
+def test_arange_start_step_tosa_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
ArangeAdd(*init_data),
input_data(),
ArangeAdd.aten_op,
@@ -91,9 +92,9 @@ def test_arange_start_step_tosa_BI(test_data: test_data_t):
@common.parametrize("test_data", ArangeAdd.test_data)
@common.XfailIfNoCorstone300
-def test_arange_start_step_u55_BI(test_data: test_data_t):
+def test_arange_start_step_u55_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
ArangeAdd(*init_data),
input_data(),
ArangeAdd.aten_op,
@@ -104,9 +105,9 @@ def test_arange_start_step_u55_BI(test_data: test_data_t):
@common.parametrize("test_data", ArangeAdd.test_data)
@common.XfailIfNoCorstone320
-def test_arange_start_step_u85_BI(test_data: test_data_t):
+def test_arange_start_step_u85_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
ArangeAdd(*init_data),
input_data(),
ArangeAdd.aten_op,
@@ -115,6 +116,36 @@ def test_arange_start_step_u85_BI(test_data: test_data_t):
pipeline.run()
+@common.parametrize("test_data", ArangeAdd.test_data)
+@common.SkipIfNoModelConverter
+def test_arange_start_step_vgf_FP(test_data: test_data_t):
+ input_data, init_data = test_data
+ module = ArangeAdd(*init_data)
+ pipeline = VgfPipeline[input_t](
+ module,
+ input_data(),
+ module.aten_op,
+ module.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", ArangeAdd.test_data)
+@common.SkipIfNoModelConverter
+def test_arange_start_step_vgf_INT(test_data: test_data_t):
+ input_data, init_data = test_data
+ module = ArangeAdd(*init_data)
+ pipeline = VgfPipeline[input_t](
+ module,
+ input_data(),
+ module.aten_op,
+ module.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
class LinspaceAdd(torch.nn.Module):
aten_op: str = "torch.ops.aten.linspace.default"
exir_op: str = "executorch_exir_dialects_edge__ops_aten_arange_default"
@@ -134,9 +165,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", LinspaceAdd.test_data)
-def test_linspace_tosa_MI(test_data):
+def test_linspace_tosa_FP(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
LinspaceAdd(*init_data),
input_data(),
LinspaceAdd.aten_op,
@@ -146,15 +177,42 @@ def test_linspace_tosa_MI(test_data):
@common.parametrize("test_data", LinspaceAdd.test_data)
-def test_linspace_tosa_BI(test_data: test_data_t):
+def test_linspace_tosa_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
LinspaceAdd(*init_data),
input_data(),
LinspaceAdd.aten_op,
LinspaceAdd.exir_op,
)
- pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+@common.parametrize("test_data", LinspaceAdd.test_data)
+@common.SkipIfNoModelConverter
+def test_linspace_vgf_FP(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ LinspaceAdd(*init_data),
+ input_data(),
+ LinspaceAdd.aten_op,
+ LinspaceAdd.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", LinspaceAdd.test_data)
+@common.SkipIfNoModelConverter
+def test_linspace_vgf_INT(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ LinspaceAdd(*init_data),
+ input_data(),
+ LinspaceAdd.aten_op,
+ LinspaceAdd.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
pipeline.run()
@@ -162,20 +220,30 @@ def test_linspace_tosa_BI(test_data: test_data_t):
@pytest.mark.skip(reason=skip_str)
-def test_arange_tosa_MI():
+def test_arange_tosa_FP():
+ pass
+
+
+@pytest.mark.skip(reason=skip_str)
+def test_arange_tosa_INT():
+ pass
+
+
+@pytest.mark.skip(reason=skip_str)
+def test_arange_u55_INT():
pass
@pytest.mark.skip(reason=skip_str)
-def test_arange_tosa_BI():
+def test_arange_u85_INT():
pass
@pytest.mark.skip(reason=skip_str)
-def test_arange_u55_BI():
+def test_arange_vgf_FP():
pass
@pytest.mark.skip(reason=skip_str)
-def test_arange_u85_BI():
+def test_arange_vgf_INT():
pass
diff --git a/backends/arm/test/ops/test_asin.py b/backends/arm/test/ops/test_asin.py
index ccb1b3bfc30..9c37bddbd92 100644
--- a/backends/arm/test/ops/test_asin.py
+++ b/backends/arm/test/ops/test_asin.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = Tuple[torch.Tensor] # Input x
@@ -37,8 +38,8 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_asin_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t](
+def test_asin_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t](
Asin(),
(test_data(),),
aten_op,
@@ -48,8 +49,8 @@ def test_asin_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_asin_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t](
+def test_asin_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t](
Asin(),
(test_data(),),
aten_op=[],
@@ -60,8 +61,8 @@ def test_asin_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_asin_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t](
+def test_asin_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t](
Asin(),
(test_data(),),
aten_ops=[],
@@ -71,10 +72,34 @@ def test_asin_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_asin_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t](
+def test_asin_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t](
Asin(),
(test_data(),),
aten_ops=[],
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_asin_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Asin(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_asin_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Asin(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_asinh.py b/backends/arm/test/ops/test_asinh.py
new file mode 100644
index 00000000000..305c822601c
--- /dev/null
+++ b/backends/arm/test/ops/test_asinh.py
@@ -0,0 +1,104 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+input_t = Tuple[torch.Tensor] # Input x
+aten_op = "torch.ops.aten.asinh.default"
+
+test_data_suite = {
+ "zeros": lambda: torch.zeros(1, 5, 3, 2),
+ "ones": lambda: torch.ones(10, 10, 10),
+ "neg_ones": lambda: -torch.ones(10, 10, 10),
+ "rand": lambda: (torch.rand(10, 10) - 0.5) * 20,
+ "ramp": lambda: torch.linspace(-10.0, 10.0, steps=160),
+ "near_zero": lambda: torch.tensor([-1e-6, 0.0, 1e-6]),
+ "large": lambda: torch.tensor([-100.0, -10.0, 0.0, 10.0, 100.0]),
+ "rand_4d": lambda: torch.randn(1, 3, 4, 5),
+}
+
+
+class Asinh(torch.nn.Module):
+ def forward(self, x):
+ return torch.asinh(x)
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_asinh_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t](
+ Asinh(),
+ (test_data(),),
+ aten_op,
+ exir_op=[],
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_asinh_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t](
+ Asinh(),
+ (test_data(),),
+ aten_op=[],
+ exir_op=[],
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone300
+def test_asinh_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t](
+ Asinh(),
+ (test_data(),),
+ aten_ops=[],
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone320
+def test_asinh_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t](
+ Asinh(),
+ (test_data(),),
+ aten_ops=[],
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_asinh_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Asinh(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_asinh_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t](
+ Asinh(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_at.py b/backends/arm/test/ops/test_at.py
index 3d2f5ef7cf2..b8a20760820 100644
--- a/backends/arm/test/ops/test_at.py
+++ b/backends/arm/test/ops/test_at.py
@@ -8,8 +8,9 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op_mm = "torch.ops.aten.matmul.default"
@@ -78,56 +79,56 @@ def forward(self, x1: torch.Tensor, x2: torch.Tensor, x3: torch.Tensor):
@common.parametrize("test_data", AtMatMulSingleInput.test_data_generators)
-def test_atmatmul_single_input_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_atmatmul_single_input_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
AtMatMulSingleInput(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", AtMatMulDoubleInput.test_data_generators)
-def test_atmatmul_double_input_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_atmatmul_double_input_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
AtMatMulDoubleInput(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", AtMatMulMixedPattern1.test_data_generators)
-def test_atmatmul_mixed_pattern1_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_atmatmul_mixed_pattern1_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
AtMatMulMixedPattern1(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", AtMatMulMixedPattern2.test_data_generators)
-def test_atmatmul_mixed_pattern2_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_atmatmul_mixed_pattern2_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
AtMatMulMixedPattern2(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", AtMatMulSingleInput.test_data_generators)
-def test_atmatmul_single_input_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_atmatmul_single_input_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
AtMatMulSingleInput(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", AtMatMulDoubleInput.test_data_generators)
-def test_atmatmul_double_input_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_atmatmul_double_input_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
AtMatMulDoubleInput(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", AtMatMulMixedPattern1.test_data_generators)
-def test_atmatmul_mixed_pattern1_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_atmatmul_mixed_pattern1_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
AtMatMulMixedPattern1(),
test_data(),
aten_op_mm,
@@ -138,8 +139,8 @@ def test_atmatmul_mixed_pattern1_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", AtMatMulMixedPattern2.test_data_generators)
-def test_atmatmul_mixed_pattern2_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_atmatmul_mixed_pattern2_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
AtMatMulMixedPattern2(),
test_data(),
aten_op_mm,
@@ -147,3 +148,109 @@ def test_atmatmul_mixed_pattern2_tosa_BI(test_data: input_t1):
qtol=1,
)
pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulSingleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_single_input_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulSingleInput(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulDoubleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_double_input_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulDoubleInput(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulMixedPattern1.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_mixed_pattern1_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulMixedPattern1(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulMixedPattern2.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_mixed_pattern2_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulMixedPattern2(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulSingleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_single_input_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulSingleInput(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulDoubleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_double_input_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulDoubleInput(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulMixedPattern1.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_mixed_pattern1_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulMixedPattern1(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ qtol=1,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AtMatMulMixedPattern2.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_atmatmul_mixed_pattern2_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ AtMatMulMixedPattern2(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ qtol=1,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_atan.py b/backends/arm/test/ops/test_atan.py
index 3d6f8cd8fa8..51114d2800f 100644
--- a/backends/arm/test/ops/test_atan.py
+++ b/backends/arm/test/ops/test_atan.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.atan.default"
@@ -39,8 +40,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_atan_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_atan_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Atan(),
(test_data,),
aten_op=aten_op,
@@ -50,8 +51,8 @@ def test_atan_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_atan_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_atan_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Atan(),
(test_data,),
aten_op=aten_op,
@@ -62,8 +63,8 @@ def test_atan_tosa_BI(test_data: Tuple):
@common.XfailIfNoCorstone300
@common.parametrize("test_data", test_data_suite)
-def test_atan_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_atan_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Atan(),
(test_data,),
aten_ops=aten_op,
@@ -74,11 +75,37 @@ def test_atan_u55_BI(test_data: Tuple):
@common.XfailIfNoCorstone320
@common.parametrize("test_data", test_data_suite)
-def test_atan_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_atan_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Atan(),
(test_data,),
aten_ops=aten_op,
exir_ops=exir_op,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_atan_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Atan(),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_atan_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Atan(),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_atanh.py b/backends/arm/test/ops/test_atanh.py
index 446e6ee311a..12754a34646 100644
--- a/backends/arm/test/ops/test_atanh.py
+++ b/backends/arm/test/ops/test_atanh.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.atanh.default"
@@ -40,8 +41,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_atanh_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_atanh_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Atanh(),
(test_data,),
aten_op=aten_op,
@@ -51,8 +52,8 @@ def test_atanh_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_atanh_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_atanh_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Atanh(),
(test_data,),
aten_op=aten_op,
@@ -63,8 +64,8 @@ def test_atanh_tosa_BI(test_data: Tuple):
@common.XfailIfNoCorstone300
@common.parametrize("test_data", test_data_suite)
-def test_atanh_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_atanh_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Atanh(),
(test_data,),
aten_ops=aten_op,
@@ -75,11 +76,37 @@ def test_atanh_u55_BI(test_data: Tuple):
@common.XfailIfNoCorstone320
@common.parametrize("test_data", test_data_suite)
-def test_atanh_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_atanh_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Atanh(),
(test_data,),
aten_ops=aten_op,
exir_ops=exir_op,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_atanh_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Atanh(),
+ (test_data,),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_atanh_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Atanh(),
+ (test_data,),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_avg_pool2d.py b/backends/arm/test/ops/test_avg_pool2d.py
index d1bce608156..be54c76e68b 100644
--- a/backends/arm/test/ops/test_avg_pool2d.py
+++ b/backends/arm/test/ops/test_avg_pool2d.py
@@ -15,11 +15,12 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.avg_pool2d.default"
@@ -113,10 +114,10 @@ def forward(self, *args, **kwargs):
@common.parametrize("test_module", test_modules)
-def test_avg_pool2d_tosa_MI(test_module):
+def test_avg_pool2d_tosa_FP(test_module):
model, input_tensor = test_module()
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
model,
input_tensor,
aten_op,
@@ -127,10 +128,10 @@ def test_avg_pool2d_tosa_MI(test_module):
@common.parametrize("test_module", test_modules)
-def test_avg_pool2d_tosa_BI(test_module):
+def test_avg_pool2d_tosa_INT(test_module):
model, input_tensor = test_module()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
input_tensor,
aten_op,
@@ -142,10 +143,10 @@ def test_avg_pool2d_tosa_BI(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone300
-def test_avg_pool2d_u55_BI(test_module):
+def test_avg_pool2d_u55_INT(test_module):
model, input_tensor = test_module()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
input_tensor,
aten_op,
@@ -157,10 +158,10 @@ def test_avg_pool2d_u55_BI(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone320
-def test_avg_pool2d_u85_BI(test_module):
+def test_avg_pool2d_u85_INT(test_module):
model, input_tensor = test_module()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
input_tensor,
aten_op,
@@ -170,6 +171,34 @@ def test_avg_pool2d_u85_BI(test_module):
pipeline.run()
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_avg_pool2d_vgf_FP(test_module):
+ model, input_tensor = test_module()
+ pipeline = VgfPipeline[input_t](
+ model,
+ input_tensor,
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_avg_pool2d_vgf_INT(test_module):
+ model, input_tensor = test_module()
+ pipeline = VgfPipeline[input_t](
+ model,
+ input_tensor,
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
reject_modules = {
"kernel_1x1_stride_1_pad_0": lambda: (AvgPool2d(1, 1, 0), torch.rand(2, 5, 5, 5)),
"kernel_2x9_stride_1_pad_1": lambda: (
@@ -192,7 +221,7 @@ def test_avg_pool2d_u85_BI(test_module):
@common.parametrize("reject_module", reject_modules)
-def test_avg_pool2d_u55_BI_not_delegated(reject_module):
+def test_avg_pool2d_u55_INT_not_delegated(reject_module):
model, test_data = reject_module()
diff --git a/backends/arm/test/ops/test_batch_norm.py b/backends/arm/test/ops/test_batch_norm.py
index eb0d4306e6e..a28180b7b57 100644
--- a/backends/arm/test/ops/test_batch_norm.py
+++ b/backends/arm/test/ops/test_batch_norm.py
@@ -13,11 +13,12 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -76,9 +77,9 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_native_batch_norm_legit_no_training_tosa_MI(test_data: Tuple):
+def test_native_batch_norm_legit_no_training_tosa_FP(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
BatchNorm2d(*model_params),
(test_data,),
aten_op=BatchNorm2d.aten_op,
@@ -87,7 +88,7 @@ def test_native_batch_norm_legit_no_training_tosa_MI(test_data: Tuple):
# TODO(MLETORCH-100: Quantized stand-alone batch norms)
-def test_native_batch_norm_legit_no_training_tosa_BI_not_delegated():
+def test_native_batch_norm_legit_no_training_tosa_INT_not_delegated():
test_data, model_params = test_data_suite["rand_1_3_254_254"]()
OpNotSupportedPipeline[input_t1](
BatchNorm2d(*model_params),
@@ -99,8 +100,28 @@ def test_native_batch_norm_legit_no_training_tosa_BI_not_delegated():
).run()
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_batch_norm_legit_no_training_vgf_FP(test_data: Tuple):
+ inp, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ BatchNorm2d(*model_params),
+ (inp,),
+ aten_op=BatchNorm2d.aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_batch_norm_legit_no_training_vgf_INT(test_data: Tuple):
+ # TODO(MLETORCH-100: Quantized stand-alone batch norms)
+ pass
+
+
# TODO(MLETORCH-100: Quantized stand-alone batch norms)
-def test_native_batch_norm_legit_no_training_u55_BI_not_delegated():
+def test_native_batch_norm_legit_no_training_u55_INT_not_delegated():
test_data, model_params = test_data_suite["rand_1_3_254_254"]()
OpNotSupportedPipeline[input_t1](
BatchNorm2d(*model_params),
@@ -114,7 +135,7 @@ def test_native_batch_norm_legit_no_training_u55_BI_not_delegated():
# TODO(MLETORCH-100: Quantized stand-alone batch norms)
-def test_native_batch_norm_legit_no_training_u85_BI_not_delegated():
+def test_native_batch_norm_legit_no_training_u85_INT_not_delegated():
test_data, model_params = test_data_suite["rand_1_3_254_254"]()
OpNotSupportedPipeline[input_t1](
BatchNorm2d(*model_params),
@@ -169,9 +190,9 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_native_batch_norm_legit_no_training_tosa_MI_conv(test_data: Tuple):
+def test_native_batch_norm_legit_no_training_tosa_FP_conv(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
BatchNorm2dConv(*model_params),
(test_data,),
aten_op=BatchNorm2dConv.aten_ops,
@@ -180,9 +201,9 @@ def test_native_batch_norm_legit_no_training_tosa_MI_conv(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_native_batch_norm_legit_no_training_tosa_BI_conv(test_data: Tuple):
+def test_native_batch_norm_legit_no_training_tosa_INT_conv(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
BatchNorm2dConv(*model_params),
(test_data,),
aten_op=BatchNorm2dConv.aten_ops[0], # Bn is removed before check
@@ -193,9 +214,9 @@ def test_native_batch_norm_legit_no_training_tosa_BI_conv(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_native_batch_norm_legit_no_training_u55_BI_conv(test_data: Tuple):
+def test_native_batch_norm_legit_no_training_u55_INT_conv(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
BatchNorm2dConv(*model_params),
(test_data,),
aten_ops=BatchNorm2dConv.aten_ops[0], # Bn is removed before check
@@ -207,9 +228,9 @@ def test_native_batch_norm_legit_no_training_u55_BI_conv(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_native_batch_norm_legit_no_training_u85_BI_conv(test_data: Tuple):
+def test_native_batch_norm_legit_no_training_u85_INT_conv(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
BatchNorm2dConv(*model_params),
(test_data,),
aten_ops=BatchNorm2dConv.aten_ops[0], # Bn is removed before check
@@ -219,6 +240,33 @@ def test_native_batch_norm_legit_no_training_u85_BI_conv(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_batch_norm_legit_no_training_vgf_FP_conv(test_data: Tuple):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ BatchNorm2dConv(*model_params),
+ (test_data,),
+ aten_op=BatchNorm2dConv.aten_ops,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_batch_norm_legit_no_training_vgf_INT_conv(test_data: Tuple):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ BatchNorm2dConv(*model_params),
+ (test_data,),
+ aten_op=BatchNorm2dConv.aten_ops[0], # Bn is removed before check
+ qtol=1,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
class BatchNorm2dNoStats(torch.nn.Module):
"""
Decomposes into _native_batch_norm_legit.no_stats
@@ -253,9 +301,9 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_native_batch_norm_legit_no_stats_tosa_MI(test_data: Tuple):
+def test_native_batch_norm_legit_no_stats_tosa_FP(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
BatchNorm2dNoStats(*model_params),
(test_data,),
aten_op=BatchNorm2dNoStats.aten_ops,
@@ -266,9 +314,9 @@ def test_native_batch_norm_legit_no_stats_tosa_MI(test_data: Tuple):
@pytest.mark.skip(
reason="MLETORCH-999: Add support for _native_batch_norm_legit.no_stats."
)
-def test_native_batch_norm_legit_no_stats_tosa_BI(test_data: Tuple):
+def test_native_batch_norm_legit_no_stats_tosa_INT(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
BatchNorm2dNoStats(*model_params),
(test_data,),
aten_op=BatchNorm2dNoStats.aten_ops,
@@ -282,9 +330,9 @@ def test_native_batch_norm_legit_no_stats_tosa_BI(test_data: Tuple):
)
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_native_batch_norm_legit_no_stats_u55_BI(test_data: Tuple):
+def test_native_batch_norm_legit_no_stats_u55_INT(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
BatchNorm2dNoStats(*model_params),
(test_data,),
aten_op=BatchNorm2dNoStats.aten_ops,
@@ -299,9 +347,9 @@ def test_native_batch_norm_legit_no_stats_u55_BI(test_data: Tuple):
)
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_native_batch_norm_legit_no_stats_u85_BI(test_data: Tuple):
+def test_native_batch_norm_legit_no_stats_u85_INT(test_data: Tuple):
test_data, model_params = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
BatchNorm2dNoStats(*model_params),
(test_data,),
aten_op=BatchNorm2dNoStats.aten_ops,
@@ -309,3 +357,33 @@ def test_native_batch_norm_legit_no_stats_u85_BI(test_data: Tuple):
qtol=1,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_batch_norm_legit_no_stats_vgf_FP(test_data: Tuple):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ BatchNorm2dNoStats(*model_params),
+ (test_data,),
+ aten_op=BatchNorm2dNoStats.aten_ops,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@pytest.mark.skip(
+ reason="MLETORCH-999: Add support for _native_batch_norm_legit.no_stats."
+)
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_batch_norm_legit_no_stats_vgf_INT(test_data: Tuple):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ BatchNorm2dNoStats(*model_params),
+ (test_data,),
+ aten_op=BatchNorm2dNoStats.aten_ops,
+ qtol=1,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
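
The tosa_version strings passed to VgfPipeline throughout these tests combine a TOSA version with a profile suffix. A minimal sketch of how such a spec splits apart; parse_profile is a hypothetical helper for illustration, not an Arm backend API:

def parse_profile(spec: str) -> tuple:
    # "TOSA-1.0+FP" -> ("TOSA-1.0", "FP"); "TOSA-1.0+INT" -> ("TOSA-1.0", "INT")
    version, _, profile = spec.partition("+")
    return version, profile

assert parse_profile("TOSA-1.0+FP") == ("TOSA-1.0", "FP")
assert parse_profile("TOSA-1.0+INT") == ("TOSA-1.0", "INT")
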
diff --git a/backends/arm/test/ops/test_bitwise.py b/backends/arm/test/ops/test_bitwise.py
index d29ea7c91f2..1c0f0e36a6a 100644
--- a/backends/arm/test/ops/test_bitwise.py
+++ b/backends/arm/test/ops/test_bitwise.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -128,12 +129,14 @@ def forward(self, tensor: torch.Tensor, scalar: int):
return tensor.bitwise_or(scalar)
-# Bitwise AND
+#########
+## AND ##
+#########
@common.parametrize("test_data", And().test_data)
-def test_bitwise_and_tensor_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_bitwise_and_tensor_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
And(),
test_data(),
And().aten_op,
@@ -146,8 +149,8 @@ def test_bitwise_and_tensor_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", AndScalar.test_data)
-def test_bitwise_and_scalar_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_bitwise_and_scalar_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
AndScalar(),
test_data(),
AndScalar.aten_op,
@@ -160,8 +163,8 @@ def test_bitwise_and_scalar_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", And().test_data)
-def test_bitwise_and_tensor_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_bitwise_and_tensor_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
And(),
test_data(),
And().aten_op,
@@ -176,8 +179,8 @@ def test_bitwise_and_tensor_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", AndScalar.test_data)
-def test_bitwise_and_scalar_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_bitwise_and_scalar_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
AndScalar(),
test_data(),
AndScalar.aten_op,
@@ -192,7 +195,7 @@ def test_bitwise_and_scalar_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", And().test_data)
-def test_bitwise_and_tensor_u55_BI(test_data: input_t2):
+def test_bitwise_and_tensor_u55_INT(test_data: input_t2):
# Tests that we don't delegate these ops since they are not supported on U55.
pipeline = OpNotSupportedPipeline[input_t2](
And(),
@@ -205,7 +208,7 @@ def test_bitwise_and_tensor_u55_BI(test_data: input_t2):
@common.parametrize("test_data", AndScalar.test_data)
-def test_bitwise_and_scalar_u55_BI(test_data: input_t2):
+def test_bitwise_and_scalar_u55_INT(test_data: input_t2):
# There will be one full op which will be delegated.
num_delegates = 1
num_exir = 0
@@ -225,8 +228,8 @@ def test_bitwise_and_scalar_u55_BI(test_data: input_t2):
@common.parametrize("test_data", AndScalar.test_data)
@common.XfailIfNoCorstone320
-def test_bitwise_and_scalar_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_bitwise_and_scalar_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
AndScalar(),
test_data(),
AndScalar.aten_op,
@@ -243,8 +246,8 @@ def test_bitwise_and_scalar_u85_BI(test_data: input_t2):
@common.parametrize("test_data", And().test_data)
@common.XfailIfNoCorstone320
-def test_bitwise_and_tensor_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_bitwise_and_tensor_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
And(),
test_data(),
And().aten_op,
@@ -259,9 +262,82 @@ def test_bitwise_and_tensor_u85_BI(test_data: input_t2):
pipeline.run()
+@common.parametrize("test_data", And().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_and_tensor_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ And(),
+ test_data(),
+ And().aten_op,
+ And().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AndScalar().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_and_scalar_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ AndScalar(),
+ test_data(),
+ AndScalar().aten_op,
+ AndScalar().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", And().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_and_tensor_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ And(),
+ test_data(),
+ And().aten_op,
+ And().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+@common.parametrize("test_data", AndScalar().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_and_scalar_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ AndScalar(),
+ test_data(),
+ AndScalar().aten_op,
+ AndScalar().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+#########
+## XOR ##
+#########
+
+
@common.parametrize("test_data", Xor().test_data)
-def test_bitwise_xor_tensor_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_bitwise_xor_tensor_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
Xor(),
test_data(),
Xor().aten_op,
@@ -274,8 +350,8 @@ def test_bitwise_xor_tensor_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", XorScalar.test_data)
-def test_bitwise_xor_scalar_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_bitwise_xor_scalar_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
XorScalar(),
test_data(),
XorScalar.aten_op,
@@ -288,8 +364,8 @@ def test_bitwise_xor_scalar_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", Xor().test_data)
-def test_bitwise_xor_tensor_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_bitwise_xor_tensor_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
Xor(),
test_data(),
Xor().aten_op,
@@ -304,8 +380,8 @@ def test_bitwise_xor_tensor_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", XorScalar.test_data)
-def test_bitwise_xor_scalar_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_bitwise_xor_scalar_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
XorScalar(),
test_data(),
XorScalar.aten_op,
@@ -320,7 +396,7 @@ def test_bitwise_xor_scalar_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", Xor().test_data)
-def test_bitwise_xor_tensor_u55_BI(test_data: input_t2):
+def test_bitwise_xor_tensor_u55_INT(test_data: input_t2):
# Tests that we don't delegate these ops since they are not supported on U55.
pipeline = OpNotSupportedPipeline[input_t2](
Xor(),
@@ -333,7 +409,7 @@ def test_bitwise_xor_tensor_u55_BI(test_data: input_t2):
@common.parametrize("test_data", XorScalar.test_data)
-def test_bitwise_xor_scalar_u55_BI(test_data: input_t2):
+def test_bitwise_xor_scalar_u55_INT(test_data: input_t2):
# There will be one full op which will be delegated.
num_delegates = 1
num_exir = 0
@@ -353,8 +429,8 @@ def test_bitwise_xor_scalar_u55_BI(test_data: input_t2):
@common.parametrize("test_data", Xor().test_data)
@common.XfailIfNoCorstone320
-def test_bitwise_xor_tensor_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_bitwise_xor_tensor_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
Xor(),
test_data(),
Xor().aten_op,
@@ -371,8 +447,8 @@ def test_bitwise_xor_tensor_u85_BI(test_data: input_t2):
@common.parametrize("test_data", XorScalar.test_data)
@common.XfailIfNoCorstone320
-def test_bitwise_xor_scalar_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_bitwise_xor_scalar_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
XorScalar(),
test_data(),
XorScalar.aten_op,
@@ -387,9 +463,82 @@ def test_bitwise_xor_scalar_u85_BI(test_data: input_t2):
pipeline.run()
+@common.parametrize("test_data", Xor().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_xor_tensor_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Xor(),
+ test_data(),
+ Xor().aten_op,
+ Xor().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", XorScalar().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_xor_scalar_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ XorScalar(),
+ test_data(),
+ XorScalar().aten_op,
+ XorScalar().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Xor().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_xor_tensor_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Xor(),
+ test_data(),
+ Xor().aten_op,
+ Xor().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+@common.parametrize("test_data", XorScalar().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_xor_scalar_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ XorScalar(),
+ test_data(),
+ XorScalar().aten_op,
+ XorScalar().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+########
+## OR ##
+########
+
+
@common.parametrize("test_data", Or().test_data)
-def test_bitwise_or_tensor_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_bitwise_or_tensor_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
Or(),
test_data(),
Or().aten_op,
@@ -402,8 +551,8 @@ def test_bitwise_or_tensor_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", OrScalar.test_data)
-def test_bitwise_or_scalar_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_bitwise_or_scalar_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
OrScalar(),
test_data(),
OrScalar.aten_op,
@@ -416,8 +565,8 @@ def test_bitwise_or_scalar_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", Or().test_data)
-def test_bitwise_or_tensor_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_bitwise_or_tensor_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
Or(),
test_data(),
Or().aten_op,
@@ -432,8 +581,8 @@ def test_bitwise_or_tensor_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", OrScalar.test_data)
-def test_bitwise_or_scalar_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_bitwise_or_scalar_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
OrScalar(),
test_data(),
OrScalar.aten_op,
@@ -448,7 +597,7 @@ def test_bitwise_or_scalar_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", Or().test_data)
-def test_bitwise_or_tensor_u55_BI(test_data: input_t2):
+def test_bitwise_or_tensor_u55_INT(test_data: input_t2):
# Tests that we don't delegate these ops since they are not supported on U55.
pipeline = OpNotSupportedPipeline[input_t2](
Or(),
@@ -461,7 +610,7 @@ def test_bitwise_or_tensor_u55_BI(test_data: input_t2):
@common.parametrize("test_data", OrScalar.test_data)
-def test_bitwise_or_scalar_u55_BI(test_data: input_t2):
+def test_bitwise_or_scalar_u55_INT(test_data: input_t2):
# There will be one full op which will be delegated.
num_delegates = 1
num_exir = 0
@@ -481,8 +630,8 @@ def test_bitwise_or_scalar_u55_BI(test_data: input_t2):
@common.parametrize("test_data", Or().test_data)
@common.XfailIfNoCorstone320
-def test_bitwise_or_tensor_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_bitwise_or_tensor_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
Or(),
test_data(),
Or().aten_op,
@@ -499,8 +648,8 @@ def test_bitwise_or_tensor_u85_BI(test_data: input_t2):
@common.parametrize("test_data", OrScalar.test_data)
@common.XfailIfNoCorstone320
-def test_bitwise_or_scalar_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_bitwise_or_scalar_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
OrScalar(),
test_data(),
OrScalar.aten_op,
@@ -513,3 +662,71 @@ def test_bitwise_or_scalar_u85_BI(test_data: input_t2):
pipeline.pop_stage("quantize")
pipeline.pop_stage("check.quant_nodes")
pipeline.run()
+
+
+@common.parametrize("test_data", Or().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_or_tensor_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Or(),
+ test_data(),
+ Or().aten_op,
+ Or().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", OrScalar().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_or_scalar_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ OrScalar(),
+ test_data(),
+ OrScalar().aten_op,
+ OrScalar().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Or().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_or_tensor_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Or(),
+ test_data(),
+ Or().aten_op,
+ Or().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+@common.parametrize("test_data", OrScalar().test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_or_scalar_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ OrScalar(),
+ test_data(),
+ OrScalar().aten_op,
+ OrScalar().exir_op,
+ atol=0,
+ rtol=0,
+ qtol=0,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
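
A standalone illustration (not part of the patch) of why the INT variants above can pop the "quantize" and "check.quant_nodes" stages and compare with atol=rtol=qtol=0: the bitwise operators act on integer tensors, so the result is bit-exact and there is nothing to quantize.

import torch

lhs = torch.tensor([0b1100, 0b1010], dtype=torch.int32)
rhs = torch.tensor([0b1010, 0b0110], dtype=torch.int32)

# Integer in, integer out: results are exact, so zero tolerances are safe.
assert torch.equal(lhs.bitwise_and(rhs), torch.tensor([0b1000, 0b0010], dtype=torch.int32))
assert torch.equal(lhs.bitwise_or(rhs), torch.tensor([0b1110, 0b1110], dtype=torch.int32))
assert torch.equal(lhs.bitwise_xor(rhs), torch.tensor([0b0110, 0b1100], dtype=torch.int32))
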
diff --git a/backends/arm/test/ops/test_bmm.py b/backends/arm/test/ops/test_bmm.py
index 6b66abbda01..7c0fc1665bb 100644
--- a/backends/arm/test/ops/test_bmm.py
+++ b/backends/arm/test/ops/test_bmm.py
@@ -13,10 +13,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op_bmm = "torch.ops.aten.bmm.default"
@@ -57,31 +58,31 @@ def forward(self, x):
@common.parametrize("test_data", BMM.test_data_generators)
-def test_bmm_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](BMM(), test_data(), aten_op_bmm, exir_op_bmm)
+def test_bmm_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](BMM(), test_data(), aten_op_bmm, exir_op_bmm)
pipeline.run()
@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLETORCH-534)
@common.parametrize("test_data", BMMSingleInput.test_data_generators)
-def test_bmm_tosa_MI_single_input(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_bmm_tosa_FP_single_input(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
BMMSingleInput(), test_data(), aten_op_bmm, exir_op_bmm
)
pipeline.run()
@common.parametrize("test_data", BMM.test_data_generators)
-def test_bmm_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_bmm_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
BMM(), test_data(), aten_op_bmm, exir_op_bmm, qtol=1
)
pipeline.run()
@common.parametrize("test_data", BMMSingleInput.test_data_generators)
-def test_bmm_tosa_BI_single_input(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_bmm_tosa_INT_single_input(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
BMMSingleInput(), test_data(), aten_op_bmm, exir_op_bmm
)
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
@@ -90,8 +91,8 @@ def test_bmm_tosa_BI_single_input(test_data: input_t1):
@common.parametrize("test_data", BMM.test_data_generators)
@common.XfailIfNoCorstone300
-def test_bmm_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_bmm_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
BMM(),
test_data(),
aten_op_bmm,
@@ -103,8 +104,8 @@ def test_bmm_u55_BI(test_data: input_t1):
@common.parametrize("test_data", BMM.test_data_generators)
@common.XfailIfNoCorstone320
-def test_bmm_u85_BI(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_bmm_u85_INT(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
BMM(),
test_data(),
aten_op_bmm,
@@ -116,8 +117,8 @@ def test_bmm_u85_BI(test_data: input_t1):
@common.parametrize("test_data", BMMSingleInput.test_data_generators)
@common.XfailIfNoCorstone300
-def test_bmm_u55_BI_single_input(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_bmm_u55_INT_single_input(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
BMMSingleInput(),
test_data(),
aten_op_bmm,
@@ -129,8 +130,8 @@ def test_bmm_u55_BI_single_input(test_data: input_t1):
@common.parametrize("test_data", BMMSingleInput.test_data_generators)
@common.XfailIfNoCorstone320
-def test_bmm_u85_BI_single_input(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_bmm_u85_INT_single_input(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
BMMSingleInput(),
test_data(),
aten_op_bmm,
@@ -138,3 +139,53 @@ def test_bmm_u85_BI_single_input(test_data: input_t1):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", BMM.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_bmm_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ BMM(), test_data(), aten_op_bmm, exir_op_bmm, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", BMMSingleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_bmm_vgf_FP_single_input(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ BMMSingleInput(),
+ test_data(),
+ aten_op_bmm,
+ exir_op_bmm,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", BMM.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_bmm_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ BMM(),
+ test_data(),
+ aten_op_bmm,
+ exir_op_bmm,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", BMMSingleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_bmm_vgf_INT_single_input(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ BMMSingleInput(),
+ test_data(),
+ aten_op_bmm,
+ exir_op_bmm,
+ tosa_version="TOSA-1.0+INT",
+ )
+ # TODO: MLETORCH-1136 Change the args of run_method_and_compare_outputs for the VGF tests
+ # pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+ pipeline.run()
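
The commented-out change_args call above follows the stage-pipeline pattern these testers share: stages are registered by name, and a test can re-run one with different arguments or drop it entirely. A rough sketch of that pattern under assumed semantics, not the real test_pipeline implementation:

class MiniPipeline:
    def __init__(self):
        self.stages = {}  # stage name -> (callable, kwargs)

    def add_stage(self, name, func, **kwargs):
        self.stages[name] = (func, kwargs)

    def change_args(self, name, **kwargs):
        func, _ = self.stages[name]
        self.stages[name] = (func, kwargs)

    def pop_stage(self, name):
        del self.stages[name]

    def run(self):
        for func, kwargs in self.stages.values():
            func(**kwargs)

p = MiniPipeline()
p.add_stage("run_method_and_compare_outputs", lambda **kw: print(kw), qtol=0)
p.change_args("run_method_and_compare_outputs", qtol=1)  # loosen quantization tolerance
p.run()  # prints {'qtol': 1}
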
diff --git a/backends/arm/test/ops/test_cat.py b/backends/arm/test/ops/test_cat.py
index d5ebd6fe569..826689622fb 100644
--- a/backends/arm/test/ops/test_cat.py
+++ b/backends/arm/test/ops/test_cat.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -70,8 +71,8 @@ def forward(self, t: tuple[torch.Tensor, ...], dim: int) -> torch.Tensor:
@common.parametrize("test_data", Cat.test_parameters)
-def test_cat_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_cat_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Cat(),
test_data(),
aten_op,
@@ -80,11 +81,11 @@ def test_cat_tosa_MI(test_data: Tuple):
pipeline.run()
-def test_cat_tosa_MI_4d():
+def test_cat_tosa_FP_4d():
square = torch.ones((2, 2, 2, 2))
for dim in range(-3, 3):
test_data = ((square, square.clone()), dim)
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Cat(),
test_data,
aten_op,
@@ -94,8 +95,8 @@ def test_cat_tosa_MI_4d():
@common.parametrize("test_data", Cat.test_parameters)
-def test_cat_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_cat_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Cat(),
test_data(),
aten_op,
@@ -114,8 +115,8 @@ def test_cat_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", Cat.test_parameters, x_fails)
@common.XfailIfNoCorstone300
-def test_cat_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_cat_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Cat(),
test_data(),
aten_op,
@@ -127,8 +128,8 @@ def test_cat_u55_BI(test_data: Tuple):
@common.parametrize("test_data", Cat.test_parameters, x_fails)
@common.XfailIfNoCorstone320
-def test_cat_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_cat_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Cat(),
test_data(),
aten_op,
@@ -136,3 +137,25 @@ def test_cat_u85_BI(test_data: Tuple):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", Cat.test_parameters)
+@common.SkipIfNoModelConverter
+def test_cat_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Cat(), test_data(), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Cat.test_parameters)
+@common.SkipIfNoModelConverter
+def test_cat_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Cat(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_ceil.py b/backends/arm/test/ops/test_ceil.py
index 5235e6f4027..64e9040a974 100644
--- a/backends/arm/test/ops/test_ceil.py
+++ b/backends/arm/test/ops/test_ceil.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor]
@@ -43,9 +44,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data)
-def test_ceil_tosa_MI(test_data: input_t1):
+def test_ceil_tosa_FP(test_data: input_t1):
module, data = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
module,
(data,),
module.aten_op,
@@ -55,9 +56,9 @@ def test_ceil_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", test_data)
-def test_ceil_tosa_BI(test_data: input_t1):
+def test_ceil_tosa_INT(test_data: input_t1):
module, data = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
module,
(data,),
module.aten_op,
@@ -70,9 +71,9 @@ def test_ceil_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", test_data)
@common.XfailIfNoCorstone300
-def test_ceil_u55_BI(test_data: input_t1):
+def test_ceil_u55_INT(test_data: input_t1):
module, data = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
module,
(data,),
module.aten_op,
@@ -84,9 +85,9 @@ def test_ceil_u55_BI(test_data: input_t1):
@common.parametrize("test_data", test_data)
@common.XfailIfNoCorstone320
-def test_ceil_u85_BI(test_data: input_t1):
+def test_ceil_u85_INT(test_data: input_t1):
module, data = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
module,
(data,),
module.aten_op,
@@ -94,3 +95,33 @@ def test_ceil_u85_BI(test_data: input_t1):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data)
+@common.SkipIfNoModelConverter
+def test_ceil_vgf_FP(test_data: input_t1):
+ module, data = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ (data,),
+ module.aten_op,
+ module.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data)
+@common.SkipIfNoModelConverter
+def test_ceil_vgf_INT(test_data: input_t1):
+ module, data = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ (data,),
+ module.aten_op,
+ module.exir_op,
+ atol=0.06,
+ rtol=0.01,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
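
The atol=0.06/rtol=0.01 arguments in the INT VGF test above presumably absorb the small rounding error a quantized ceil can introduce. An illustrative comparison with made-up numbers showing what those tolerances accept:

import torch

reference = torch.tensor([1.0, 2.0, 3.0])
backend_out = torch.tensor([1.05, 2.0, 3.0])  # within one small quantization step

# Passes: |1.05 - 1.0| = 0.05 <= atol + rtol * |reference| = 0.06 + 0.01 * 1.0
assert torch.allclose(backend_out, reference, atol=0.06, rtol=0.01)
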
diff --git a/backends/arm/test/ops/test_clamp.py b/backends/arm/test/ops/test_clamp.py
index b05e0e08eec..ba490ccc0c6 100644
--- a/backends/arm/test/ops/test_clamp.py
+++ b/backends/arm/test/ops/test_clamp.py
@@ -11,10 +11,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.clamp.default"
@@ -51,12 +52,12 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_clamp_tosa_MI(test_data):
+def test_clamp_tosa_FP(test_data):
input_tensor, min_val, max_val = test_data()
model = Clamp(min_val, max_val)
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
model,
(input_tensor,),
aten_op,
@@ -67,12 +68,12 @@ def test_clamp_tosa_MI(test_data):
@common.parametrize("test_data", test_data_suite)
-def test_clamp_tosa_BI(test_data):
+def test_clamp_tosa_INT(test_data):
input_tensor, min_val, max_val = test_data()
model = Clamp(min_val, max_val)
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
(input_tensor,),
aten_op,
@@ -85,12 +86,12 @@ def test_clamp_tosa_BI(test_data):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_clamp_u55_BI(test_data):
+def test_clamp_u55_INT(test_data):
input_tensor, min_val, max_val = test_data()
model = Clamp(min_val, max_val)
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
(input_tensor,),
aten_op,
@@ -104,12 +105,12 @@ def test_clamp_u55_BI(test_data):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_clamp_u85_BI(test_data):
+def test_clamp_u85_INT(test_data):
input_tensor, min_val, max_val = test_data()
model = Clamp(min_val, max_val)
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
(input_tensor,),
aten_op,
@@ -119,3 +120,35 @@ def test_clamp_u85_BI(test_data):
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_clamp_vgf_FP(test_data):
+ input_tensor, min_val, max_val = test_data()
+ model = Clamp(min_val, max_val)
+ pipeline = VgfPipeline[input_t](
+ model,
+ (input_tensor,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_clamp_vgf_INT(test_data):
+ input_tensor, min_val, max_val = test_data()
+ model = Clamp(min_val, max_val)
+ pipeline = VgfPipeline[input_t](
+ model,
+ (input_tensor,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ # TODO: MLETORCH-1136 Change the args of run_method_and_compare_outputs for the VGF tests
+ # pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_clone.py b/backends/arm/test/ops/test_clone.py
index 5a754b90934..7a24848697e 100644
--- a/backends/arm/test/ops/test_clone.py
+++ b/backends/arm/test/ops/test_clone.py
@@ -15,10 +15,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.clone.default"
@@ -46,9 +47,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_clone_tosa_MI(test_data: Tuple[torch.Tensor]):
+def test_clone_tosa_FP(test_data: Tuple[torch.Tensor]):
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
Clone(),
test_data(),
aten_op,
@@ -59,8 +60,8 @@ def test_clone_tosa_MI(test_data: Tuple[torch.Tensor]):
@common.parametrize("test_data", test_data_suite)
-def test_clone_tosa_BI(test_data):
- pipeline = TosaPipelineBI[input_t](
+def test_clone_tosa_INT(test_data):
+ pipeline = TosaPipelineINT[input_t](
Clone(),
test_data(),
aten_op,
@@ -74,8 +75,8 @@ def test_clone_tosa_BI(test_data):
@pytest.mark.xfail(
reason="Empty subgraph leads to Vela compilation failure. See: https://jira.arm.com/browse/MLBEDSW-10477"
)
-def test_clone_u55_BI(test_data):
- pipeline = EthosU55PipelineBI[input_t](
+def test_clone_u55_INT(test_data):
+ pipeline = EthosU55PipelineINT[input_t](
Clone(),
test_data(),
aten_op,
@@ -91,8 +92,8 @@ def test_clone_u55_BI(test_data):
@pytest.mark.xfail(
reason="Empty subgraph leads to Vela compilation failure. See: https://jira.arm.com/browse/MLBEDSW-10477"
)
-def test_clone_u85_BI(test_data):
- pipeline = EthosU85PipelineBI[input_t](
+def test_clone_u85_INT(test_data):
+ pipeline = EthosU85PipelineINT[input_t](
Clone(),
test_data(),
aten_op,
@@ -101,3 +102,25 @@ def test_clone_u85_BI(test_data):
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_clone_vgf_FP(test_data):
+ pipeline = VgfPipeline[input_t](
+ Clone(), test_data(), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_clone_vgf_INT(test_data):
+ pipeline = VgfPipeline[input_t](
+ Clone(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_constant_pad_nd.py b/backends/arm/test/ops/test_constant_pad_nd.py
index 0a81fd0f97d..d70249c31d1 100644
--- a/backends/arm/test/ops/test_constant_pad_nd.py
+++ b/backends/arm/test/ops/test_constant_pad_nd.py
@@ -11,8 +11,9 @@
import torch.nn.functional as F
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.pad.default"
@@ -53,9 +54,9 @@ def forward(self, x: torch.Tensor):
"test_data",
test_data_suite,
)
-def test_constant_pad_nd_tosa_MI(test_data: Tuple):
+def test_constant_pad_nd_tosa_FP(test_data: Tuple):
test_data, padding, value = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
ConstantPadND(padding, value),
(test_data,),
aten_op,
@@ -65,12 +66,40 @@ def test_constant_pad_nd_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_constant_pad_nd_tosa_BI(test_data: Tuple):
+def test_constant_pad_nd_tosa_INT(test_data: Tuple):
test_data, padding, value = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
ConstantPadND(padding, value),
(test_data,),
aten_op,
exir_op,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_constant_pad_nd_vgf_FP(test_data: Tuple):
+ inp, padding, value = test_data()
+ pipeline = VgfPipeline[input_t1](
+ ConstantPadND(padding, value),
+ (inp,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_constant_pad_nd_vgf_INT(test_data: Tuple):
+ inp, padding, value = test_data()
+ pipeline = VgfPipeline[input_t1](
+ ConstantPadND(padding, value),
+ (inp,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_conv1d.py b/backends/arm/test/ops/test_conv1d.py
index cc8245ba126..ac66bc1556b 100644
--- a/backends/arm/test/ops/test_conv1d.py
+++ b/backends/arm/test/ops/test_conv1d.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.conv1d.default"
@@ -249,7 +250,7 @@ def forward(self, x):
batches=1,
)
-test_data_MI = {
+test_data_FP = {
"2_3x2x40_nobias": lambda: conv1d_2_3x2x40_nobias,
"3_1x3x256_st1": lambda: conv1d_3_1x3x256_st1,
"3_1x3x12_st2_pd1": lambda: conv1d_3_1x3x12_st2_pd1,
@@ -265,16 +266,16 @@ def forward(self, x):
"two_conv1d": lambda: two_conv1d,
}
-test_data_BI = {
+test_data_INT = {
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
- for (k, v) in test_data_MI.items()
+ for (k, v) in test_data_FP.items()
for q in [True, False]
}
-@common.parametrize("test_data", test_data_MI)
-def test_convolution_1d_tosa_MI(test_data):
- pipeline = TosaPipelineMI[input_t](
+@common.parametrize("test_data", test_data_FP)
+def test_convolution_1d_tosa_FP(test_data):
+ pipeline = TosaPipelineFP[input_t](
test_data(),
test_data().get_inputs(),
aten_op,
@@ -283,10 +284,10 @@ def test_convolution_1d_tosa_MI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI)
-def test_convolution_1d_tosa_BI(test_data):
+@common.parametrize("test_data", test_data_INT)
+def test_convolution_1d_tosa_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -297,11 +298,11 @@ def test_convolution_1d_tosa_BI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI)
+@common.parametrize("test_data", test_data_INT)
@common.XfailIfNoCorstone300
-def test_convolution_1d_u55_BI(test_data):
+def test_convolution_1d_u55_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -313,11 +314,11 @@ def test_convolution_1d_u55_BI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI)
+@common.parametrize("test_data", test_data_INT)
@common.XfailIfNoCorstone320
-def test_convolution_1d_u85_BI(test_data):
+def test_convolution_1d_u85_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -327,3 +328,31 @@ def test_convolution_1d_u85_BI(test_data):
qtol=1,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_FP)
+@common.SkipIfNoModelConverter
+def test_convolution_1d_vgf_FP(test_data):
+ pipeline = VgfPipeline[input_t](
+ test_data(),
+ test_data().get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_INT)
+@common.SkipIfNoModelConverter
+def test_convolution_1d_vgf_INT(test_data):
+ model, per_channel_quantization = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ model.get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ pipeline.run()
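
The test_data_INT dict comprehensions in this and the following files rely on binding the loop variables as lambda default arguments, so each entry captures its own (v, q) pair rather than the last loop value. A self-contained sketch of that idiom with placeholder names:

test_data_FP = {"case_a": lambda: "model_a", "case_b": lambda: "model_b"}

test_data_INT = {
    f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
    for (k, v) in test_data_FP.items()
    for q in [True, False]
}

# Without the v=v, q=q defaults, every lambda would see the final loop values.
assert test_data_INT["case_a,per_channel_quant=True"]() == ("model_a", True)
assert test_data_INT["case_b,per_channel_quant=False"]() == ("model_b", False)
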
diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py
index 54e9157284e..0d23d2a6c7e 100644
--- a/backends/arm/test/ops/test_conv2d.py
+++ b/backends/arm/test/ops/test_conv2d.py
@@ -9,11 +9,12 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.conv2d.default"
@@ -356,8 +357,8 @@ def forward(self, x):
)
# Shenanigan to get a nicer output when test fails. With unittest it looks like:
-# FAIL: test_convolution_2d_tosa_BI_2_3x3_1x3x12x12_st2_pd1
-test_data_MI = {
+# FAIL: test_convolution_2d_tosa_INT_2_3x3_1x3x12x12_st2_pd1
+test_data_FP = {
"2x2_3x2x40x40_nobias": lambda: conv2d_2x2_3x2x40x40_nobias,
"3x3_1x3x256x256_st1": lambda: conv2d_3x3_1x3x256x256_st1,
"3x3_1x3x12x12_st2_pd1": lambda: conv2d_3x3_1x3x12x12_st2_pd1,
@@ -381,9 +382,9 @@ def forward(self, x):
}
# Generate a new test set paired with per_channel_quant=True/False.
-test_data_BI = {
+test_data_INT = {
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
- for (k, v) in test_data_MI.items()
+ for (k, v) in test_data_FP.items()
for q in [True, False]
}
@@ -399,10 +400,10 @@ def forward(self, x):
input_t = Tuple[torch.Tensor]
-@common.parametrize("test_data", test_data_MI)
-def test_convolution_2d_tosa_MI(test_data):
+@common.parametrize("test_data", test_data_FP)
+def test_convolution_2d_tosa_FP(test_data):
model = test_data()
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
model,
model.get_inputs(),
aten_op,
@@ -411,10 +412,10 @@ def test_convolution_2d_tosa_MI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI)
-def test_convolution_2d_tosa_BI(test_data):
+@common.parametrize("test_data", test_data_INT)
+def test_convolution_2d_tosa_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -425,11 +426,11 @@ def test_convolution_2d_tosa_BI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI, fvp_xfails)
+@common.parametrize("test_data", test_data_INT, fvp_xfails)
@common.XfailIfNoCorstone300
-def test_convolution_2d_u55_BI(test_data):
+def test_convolution_2d_u55_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -440,11 +441,11 @@ def test_convolution_2d_u55_BI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI, fvp_xfails)
+@common.parametrize("test_data", test_data_INT, fvp_xfails)
@common.XfailIfNoCorstone320
-def test_convolution_u85_BI(test_data):
+def test_convolution_u85_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -455,6 +456,35 @@ def test_convolution_u85_BI(test_data):
pipeline.run()
+@common.parametrize("test_data", test_data_FP)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_FP(test_data):
+ model = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ model.get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_INT)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_INT(test_data):
+ model, per_channel_quantization = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ model.get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ pipeline.run()
+
+
reject_suite = {
"large_stride": lambda: Conv2d(
in_channels=1,
@@ -490,7 +520,7 @@ def test_convolution_u85_BI(test_data):
@common.parametrize("module", reject_suite)
-def test_convolution_2d_u55_BI_not_delegated(module: Conv2d):
+def test_convolution_2d_u55_INT_not_delegated(module: Conv2d):
OpNotSupportedPipeline(
module(),
module().get_inputs(),
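
A sketch of the distinction the per_channel_quantization flag toggles in the INT tests: per-tensor quantization uses one scale for the whole weight, per-channel one scale per output channel. Symmetric int8 scales shown for illustration only; this is not the quantizer's actual implementation.

import torch

weight = torch.randn(8, 3, 3, 3)  # (out_channels, in_channels, kH, kW)

per_tensor_scale = weight.abs().max() / 127.0
per_channel_scale = weight.abs().amax(dim=(1, 2, 3)) / 127.0  # one scale per out channel

q_per_tensor = torch.round(weight / per_tensor_scale).clamp(-128, 127)
q_per_channel = torch.round(weight / per_channel_scale.view(-1, 1, 1, 1)).clamp(-128, 127)
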
diff --git a/backends/arm/test/ops/test_conv3d.py b/backends/arm/test/ops/test_conv3d.py
index 1a8ea5c3dd5..b26f75daa1a 100644
--- a/backends/arm/test/ops/test_conv3d.py
+++ b/backends/arm/test/ops/test_conv3d.py
@@ -10,11 +10,12 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.conv3d.default"
@@ -304,7 +305,7 @@ def forward(self, x):
batches=1,
)
-test_data_MI = {
+test_data_FP = {
"2x2_3x2x40x40_nobias": lambda: conv3d_2x2_3x2x40x40_nobias,
"3x3_1x3x256x256_st1": lambda: conv3d_3x3_1x3x256x256_st1,
"3x3_1x3x12x12_st2_pd1": lambda: conv3d_3x3_1x3x12x12_st2_pd1,
@@ -324,29 +325,29 @@ def forward(self, x):
}
# Generate a new test set paired with per_channel_quant=True/False.
-test_data_BI = {
+test_data_INT = {
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
- for (k, v) in test_data_MI.items()
+ for (k, v) in test_data_FP.items()
for q in [True, False]
}
input_t = Tuple[torch.Tensor]
-@common.parametrize("test_data", test_data_MI)
+@common.parametrize("test_data", test_data_FP)
@pytest.mark.skip # Not implemented, skip until it is.
-def test_convolution_3d_tosa_MI(test_data):
- pipeline = TosaPipelineMI[input_t](
+def test_convolution_3d_tosa_FP(test_data):
+ pipeline = TosaPipelineFP[input_t](
test_data(), test_data().get_inputs(), aten_op, exir_op
)
pipeline.run()
-@common.parametrize("test_data", test_data_BI)
+@common.parametrize("test_data", test_data_INT)
@pytest.mark.skip # Not implemented, skip until it is.
-def test_convolution_3d_tosa_BI(test_data):
+def test_convolution_3d_tosa_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -357,11 +358,11 @@ def test_convolution_3d_tosa_BI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI)
+@common.parametrize("test_data", test_data_INT)
@pytest.mark.skip # Not implemented, skip until it is.
-def test_convolution_3d_u55_BI(test_data):
+def test_convolution_3d_u55_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -372,11 +373,11 @@ def test_convolution_3d_u55_BI(test_data):
pipeline.run()
-@common.parametrize("test_data", test_data_BI)
+@common.parametrize("test_data", test_data_INT)
@pytest.mark.skip # Not implemented, skip until it is.
-def test_convolution_3d_u85_BI(test_data):
+def test_convolution_3d_u85_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
model.get_inputs(),
aten_op,
@@ -387,6 +388,35 @@ def test_convolution_3d_u85_BI(test_data):
pipeline.run()
+@common.parametrize("test_data", test_data_FP)
+@pytest.mark.skip # Not implemented, skip until it is.
+@common.SkipIfNoModelConverter
+def test_convolution_3d_vgf_FP(test_data):
+ pipeline = VgfPipeline[input_t](
+ test_data(),
+ test_data().get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_INT)
+@pytest.mark.skip # Not implemented, skip until it is.
+@common.SkipIfNoModelConverter
+def test_convolution_3d_vgf_INT(test_data):
+ model, per_channel_quantization = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ model.get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
reject_suite = {
"large_stride": lambda: Conv3d(
in_channels=1,
@@ -412,7 +442,7 @@ def test_convolution_3d_u85_BI(test_data):
@common.parametrize("module", reject_suite)
-def test_convolution_u55_BI_not_delegated_3d(module: Conv3d):
+def test_convolution_u55_INT_not_delegated_3d(module: Conv3d):
OpNotSupportedPipeline(
module(),
module().get_inputs(),
diff --git a/backends/arm/test/ops/test_conv_combos.py b/backends/arm/test/ops/test_conv_combos.py
index d3218258087..76502daf45c 100644
--- a/backends/arm/test/ops/test_conv_combos.py
+++ b/backends/arm/test/ops/test_conv_combos.py
@@ -11,10 +11,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor]
@@ -36,7 +37,7 @@ class ComboBlockBottleneckResidual(torch.nn.Module):
"executorch_exir_dialects_edge__ops_aten_add_Tensor",
]
- test_data_BI = {
+ test_data_INT = {
"per_channel_quant=True": True,
"per_channel_quant=False": False,
}
@@ -119,12 +120,12 @@ class ComboConvBatchnormRelu6(torch.nn.Module):
"executorch_exir_dialects_edge__ops_aten_hardtanh_default",
]
- test_data_MI = {
+ test_data_FP = {
"affine=True": True,
"affine=False": False,
}
- test_data_BI = {
+ test_data_INT = {
"affine=True,per_channel_quant=True": (True, True),
"affine=True,per_channel_quant=False": (True, False),
"affine=False,per_channel_quant=True": (False, True),
@@ -159,7 +160,7 @@ class ComboConvRelu6(torch.nn.Module):
"executorch_exir_dialects_edge__ops_aten_hardtanh_default",
]
- test_data_MI = {
+ test_data_FP = {
"combo_conv_relu_2_x_4d": lambda: (2 * torch.randn(1, 3, 256, 256),),
"combo_conv_relu_0_5_x_4d": lambda: (0.5 * torch.randn(1, 3, 256, 256),),
"combo_conv_relu_4d": lambda: (torch.randn(1, 3, 256, 256),),
@@ -168,10 +169,10 @@ class ComboConvRelu6(torch.nn.Module):
}
# Generate a new test set paired with per_channel_quant=True/False.
- test_data_BI = {
+ test_data_INT = {
# test_name: (input, per_channel_quant)
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
- for (k, v) in test_data_MI.items()
+ for (k, v) in test_data_FP.items()
for q in [True, False]
}
@@ -194,7 +195,7 @@ class ComboConvAvgPool2d(torch.nn.Module):
"executorch_exir_dialects_edge__ops_aten_avg_pool2d_default",
]
- test_data_MI = {
+ test_data_FP = {
"combo_conv_avgpool_20_x_4d": lambda: (20 * torch.randn(1, 3, 64, 32),),
"combo_conv_avgpool_4d": lambda: (torch.randn(1, 3, 100, 200),),
"combo_conv_avgpool_5_x_4d_randn": lambda: (5 * torch.randn(1, 3, 256, 256),),
@@ -202,10 +203,10 @@ class ComboConvAvgPool2d(torch.nn.Module):
}
# Generate a new test set paired with per_channel_quant=True/False.
- test_data_BI = {
+ test_data_INT = {
# test_name: (input, per_channel_quant)
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
- for (k, v) in test_data_MI.items()
+ for (k, v) in test_data_FP.items()
for q in [True, False]
}
@@ -227,9 +228,9 @@ def forward(self, x):
####################
-def test_convolution_2d_tosa_MI_meandim():
+def test_convolution_2d_tosa_FP_meandim():
model = ComboConv2dMeandim()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
model,
model.get_inputs(),
aten_op=[],
@@ -238,9 +239,9 @@ def test_convolution_2d_tosa_MI_meandim():
pipeline.run()
-def test_convolution_2d_tosa_BI_meandim():
+def test_convolution_2d_tosa_INT_meandim():
model = ComboConv2dMeandim()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
model,
model.get_inputs(),
aten_op=[],
@@ -250,9 +251,9 @@ def test_convolution_2d_tosa_BI_meandim():
@common.XfailIfNoCorstone300
-def test_convolution_2d_u55_BI_meandim():
+def test_convolution_2d_u55_INT_meandim():
model = ComboConv2dMeandim()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
model,
model.get_inputs(),
aten_ops=[],
@@ -263,9 +264,9 @@ def test_convolution_2d_u55_BI_meandim():
@common.XfailIfNoCorstone320
-def test_convolution_2d_u85_BI_meandim():
+def test_convolution_2d_u85_INT_meandim():
model = ComboConv2dMeandim()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
model,
model.get_inputs(),
aten_ops=[],
@@ -275,16 +276,42 @@ def test_convolution_2d_u85_BI_meandim():
pipeline.run()
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_FP_meandim():
+ model = ComboConv2dMeandim()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=ComboConv2dMeandim.edge_op_list,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_INT_meandim():
+ model = ComboConv2dMeandim()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=ComboConv2dMeandim.edge_op_list,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
##############################
## Conv + batch norm + relu ##
##############################
-@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_MI)
-def test_convolution_2d_tosa_MI_batchnorm_relu6(test_data):
+@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_FP)
+def test_convolution_2d_tosa_FP_batchnorm_relu6(test_data):
affine = test_data
model = ComboConvBatchnormRelu6(affine)
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
model,
model.get_inputs(),
aten_op=[],
@@ -294,11 +321,11 @@ def test_convolution_2d_tosa_MI_batchnorm_relu6(test_data):
@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307)
-@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_BI)
-def test_convolution_2d_tosa_BI_batchnorm_relu6(test_data):
+@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_INT)
+def test_convolution_2d_tosa_INT_batchnorm_relu6(test_data):
affine, per_channel_quantization = test_data
model = ComboConvBatchnormRelu6(affine)
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
model,
model.get_inputs(),
aten_op=[],
@@ -309,12 +336,12 @@ def test_convolution_2d_tosa_BI_batchnorm_relu6(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_BI)
+@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_INT)
@common.XfailIfNoCorstone300
-def test_convolution_2d_u55_BI_batchnorm_relu6(test_data):
+def test_convolution_2d_u55_INT_batchnorm_relu6(test_data):
affine, per_channel_quantization = test_data
model = ComboConvBatchnormRelu6(affine)
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
model,
model.get_inputs(),
aten_ops=[],
@@ -325,12 +352,12 @@ def test_convolution_2d_u55_BI_batchnorm_relu6(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_BI)
+@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_INT)
@common.XfailIfNoCorstone320
-def test_convolution_2d_u85_BI_batchnorm_relu6(test_data):
+def test_convolution_2d_u85_INT_batchnorm_relu6(test_data):
affine, per_channel_quantization = test_data
model = ComboConvBatchnormRelu6(affine)
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
model,
model.get_inputs(),
aten_ops=[],
@@ -341,15 +368,46 @@ def test_convolution_2d_u85_BI_batchnorm_relu6(test_data):
pipeline.run()
+@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_FP)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_FP_batchnorm_relu6(test_data):
+ affine = test_data
+ model = ComboConvBatchnormRelu6(affine)
+ pipeline = VgfPipeline[input_t1](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=ComboConvBatchnormRelu6.edge_op_list,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", ComboConvBatchnormRelu6.test_data_INT)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_INT_batchnorm_relu6(test_data):
+ affine, per_channel_quantization = test_data
+ model = ComboConvBatchnormRelu6(affine)
+ pipeline = VgfPipeline[input_t1](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=ComboConvBatchnormRelu6.edge_op_list,
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ pipeline.run()
+
+
##################
## Conv + ReLU6 ##
##################
-@common.parametrize("test_data", ComboConvRelu6.test_data_MI)
-def test_convolution_2d_tosa_MI_relu6(test_data):
+@common.parametrize("test_data", ComboConvRelu6.test_data_FP)
+def test_convolution_2d_tosa_FP_relu6(test_data):
model = ComboConvRelu6()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
model,
test_data(),
aten_op=[],
@@ -359,11 +417,11 @@ def test_convolution_2d_tosa_MI_relu6(test_data):
@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307)
-@common.parametrize("test_data", ComboConvRelu6.test_data_BI)
-def test_convolution_2d_tosa_BI_relu6(test_data):
+@common.parametrize("test_data", ComboConvRelu6.test_data_INT)
+def test_convolution_2d_tosa_INT_relu6(test_data):
input, per_channel_quantization = test_data()
model = ComboConvRelu6()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
model,
input,
aten_op=[],
@@ -373,12 +431,12 @@ def test_convolution_2d_tosa_BI_relu6(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboConvRelu6.test_data_BI)
+@common.parametrize("test_data", ComboConvRelu6.test_data_INT)
@common.XfailIfNoCorstone300
-def test_convolution_2d_u55_BI_relu6(test_data):
+def test_convolution_2d_u55_INT_relu6(test_data):
input, per_channel_quantization = test_data()
model = ComboConvRelu6()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
model,
input,
aten_ops=[],
@@ -389,12 +447,12 @@ def test_convolution_2d_u55_BI_relu6(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboConvRelu6.test_data_BI)
+@common.parametrize("test_data", ComboConvRelu6.test_data_INT)
@common.XfailIfNoCorstone320
-def test_convolution_2d_u85_BI_relu6(test_data):
+def test_convolution_2d_u85_INT_relu6(test_data):
input, per_channel_quantization = test_data()
model = ComboConvRelu6()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
model,
input,
aten_ops=[],
@@ -405,12 +463,42 @@ def test_convolution_2d_u85_BI_relu6(test_data):
pipeline.run()
+@common.parametrize("test_data", ComboConvRelu6.test_data_FP)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_FP_relu6(test_data):
+ model = ComboConvRelu6()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ test_data(),
+ aten_op=[],
+ exir_op=ComboConvRelu6.edge_op_list,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", ComboConvRelu6.test_data_INT)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_INT_relu6(test_data):
+ input, per_channel_quantization = test_data()
+ model = ComboConvRelu6()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ input,
+ aten_op=[],
+ exir_op=ComboConvRelu6.edge_op_list,
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ pipeline.run()
+
+
###############################
## Block bottleneck residual ##
###############################
-def test_convolution_2d_tosa_MI_block_bottleneck():
+def test_convolution_2d_tosa_FP_block_bottleneck():
model = ComboBlockBottleneckResidual()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
model,
model.get_inputs(),
aten_op=[],
@@ -419,12 +507,12 @@ def test_convolution_2d_tosa_MI_block_bottleneck():
pipeline.run()
-@common.parametrize("test_data", ComboBlockBottleneckResidual.test_data_BI)
+@common.parametrize("test_data", ComboBlockBottleneckResidual.test_data_INT)
@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307)
-def test_convolution_2d_tosa_BI_block_bottleneck(test_data):
+def test_convolution_2d_tosa_INT_block_bottleneck(test_data):
per_channel_quantization = test_data
model = ComboBlockBottleneckResidual()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
model,
model.get_inputs(),
aten_op=[],
@@ -435,12 +523,12 @@ def test_convolution_2d_tosa_BI_block_bottleneck(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboBlockBottleneckResidual.test_data_BI)
+@common.parametrize("test_data", ComboBlockBottleneckResidual.test_data_INT)
@common.XfailIfNoCorstone300
-def test_convolution_2d_u55_BI_block_bottleneck(test_data):
+def test_convolution_2d_u55_INT_block_bottleneck(test_data):
per_channel_quantization = test_data
model = ComboBlockBottleneckResidual()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
model,
model.get_inputs(),
aten_ops=[],
@@ -451,12 +539,12 @@ def test_convolution_2d_u55_BI_block_bottleneck(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboBlockBottleneckResidual.test_data_BI)
+@common.parametrize("test_data", ComboBlockBottleneckResidual.test_data_INT)
@common.XfailIfNoCorstone320
-def test_convolution_2d_u85_BI_block_bottleneck(test_data):
+def test_convolution_2d_u85_INT_block_bottleneck(test_data):
per_channel_quantization = test_data
model = ComboBlockBottleneckResidual()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
model,
model.get_inputs(),
aten_ops=[],
@@ -467,15 +555,46 @@ def test_convolution_2d_u85_BI_block_bottleneck(test_data):
pipeline.run()
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_FP_block_bottleneck():
+ model = ComboBlockBottleneckResidual()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=ComboBlockBottleneckResidual.edge_op_list,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", ComboBlockBottleneckResidual.test_data_INT)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_INT_block_bottleneck(test_data):
+ per_channel_quantization = test_data
+ model = ComboBlockBottleneckResidual()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=ComboBlockBottleneckResidual.edge_op_list,
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ # TODO: MLETORCH-1136 Change args of run_method_and_compare_outputs of the vgf tests
+ # pipeline.change_args("run_method_and_compare_outputs", model.get_inputs(), qtol=1)
+ pipeline.run()
+
+
######################
## Conv + AvgPool2d ##
######################
-@common.parametrize("test_data", ComboConvAvgPool2d.test_data_MI)
-def test_convolution_2d_tosa_MI_avgpool2d(test_data):
+@common.parametrize("test_data", ComboConvAvgPool2d.test_data_FP)
+def test_convolution_2d_tosa_FP_avgpool2d(test_data):
model = ComboConvAvgPool2d()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
model,
test_data(),
aten_op=[],
@@ -485,11 +604,11 @@ def test_convolution_2d_tosa_MI_avgpool2d(test_data):
@pytest.mark.flaky(reruns=5) # TODO: Investigate flakiness (MLTORCH-307)
-@common.parametrize("test_data", ComboConvAvgPool2d.test_data_BI)
-def test_convolution_2d_tosa_BI_avgpool2d(test_data):
+@common.parametrize("test_data", ComboConvAvgPool2d.test_data_INT)
+def test_convolution_2d_tosa_INT_avgpool2d(test_data):
input, per_channel_quantization = test_data()
model = ComboConvAvgPool2d()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
model,
input,
aten_op=[],
@@ -499,12 +618,12 @@ def test_convolution_2d_tosa_BI_avgpool2d(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboConvAvgPool2d.test_data_BI)
+@common.parametrize("test_data", ComboConvAvgPool2d.test_data_INT)
@common.XfailIfNoCorstone300
-def test_convolution_2d_u55_BI_avgpool2d(test_data):
+def test_convolution_2d_u55_INT_avgpool2d(test_data):
input, per_channel_quantization = test_data()
model = ComboConvAvgPool2d()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
model,
input,
aten_ops=[],
@@ -515,12 +634,12 @@ def test_convolution_2d_u55_BI_avgpool2d(test_data):
pipeline.run()
-@common.parametrize("test_data", ComboConvAvgPool2d.test_data_BI)
+@common.parametrize("test_data", ComboConvAvgPool2d.test_data_INT)
@common.XfailIfNoCorstone320
-def test_convolution_2d_u85_BI_avgpool2d(test_data):
+def test_convolution_2d_u85_INT_avgpool2d(test_data):
input, per_channel_quantization = test_data()
model = ComboConvAvgPool2d()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
model,
input,
aten_ops=[],
@@ -529,3 +648,33 @@ def test_convolution_2d_u85_BI_avgpool2d(test_data):
per_channel_quantization=per_channel_quantization,
)
pipeline.run()
+
+
+@common.parametrize("test_data", ComboConvAvgPool2d.test_data_FP)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_FP_avgpool2d(test_data):
+ model = ComboConvAvgPool2d()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ test_data(),
+ aten_op=[],
+ exir_op=ComboConvAvgPool2d.edge_op_list,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", ComboConvAvgPool2d.test_data_INT)
+@common.SkipIfNoModelConverter
+def test_convolution_2d_vgf_INT_avgpool2d(test_data):
+ input, per_channel_quantization = test_data()
+ model = ComboConvAvgPool2d()
+ pipeline = VgfPipeline[input_t1](
+ model,
+ input,
+ aten_op=[],
+ exir_op=ComboConvAvgPool2d.edge_op_list,
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_conv_constant_pad_nd.py b/backends/arm/test/ops/test_conv_constant_pad_nd.py
index 61497578fb6..636c18ef753 100644
--- a/backends/arm/test/ops/test_conv_constant_pad_nd.py
+++ b/backends/arm/test/ops/test_conv_constant_pad_nd.py
@@ -14,8 +14,9 @@
import torch.nn.functional as F
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.pad.default"
@@ -91,9 +92,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_constant_pad_nd_tosa_MI(test_data: Tuple):
+def test_constant_pad_nd_tosa_FP(test_data: Tuple):
test_data, padding, value = test_data
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
ConstantPadND(padding, value),
(test_data,),
aten_op,
@@ -103,9 +104,9 @@ def test_constant_pad_nd_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_constant_pad_nd_tosa_BI(test_data: Tuple):
+def test_constant_pad_nd_tosa_INT(test_data: Tuple):
test_data, padding, value = test_data
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
ConstantPadND(padding, value),
(test_data,),
aten_op,
@@ -114,3 +115,31 @@ def test_constant_pad_nd_tosa_BI(test_data: Tuple):
rtol=0.01,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_constant_pad_nd_vgf_FP(test_data: Tuple):
+ test_data, padding, value = test_data
+ pipeline = VgfPipeline[input_t1](
+ ConstantPadND(padding, value),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_constant_pad_nd_vgf_INT(test_data: Tuple):
+ test_data, padding, value = test_data
+ pipeline = VgfPipeline[input_t1](
+ ConstantPadND(padding, value),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_cos.py b/backends/arm/test/ops/test_cos.py
index 7cfd32d2bd2..acb950f2a2e 100644
--- a/backends/arm/test/ops/test_cos.py
+++ b/backends/arm/test/ops/test_cos.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.cos.default"
@@ -39,8 +40,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@pytest.mark.tosa_ref_model
-def test_cos_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_cos_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Cos(),
(test_data,),
aten_op,
@@ -53,8 +54,8 @@ def test_cos_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@pytest.mark.tosa_ref_model
-def test_cos_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_cos_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Cos(),
(test_data,),
aten_op,
@@ -65,8 +66,8 @@ def test_cos_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_cos_tosa_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_cos_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Cos(),
(test_data,),
aten_op,
@@ -77,8 +78,8 @@ def test_cos_tosa_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_cos_tosa_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_cos_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Cos(),
(test_data,),
aten_op,
@@ -86,3 +87,29 @@ def test_cos_tosa_u85_BI(test_data: Tuple):
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_cos_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Cos(),
+ (test_data,),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_cos_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Cos(),
+ (test_data,),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_cosh.py b/backends/arm/test/ops/test_cosh.py
new file mode 100644
index 00000000000..14b7def60cd
--- /dev/null
+++ b/backends/arm/test/ops/test_cosh.py
@@ -0,0 +1,107 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Tuple
+
+import torch
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+aten_op = "torch.ops.aten.cosh.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten__cosh_default"
+
+input_t1 = Tuple[torch.Tensor] # Input x
+
+test_data_suite = {
+ # (test_name, test_data)
+ "zeros": torch.zeros(10, 10, 10),
+ "zeros_4D": torch.zeros(1, 10, 32, 7),
+ "zeros_alt_shape": torch.zeros(10, 3, 5),
+ "ones": torch.ones(15, 10, 7),
+ "ones_4D": torch.ones(1, 3, 32, 16),
+ "rand": torch.rand(10, 10) - 0.5,
+ "rand_alt_shape": torch.rand(10, 3, 5) - 0.5,
+ "rand_4D": torch.rand(1, 6, 5, 7) - 0.5,
+ "randn_pos": torch.randn(10) + 10,
+ "randn_neg": torch.randn(10) - 10,
+ "ramp": torch.arange(-16, 16, 0.2),
+ "large": 100 * torch.ones(1, 1),
+ "small": 0.000001 * torch.ones(1, 1),
+ "small_rand": torch.rand(100) * 0.01,
+ "biggest": torch.tensor([700.0, 710.0, 750.0]),
+}
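+# Note: cosh(x) = (e^x + e^-x) / 2 grows like e^x / 2, so float32 saturates to
+# inf for x above roughly 89; the "large" and "biggest" cases presumably probe
+# that saturation rather than numerical accuracy.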
+
+
+class Cosh(torch.nn.Module):
+ def forward(self, x: torch.Tensor):
+ return torch.cosh(x)
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_cosh_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
+ Cosh(),
+ (test_data,),
+ aten_op,
+ exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_cosh_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
+ Cosh(), (test_data,), aten_op=aten_op, exir_op=exir_op
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone300
+@common.parametrize("test_data", test_data_suite)
+def test_cosh_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
+ Cosh(), (test_data,), aten_ops=aten_op, exir_ops=exir_op
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone320
+@common.parametrize("test_data", test_data_suite)
+def test_cosh_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
+ Cosh(), (test_data,), aten_ops=aten_op, exir_ops=exir_op
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_cosh_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Cosh(),
+ (test_data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_cosh_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Cosh(),
+ (test_data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_cumsum.py b/backends/arm/test/ops/test_cumsum.py
new file mode 100644
index 00000000000..ce175fb37c0
--- /dev/null
+++ b/backends/arm/test/ops/test_cumsum.py
@@ -0,0 +1,122 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+input_t1 = Tuple[torch.Tensor, int]
+aten_op = "torch.ops.aten.cumsum.default"
+
+"""
+Tests the aten.cumsum operator by decomposing it into a convolution and
+verifying results across various dims and pipelines.
+"""
+
+
+class CumsumModule(torch.nn.Module):
+ test_parameters = {
+ "1d_dim0": lambda: (torch.rand(10), 0),
+ "1d_dim_neg1": lambda: (torch.rand(10), -1),
+ "2d_dim1": lambda: (torch.rand(5, 6), 1),
+ "3d_dim2": lambda: (torch.rand(2, 3, 4), 2),
+ "3d_dim0": lambda: (torch.rand(2, 3, 4), 0),
+ "4d_dim3": lambda: (torch.rand(1, 2, 3, 4), 3),
+ "4d_dim1": lambda: (torch.rand(1, 2, 3, 4), 1),
+ }
+
+ def forward(self, x: torch.Tensor, dim: int) -> torch.Tensor:
+ return torch.cumsum(x, dim)
+
+
+@common.parametrize("test_data", CumsumModule.test_parameters)
+def test_cumsum_tosa_FP(test_data: input_t1):
+ module = CumsumModule()
+ args = test_data()
+ pipeline = TosaPipelineFP[input_t1](
+ module,
+ args,
+ aten_op,
+ exir_op=[],
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", CumsumModule.test_parameters)
+def test_cumsum_tosa_INT(test_data: input_t1):
+ module = CumsumModule()
+ args = test_data()
+ pipeline = TosaPipelineINT[input_t1](
+ module,
+ args,
+ aten_op,
+ exir_op=[],
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", CumsumModule.test_parameters)
+@common.SkipIfNoModelConverter
+def test_cumsum_vgf_FP(test_data: input_t1):
+ module = CumsumModule()
+ args = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ args,
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", CumsumModule.test_parameters)
+@common.SkipIfNoModelConverter
+def test_cumsum_vgf_INT(test_data: input_t1):
+ module = CumsumModule()
+ args = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ args,
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", CumsumModule.test_parameters)
+@common.XfailIfNoCorstone300
+def test_cumsum_u55_INT(test_data: input_t1):
+ module = CumsumModule()
+ args = test_data()
+ pipeline = EthosU55PipelineINT[input_t1](
+ module,
+ args,
+ aten_ops=aten_op,
+ exir_ops=[],
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", CumsumModule.test_parameters)
+@common.XfailIfNoCorstone320
+def test_cumsum_u85_INT(test_data: input_t1):
+ module = CumsumModule()
+ args = test_data()
+ pipeline = EthosU85PipelineINT[input_t1](
+ module,
+ args,
+ aten_ops=aten_op,
+ exir_ops=[],
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_depthwise_conv.py b/backends/arm/test/ops/test_depthwise_conv.py
index 4a6150317b5..bf6aad840ac 100644
--- a/backends/arm/test/ops/test_depthwise_conv.py
+++ b/backends/arm/test/ops/test_depthwise_conv.py
@@ -11,10 +11,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = Tuple[torch.Tensor] # Input x
@@ -154,7 +155,7 @@
)
# Shenanigans to get a nicer output when a test fails.
-test_data_conv2d_MI = {
+test_data_conv2d_FP = {
"2x2_1x6x4x4_gp6_st1": lambda: dw_conv2d_2x2_1x6x4x4_gp6_st1,
"3x3_1x3x256x256_gp3_st1": lambda: dw_conv2d_3x3_1x3x256x256_gp3_st1,
"3x3_1x4x256x256_gp4_nobias": lambda: dw_conv2d_3x3_1x4x256x256_gp4_nobias,
@@ -164,9 +165,9 @@
}
# Generate a new test set paired with per_channel_quant=True/False.
-test_data_conv2d_BI = {
+test_data_conv2d_INT = {
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
- for (k, v) in test_data_conv2d_MI.items()
+ for (k, v) in test_data_conv2d_FP.items()
for q in [True, False]
}
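+# (The v=v, q=q default arguments bind the loop values at definition time;
+# without them every lambda would close over the comprehension variables and
+# return the final (v, q) pair.)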
@@ -182,7 +183,7 @@
for q in [True, False]
}
-test_data_conv1d_MI = {
+test_data_conv1d_FP = {
"2_1x6x4_gp6_st1": lambda: dw_conv1d_2_1x6x4_gp6_st1,
"two_dw_conv1d": lambda: two_dw_conv1d,
"3_1x3x256_gp3_st1": lambda: dw_conv1d_3_1x3x256_gp3_st1,
@@ -190,16 +191,16 @@
}
# Generate a new test set paired with per_channel_quant=True/False.
-test_data_conv1d_BI = {
+test_data_conv1d_INT = {
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
- for (k, v) in test_data_conv1d_MI.items()
+ for (k, v) in test_data_conv1d_FP.items()
for q in [True, False]
}
-@common.parametrize("test_data", test_data_conv1d_MI | test_data_conv2d_MI)
-def test_depthwise_convolution_2d_tosa_MI(test_data: torch.nn.Module):
- pipeline = TosaPipelineMI[input_t](
+@common.parametrize("test_data", test_data_conv1d_FP | test_data_conv2d_FP)
+def test_depthwise_convolution_2d_tosa_FP(test_data: torch.nn.Module):
+ pipeline = TosaPipelineFP[input_t](
test_data(),
test_data().get_inputs(),
aten_op=[],
@@ -209,10 +210,10 @@ def test_depthwise_convolution_2d_tosa_MI(test_data: torch.nn.Module):
@pytest.mark.flaky(reruns=5) # TODO: Investigate flakiness (MLTORCH-307)
-@common.parametrize("test_data", test_data_conv1d_BI | test_data_conv2d_BI)
-def test_depthwise_convolution_2d_tosa_BI(test_data):
+@common.parametrize("test_data", test_data_conv1d_INT | test_data_conv2d_INT)
+def test_depthwise_convolution_2d_tosa_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
model.get_inputs(),
aten_op=[],
@@ -222,6 +223,34 @@ def test_depthwise_convolution_2d_tosa_BI(test_data):
pipeline.run()
+@common.parametrize("test_data", test_data_conv1d_FP | test_data_conv2d_FP)
+@common.SkipIfNoModelConverter
+def test_depthwise_convolution_2d_vgf_FP(test_data: torch.nn.Module):
+ model = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_conv1d_INT | test_data_conv2d_INT)
+@common.SkipIfNoModelConverter
+def test_depthwise_convolution_2d_vgf_INT(test_data):
+ model, per_channel_quantization = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ model.get_inputs(),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ pipeline.run()
+
+
x_fails = {
f"{k},per_channel_quant={q}": reason
for k, reason in {
@@ -233,10 +262,10 @@ def test_depthwise_convolution_2d_tosa_BI(test_data):
@common.XfailIfNoCorstone300 # TODO: MLETORCH-516
-@common.parametrize("test_data", test_data_conv2d_BI, x_fails)
-def test_depthwise_convolution_2d_u55_BI(test_data):
+@common.parametrize("test_data", test_data_conv2d_INT, x_fails)
+def test_depthwise_convolution_2d_u55_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
model.get_inputs(),
aten_ops=[],
@@ -248,10 +277,10 @@ def test_depthwise_convolution_2d_u55_BI(test_data):
@common.XfailIfNoCorstone300 # TODO: MLETORCH-516
-@common.parametrize("test_data", test_data_conv1d_BI)
-def test_depthwise_convolution_1d_u55_BI(test_data):
+@common.parametrize("test_data", test_data_conv1d_INT)
+def test_depthwise_convolution_1d_u55_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
model.get_inputs(),
aten_ops=[],
@@ -263,10 +292,10 @@ def test_depthwise_convolution_1d_u55_BI(test_data):
@common.XfailIfNoCorstone320 # TODO: MLETORCH-516
-@common.parametrize("test_data", test_data_conv2d_BI, x_fails)
-def test_depthwise_convolution_2d_u85_BI(test_data):
+@common.parametrize("test_data", test_data_conv2d_INT, x_fails)
+def test_depthwise_convolution_2d_u85_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
model.get_inputs(),
aten_ops=[],
@@ -278,10 +307,10 @@ def test_depthwise_convolution_2d_u85_BI(test_data):
@common.XfailIfNoCorstone320 # TODO: MLETORCH-516
-@common.parametrize("test_data", test_data_conv1d_BI, x_fails)
-def test_depthwise_convolution_1d_u85_BI(test_data):
+@common.parametrize("test_data", test_data_conv1d_INT, x_fails)
+def test_depthwise_convolution_1d_u85_INT(test_data):
model, per_channel_quantization = test_data()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
model.get_inputs(),
aten_ops=[],
diff --git a/backends/arm/test/ops/test_div.py b/backends/arm/test/ops/test_div.py
index 0e1ca005fa1..026939758a0 100644
--- a/backends/arm/test/ops/test_div.py
+++ b/backends/arm/test/ops/test_div.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.div.Tensor"
@@ -89,14 +90,14 @@ def forward(
@common.parametrize("test_data", test_data_suite)
-def test_div_tensor_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](Div(), test_data(), aten_op, exir_op)
+def test_div_tensor_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](Div(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_div_tensor_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](Div(), test_data(), aten_op=[], exir_op=[])
+def test_div_tensor_tosa_INT(test_data: Tuple):
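+ # aten_op is left empty here: in the quantized flow div no longer appears
+ # as aten.div.Tensor (it is presumably decomposed, e.g. into reciprocal and
+ # mul), so there is no aten op to check for.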
+ pipeline = TosaPipelineINT[input_t1](Div(), test_data(), aten_op=[], exir_op=[])
pipeline.run()
@@ -112,8 +113,8 @@ def test_div_tensor_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite, xfails=x_fails)
@common.XfailIfNoCorstone300
-def test_div_tensor_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_div_tensor_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Div(),
test_data(),
aten_ops=[],
@@ -125,8 +126,8 @@ def test_div_tensor_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite, xfails=x_fails)
@common.XfailIfNoCorstone320
-def test_div_tensor_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_div_tensor_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Div(),
test_data(),
aten_ops=[],
@@ -134,3 +135,25 @@ def test_div_tensor_u85_BI(test_data: Tuple):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_div_tensor_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Div(), test_data(), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_div_tensor_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Div(),
+ test_data(),
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_elu.py b/backends/arm/test/ops/test_elu.py
new file mode 100644
index 00000000000..884f54c0202
--- /dev/null
+++ b/backends/arm/test/ops/test_elu.py
@@ -0,0 +1,133 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+import torch.nn as nn
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+test_data_suite = {
+ # (test_name, test_data)
+ "zeros_default": lambda: (1.0, torch.zeros(1, 10, 10, 10)),
+ "ones_default": lambda: (1.0, torch.ones(10, 10, 10)),
+ "rand_default": lambda: (1.0, torch.rand(10, 10) - 0.5),
+ "randn_pos_default": lambda: (1.0, torch.randn(1, 2, 3, 3) + 10),
+ "randn_neg_default": lambda: (1.0, torch.randn(2, 4, 3) - 10),
+ "ramp_default": lambda: (1.0, torch.arange(-16, 16, 0.2)),
+ "large_pos_default": lambda: (1.0, torch.randn(3, 3) * 1e6 + 1e7),
+ "large_neg_default": lambda: (1.0, -torch.empty(5).uniform_(1e5, 1e8)),
+ "small_pos_default": lambda: (1.0, torch.empty(5).uniform_(1e-8, 1e-5)),
+ "small_neg_default": lambda: (1.0, -torch.empty(5).uniform_(1e-8, 1e-5)),
+ "zeros_custom": lambda: (2.0, torch.zeros(1, 10, 10, 10)),
+ "ones_custom": lambda: (2.0, torch.ones(10, 10, 10)),
+ "rand_custom": lambda: (2.0, torch.rand(10, 10) - 0.5),
+ "randn_pos_custom": lambda: (2.0, torch.randn(1, 3, 3) + 10),
+ "randn_neg_custom": lambda: (2.0, torch.randn(1, 2, 4, 3) - 10),
+ "ramp_custom": lambda: (2.0, torch.arange(-16, 16, 0.2)),
+ "large_pos_custom": lambda: (2.0, torch.randn(3, 3) * 1e6 + 1e7),
+ "large_neg_custom": lambda: (2.0, -torch.empty(5).uniform_(1e5, 1e8)),
+ "small_pos_custom": lambda: (2.0, torch.empty(5).uniform_(1e-8, 1e-5)),
+ "small_neg_custom": lambda: (2.0, -torch.empty(5).uniform_(1e-8, 1e-5)),
+ "zeros_zero": lambda: (0.0, torch.zeros(1, 10, 10, 10)),
+ "ones_zero": lambda: (0.0, torch.ones(10, 10, 10)),
+ "rand_zero": lambda: (0.0, torch.rand(10, 10) - 0.5),
+ "randn_pos_zero": lambda: (0.0, torch.randn(1, 3, 3) + 10),
+ "randn_neg_zero": lambda: (0.0, torch.randn(1, 2, 4, 3) - 10),
+ "ramp_zero": lambda: (0.0, torch.arange(-16, 16, 0.2)),
+ "large_pos_zero": lambda: (0.0, torch.randn(3, 3) * 1e6 + 1e7),
+ "large_neg_zero": lambda: (0.0, -torch.empty(5).uniform_(1e5, 1e8)),
+ "small_pos_zero": lambda: (0.0, torch.empty(5).uniform_(1e-8, 1e-5)),
+ "small_neg_zero": lambda: (0.0, -torch.empty(5).uniform_(1e-8, 1e-5)),
+}
+
+
+class Elu(nn.Module):
+ aten_op = "torch.ops.aten.elu.default"
+ exir_op = "executorch_exir_dialects_edge__ops_aten__elu_default"
+
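+ # ELU(x) = x for x > 0 and alpha * (exp(x) - 1) for x <= 0; alpha scales
+ # the negative saturation plateau.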
+ def __init__(self, input_alpha: float = 1.0):
+ super().__init__()
+ self.elu = torch.nn.ELU(alpha=input_alpha)
+
+ def forward(self, input_: torch.Tensor):
+ return self.elu(input_)
+
+
+input_t1 = Tuple[torch.Tensor]
+
+
+@common.parametrize("test_module", test_data_suite)
+def test_elu_tosa_FP(test_module: input_t1):
+ alpha, test_data = test_module()
+ pipeline = TosaPipelineFP[input_t1](
+ Elu(alpha), (test_data,), aten_op=Elu.aten_op, exir_op=Elu.exir_op
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_suite)
+def test_elu_tosa_INT(test_module: input_t1):
+ alpha, test_data = test_module()
+ pipeline = TosaPipelineINT[input_t1](
+ Elu(alpha), (test_data,), aten_op=Elu.aten_op, exir_op=Elu.exir_op
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone300
+@common.parametrize("test_module", test_data_suite)
+def test_elu_u55_INT(test_module: input_t1):
+ alpha, test_data = test_module()
+ pipeline = EthosU55PipelineINT[input_t1](
+ Elu(alpha), (test_data,), aten_ops=Elu.aten_op, exir_ops=Elu.exir_op
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone320
+@common.parametrize("test_module", test_data_suite)
+def test_elu_u85_INT(test_module: input_t1):
+ alpha, test_data = test_module()
+ pipeline = EthosU85PipelineINT[input_t1](
+ Elu(alpha), (test_data,), aten_ops=Elu.aten_op, exir_ops=Elu.exir_op
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+@common.parametrize("test_module", test_data_suite)
+def test_elu_vgf_FP(test_module: input_t1):
+ alpha, test_data = test_module()
+ pipeline = VgfPipeline[input_t1](
+ Elu(alpha),
+ (test_data,),
+ aten_op=Elu.aten_op,
+ exir_op=Elu.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+@common.parametrize("test_module", test_data_suite)
+def test_elu_vgf_INT(test_module: input_t1):
+ alpha, test_data = test_module()
+ pipeline = VgfPipeline[input_t1](
+ Elu(alpha),
+ (test_data,),
+ aten_op=Elu.aten_op,
+ exir_op=Elu.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_embedding.py b/backends/arm/test/ops/test_embedding.py
index 5696346b225..b0a4647c3ae 100644
--- a/backends/arm/test/ops/test_embedding.py
+++ b/backends/arm/test/ops/test_embedding.py
@@ -11,8 +11,9 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -57,9 +58,9 @@ def forward(self, weights: torch.Tensor, indices: torch.Tensor):
@common.parametrize("test_input", test_input)
-def test_embedding_tosa_MI(test_input: input_params):
+def test_embedding_tosa_FP(test_input: input_params):
op = Embedding()
- pipeline = TosaPipelineMI[input_params](
+ pipeline = TosaPipelineFP[input_params](
op,
test_input,
op.aten_op,
@@ -71,9 +72,9 @@ def test_embedding_tosa_MI(test_input: input_params):
@common.parametrize("test_input", test_input)
-def test_embedding_tosa_BI(test_input: input_params):
+def test_embedding_tosa_INT(test_input: input_params):
op = Embedding()
- pipeline = TosaPipelineBI[input_params](
+ pipeline = TosaPipelineINT[input_params](
op,
test_input,
op.aten_op,
@@ -84,3 +85,37 @@ def test_embedding_tosa_BI(test_input: input_params):
pipeline.pop_stage("check_count.exir")
pipeline.run()
+
+
+@common.parametrize("test_input", test_input)
+@common.SkipIfNoModelConverter
+def test_embedding_vgf_FP(test_input: input_params):
+ op = Embedding()
+ pipeline = VgfPipeline[input_params](
+ op,
+ test_input,
+ op.aten_op,
+ op.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ use_to_edge_transform_and_lower=True,
+ transform_passes=[InsertCastForOpsWithInt64InputPass()],
+ )
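+ # Assumption: aten.embedding indices are int64, which the backend does not
+ # take natively; the cast pass inserts an int64 -> int32 cast so the op can
+ # still be delegated.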
+ pipeline.run()
+
+
+@common.parametrize("test_input", test_input)
+@common.SkipIfNoModelConverter
+def test_embedding_vgf_INT(test_input: input_params):
+ op = Embedding()
+ pipeline = VgfPipeline[input_params](
+ op,
+ test_input,
+ op.aten_op,
+ op.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ use_to_edge_transform_and_lower=True,
+ )
+ pipeline.pop_stage("check.aten")
+ pipeline.pop_stage("check_count.exir")
+
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_eq.py b/backends/arm/test/ops/test_eq.py
index bd6cace00a5..b840869ba48 100644
--- a/backends/arm/test/ops/test_eq.py
+++ b/backends/arm/test/ops/test_eq.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = Tuple[torch.Tensor]
@@ -77,8 +78,8 @@ def get_inputs(self):
@common.parametrize("test_module", test_data_tensor)
-def test_eq_scalar_tosa_MI_tensor(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_eq_scalar_tosa_FP_tensor(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
Equal.aten_op_Tensor,
@@ -88,8 +89,8 @@ def test_eq_scalar_tosa_MI_tensor(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_eq_scalar_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_eq_scalar_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
Equal.aten_op_Scalar,
@@ -99,8 +100,8 @@ def test_eq_scalar_tosa_MI(test_module):
@common.parametrize("test_module", test_data_tensor)
-def test_eq_scalar_tosa_BI_tensor(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_eq_scalar_tosa_INT_tensor(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Equal.aten_op_Tensor,
@@ -110,8 +111,8 @@ def test_eq_scalar_tosa_BI_tensor(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_eq_scalar_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_eq_scalar_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Equal.aten_op_Tensor,
@@ -122,7 +123,7 @@ def test_eq_scalar_tosa_BI(test_module):
@common.parametrize("test_module", test_data_tensor)
@common.XfailIfNoCorstone300
-def test_eq_scalar_u55_BI_tensor(test_module):
+def test_eq_scalar_u55_INT_tensor(test_module):
# EQUAL is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -136,7 +137,7 @@ def test_eq_scalar_u55_BI_tensor(test_module):
@common.parametrize("test_module", test_data_scalar)
@common.XfailIfNoCorstone300
-def test_eq_scalar_u55_BI(test_module):
+def test_eq_scalar_u55_INT(test_module):
# EQUAL is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -158,8 +159,8 @@ def test_eq_scalar_u55_BI(test_module):
strict=False,
)
@common.XfailIfNoCorstone320
-def test_eq_scalar_u85_BI_tensor(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_eq_scalar_u85_INT_tensor(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Equal.aten_op_Tensor,
@@ -178,8 +179,8 @@ def test_eq_scalar_u85_BI_tensor(test_module):
strict=False,
)
@common.XfailIfNoCorstone320
-def test_eq_scalar_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_eq_scalar_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Equal.aten_op_Tensor,
@@ -187,3 +188,47 @@ def test_eq_scalar_u85_BI(test_module):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_eq_scalar_vgf_FP_tensor(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Equal.aten_op_Tensor,
+ Equal.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_eq_scalar_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Equal.aten_op_Scalar,
+ Equal.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_eq_scalar_vgf_INT_tensor(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Equal.aten_op_Tensor,
+ Equal.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_eq_scalar_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Equal.aten_op_Tensor,
+ Equal.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_erf.py b/backends/arm/test/ops/test_erf.py
index e7136036c65..363b1e2d8c9 100644
--- a/backends/arm/test/ops/test_erf.py
+++ b/backends/arm/test/ops/test_erf.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.erf.default"
@@ -34,21 +35,21 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", Erf.test_data)
-def test_erf_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](Erf(), test_data(), aten_op, exir_op)
+def test_erf_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](Erf(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Erf.test_data)
-def test_erf_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Erf(), test_data(), aten_op, exir_op)
+def test_erf_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](Erf(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", Erf.test_data)
@common.XfailIfNoCorstone300
-def test_erf_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_erf_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
Erf(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
@@ -56,8 +57,30 @@ def test_erf_u55_BI(test_data: input_t1):
@common.parametrize("test_data", Erf.test_data)
@common.XfailIfNoCorstone320
-def test_erf_u85_BI(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_erf_u85_INT(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
Erf(), test_data(), aten_op, exir_op, run_on_fvp=True
)
pipeline.run()
+
+
+@common.parametrize("test_data", Erf.test_data)
+@common.SkipIfNoModelConverter
+def test_erf_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Erf(), test_data(), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Erf.test_data)
+@common.SkipIfNoModelConverter
+def test_erf_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Erf(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_exp.py b/backends/arm/test/ops/test_exp.py
index 9218455916a..6eaacc71d86 100644
--- a/backends/arm/test/ops/test_exp.py
+++ b/backends/arm/test/ops/test_exp.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
test_data_suite = {
@@ -38,8 +39,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", test_data_suite)
-def test_exp_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_exp_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Exp(),
(test_data(),),
aten_op,
@@ -49,8 +50,8 @@ def test_exp_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_exp_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_exp_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Exp(),
(test_data(),),
aten_op,
@@ -61,8 +62,8 @@ def test_exp_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_exp_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_exp_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Exp(),
(test_data(),),
aten_op,
@@ -74,8 +75,8 @@ def test_exp_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_exp_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_exp_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Exp(),
(test_data(),),
aten_op,
@@ -83,3 +84,29 @@ def test_exp_u85_BI(test_data: Tuple):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_exp_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Exp(),
+ (test_data(),),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_exp_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Exp(),
+ (test_data(),),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_expand.py b/backends/arm/test/ops/test_expand.py
index 8f84c39dd27..607d8650946 100644
--- a/backends/arm/test/ops/test_expand.py
+++ b/backends/arm/test/ops/test_expand.py
@@ -16,10 +16,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.expand.default"
@@ -48,8 +49,8 @@ def forward(self, x: torch.Tensor, m: Sequence):
@common.parametrize("test_data", Expand.test_parameters | Expand.test_reject_set)
-def test_expand_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_expand_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Expand(),
test_data(),
aten_op,
@@ -59,8 +60,8 @@ def test_expand_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", Expand.test_parameters | Expand.test_reject_set)
-def test_expand_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_expand_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Expand(),
test_data(),
aten_op,
@@ -78,8 +79,8 @@ def test_expand_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", Expand.test_parameters, x_fails)
@common.XfailIfNoCorstone300
-def test_expand_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_expand_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Expand(),
test_data(),
aten_op,
@@ -91,8 +92,8 @@ def test_expand_u55_BI(test_data: Tuple):
@common.parametrize("test_data", Expand.test_parameters, x_fails)
@common.XfailIfNoCorstone320
-def test_expand_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_expand_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Expand(),
test_data(),
aten_op,
@@ -102,13 +103,39 @@ def test_expand_u85_BI(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", Expand.test_parameters | Expand.test_reject_set)
+@common.SkipIfNoModelConverter
+def test_expand_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Expand(),
+ test_data(),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Expand.test_parameters | Expand.test_reject_set)
+@common.SkipIfNoModelConverter
+def test_expand_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Expand(),
+ test_data(),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
@common.parametrize("test_data", Expand.test_reject_set)
@common.XfailIfNoCorstone300
@pytest.mark.xfail(
reason="MLETORCH-716: Node will be optimized away and Vela can't handle empty graphs"
)
-def test_expand_u55_BI_failure_set(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_expand_u55_INT_failure_set(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Expand(),
test_data(),
aten_op,
@@ -123,8 +150,8 @@ def test_expand_u55_BI_failure_set(test_data: Tuple):
@pytest.mark.xfail(
reason="MLETORCH-716: Node will be optimized away and Vela can't handle empty graphs"
)
-def test_expand_u85_BI_failure_set(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_expand_u85_INT_failure_set(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Expand(),
test_data(),
aten_op,
diff --git a/backends/arm/test/ops/test_expm1.py b/backends/arm/test/ops/test_expm1.py
new file mode 100644
index 00000000000..dad95b24f7b
--- /dev/null
+++ b/backends/arm/test/ops/test_expm1.py
@@ -0,0 +1,113 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+aten_op = "torch.ops.aten.expm1.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten_expm1_default"
+
+input_t1 = Tuple[torch.Tensor]
+
+test_data_suite = {
+ "zeroes": torch.zeros(1, 10, 10, 10),
+ "ones": torch.ones(10, 2, 3),
+ "rand": torch.rand(10, 10) - 0.5,
+ "near_zero": torch.randn(100) * 0.01,
+ "taylor_small": torch.empty(5).uniform_(
+ -0.35, 0.35
+ ), # test cases for the Taylor series expansion
+ "randn_large_pos": torch.randn(10) + 10,
+ "randn_large_neg": torch.randn(10) - 10,
+ "ramp": torch.arange(-16, 16, 0.2),
+}
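+# expm1(x) = exp(x) - 1 computed without cancellation: for x = 1e-8 in
+# float32, the naive exp(x) - 1 rounds to 0 while expm1(x) returns ~1e-8.
+# The "near_zero" and "taylor_small" cases appear to target that regime.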
+
+
+class Expm1(torch.nn.Module):
+
+ def forward(self, x: torch.Tensor):
+ return torch.expm1(x)
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_expm1_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
+ Expm1(),
+ (test_data,),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_expm1_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
+ Expm1(),
+ (test_data,),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone300
+@common.parametrize("test_data", test_data_suite)
+def test_expm1_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
+ Expm1(),
+ (test_data,),
+ aten_ops=aten_op,
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone320
+@common.parametrize("test_data", test_data_suite)
+def test_expm1_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
+ Expm1(),
+ (test_data,),
+ aten_ops=aten_op,
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_expm1_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Expm1(),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_expm1_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Expm1(),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_eye.py b/backends/arm/test/ops/test_eye.py
index ef9256a6a08..48f93379fc0 100644
--- a/backends/arm/test/ops/test_eye.py
+++ b/backends/arm/test/ops/test_eye.py
@@ -6,11 +6,12 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = tuple[torch.Tensor]
@@ -48,9 +49,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", EyeAdd.test_data)
-def test_eye_tosa_MI(test_data: test_data_t):
+def test_eye_tosa_FP(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
EyeAdd(*init_data),
input_data(),
EyeAdd.aten_op,
@@ -59,9 +60,9 @@ def test_eye_tosa_MI(test_data: test_data_t):
@common.parametrize("test_data", EyeAdd.test_data)
-def test_eye_tosa_BI(test_data: test_data_t):
+def test_eye_tosa_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
EyeAdd(*init_data),
input_data(),
EyeAdd.aten_op,
@@ -72,9 +73,9 @@ def test_eye_tosa_BI(test_data: test_data_t):
@common.parametrize("test_data", EyeAdd.test_data)
@common.XfailIfNoCorstone300
-def test_eye_u55_BI(test_data: test_data_t):
+def test_eye_u55_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
EyeAdd(*init_data),
input_data(),
EyeAdd.aten_op,
@@ -86,9 +87,9 @@ def test_eye_u55_BI(test_data: test_data_t):
@common.parametrize("test_data", EyeAdd.test_data)
@common.XfailIfNoCorstone320
-def test_eye_u85_BI(test_data: test_data_t):
+def test_eye_u85_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
EyeAdd(*init_data),
input_data(),
EyeAdd.aten_op,
@@ -98,6 +99,39 @@ def test_eye_u85_BI(test_data: test_data_t):
pipeline.run()
+@common.parametrize(
+ "test_data",
+ EyeAdd.test_data,
+)
+@common.SkipIfNoModelConverter
+def test_eye_vgf_FP(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ EyeAdd(*init_data),
+ input_data(),
+ EyeAdd.aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ EyeAdd.test_data,
+)
+@common.SkipIfNoModelConverter
+def test_eye_vgf_INT(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ EyeAdd(*init_data),
+ input_data(),
+ EyeAdd.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
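+ # Assumption: the eye tensor is folded to a constant at compile time, so
+ # no quantization nodes are expected and the check is skipped.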
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
@common.parametrize(
"test_data",
EyeAdd.test_data_not_delegated,
@@ -107,7 +141,7 @@ def test_eye_u85_BI(test_data: test_data_t):
"int32_int64": "MLETORCG-716: Do not delegate empty networks to vela",
},
)
-def test_eye_tosa_BI_not_delegated(test_data: test_data_t):
+def test_eye_tosa_INT_not_delegated(test_data: test_data_t):
input_data, init_data = test_data
pipeline = OpNotSupportedPipeline[input_t](
EyeAdd(*init_data), input_data(), non_delegated_ops={}, quantize=True
diff --git a/backends/arm/test/ops/test_floor.py b/backends/arm/test/ops/test_floor.py
index 87c9ae8d4bd..c66ef1c5d27 100644
--- a/backends/arm/test/ops/test_floor.py
+++ b/backends/arm/test/ops/test_floor.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor]
@@ -43,9 +44,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data)
-def test_floor_tosa_MI(test_data: input_t1):
+def test_floor_tosa_FP(test_data: input_t1):
module, data = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
module,
(data,),
module.aten_op,
@@ -55,9 +56,9 @@ def test_floor_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", test_data)
-def test_floor_tosa_BI(test_data: input_t1):
+def test_floor_tosa_INT(test_data: input_t1):
module, data = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
module,
(data,),
module.aten_op,
@@ -70,9 +71,9 @@ def test_floor_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", test_data)
@common.XfailIfNoCorstone300
-def test_floor_u55_BI(test_data: input_t1):
+def test_floor_u55_INT(test_data: input_t1):
module, data = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
module,
(data,),
module.aten_op,
@@ -84,9 +85,9 @@ def test_floor_u55_BI(test_data: input_t1):
@common.parametrize("test_data", test_data)
@common.XfailIfNoCorstone320
-def test_floor_u85_BI(test_data: input_t1):
+def test_floor_u85_INT(test_data: input_t1):
module, data = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
module,
(data,),
module.aten_op,
@@ -94,3 +95,33 @@ def test_floor_u85_BI(test_data: input_t1):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data)
+@common.SkipIfNoModelConverter
+def test_floor_vgf_FP(test_data: input_t1):
+ module, data = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ (data,),
+ module.aten_op,
+ module.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data)
+@common.SkipIfNoModelConverter
+def test_floor_vgf_INT(test_data: input_t1):
+ module, data = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ (data,),
+ module.aten_op,
+ module.exir_op,
+ atol=0.06,
+ rtol=0.01,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_full.py b/backends/arm/test/ops/test_full.py
index 13a3146f2fe..9e2c9b4d8be 100644
--- a/backends/arm/test/ops/test_full.py
+++ b/backends/arm/test/ops/test_full.py
@@ -15,10 +15,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor, int]
@@ -76,8 +77,8 @@ def forward(self, input_tensor: torch.Tensor, value):
return input_tensor + torch.full_like(input_tensor, value)
-def test_full_tosa_MI_only():
- pipeline = TosaPipelineMI[input_t1](
+def test_full_tosa_FP_only():
+ pipeline = TosaPipelineFP[input_t1](
Full(),
(),
aten_op=[],
@@ -86,9 +87,9 @@ def test_full_tosa_MI_only():
pipeline.run()
-def test_full_tosa_MI_const():
+def test_full_tosa_FP_const():
test_data = (torch.rand((2, 2, 3, 3)) * 10,)
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
AddConstFull(),
test_data,
aten_op=[],
@@ -98,8 +99,8 @@ def test_full_tosa_MI_const():
@common.parametrize("test_data", FullLike.test_parameters)
-def test_full_like_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_full_like_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
FullLike(),
test_data(),
aten_op=[],
@@ -108,9 +109,21 @@ def test_full_like_tosa_MI(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", FullLike.test_parameters)
+def test_full_like_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
+ FullLike(),
+ test_data(),
+ aten_op=[],
+ exir_op=exir_op,
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
@common.parametrize("test_data", AddVariableFull.test_parameters)
-def test_full_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_full_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
AddVariableFull(),
test_data,
aten_op=[],
@@ -120,8 +133,8 @@ def test_full_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", AddVariableFull.test_parameters)
-def test_full_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_full_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
AddVariableFull(),
test_data,
aten_op=[],
@@ -130,22 +143,61 @@ def test_full_tosa_BI(test_data: Tuple):
pipeline.run()
-@common.parametrize("test_data", FullLike.test_parameters)
-def test_full_like_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
- FullLike(),
- test_data(),
+@common.SkipIfNoModelConverter
+def test_full_vgf_FP_only():
+ pipeline = VgfPipeline[input_t1](
+ Full(),
+ (),
aten_op=[],
exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_full_vgf_FP_const():
+ test_data = (torch.rand((2, 2, 3, 3)) * 10,)
+ pipeline = VgfPipeline[input_t1](
+ AddConstFull(),
+ test_data,
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AddVariableFull.test_parameters)
+@common.SkipIfNoModelConverter
+def test_full_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ AddVariableFull(),
+ test_data,
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AddVariableFull.test_parameters)
+@common.SkipIfNoModelConverter
+def test_full_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ AddVariableFull(),
+ test_data,
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+INT",
)
- pipeline.pop_stage("check.quant_nodes")
pipeline.run()
@common.parametrize("test_data", AddVariableFull.test_parameters)
@common.XfailIfNoCorstone320
-def test_full_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_full_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
AddVariableFull(),
test_data,
aten_ops=[],
@@ -158,8 +210,8 @@ def test_full_u85_BI(test_data: Tuple):
@common.parametrize("test_data", AddVariableFull.test_parameters)
@common.XfailIfNoCorstone300
-def test_full_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_full_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
AddVariableFull(),
test_data,
aten_ops=[],
@@ -174,9 +226,9 @@ def test_full_u55_BI(test_data: Tuple):
@pytest.mark.skip(
"This fails since full outputs int64 by default if 'fill_value' is integer, which our backend doesn't support."
)
-def test_full_tosa_MI_integer_value():
+def test_full_tosa_FP_integer_value():
test_data = (torch.ones((2, 2)), 1.0)
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
AddVariableFull(),
test_data,
aten_op=[],
@@ -191,9 +243,9 @@ def test_full_tosa_MI_integer_value():
@pytest.mark.skip(
"This fails since the fill value in the full tensor is set at compile time by the example data (1.)."
)
-def test_full_tosa_MI_set_value_at_runtime(tosa_version: str):
+def test_full_tosa_FP_set_value_at_runtime(tosa_version: str):
test_data = (torch.ones((2, 2)), 1.0)
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
AddVariableFull(),
test_data,
aten_op=[],
diff --git a/backends/arm/test/ops/test_ge.py b/backends/arm/test/ops/test_ge.py
index 19c036be526..c66f6d164b9 100644
--- a/backends/arm/test/ops/test_ge.py
+++ b/backends/arm/test/ops/test_ge.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = Tuple[torch.Tensor]
@@ -77,8 +78,8 @@ def get_inputs(self):
@common.parametrize("test_module", test_data_tensor)
-def test_ge_tensor_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_ge_tensor_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
GreaterEqual.aten_op_tensor,
@@ -88,8 +89,8 @@ def test_ge_tensor_tosa_MI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_ge_scalar_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_ge_scalar_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
GreaterEqual.aten_op_scalar,
@@ -99,8 +100,8 @@ def test_ge_scalar_tosa_MI(test_module):
@common.parametrize("test_module", test_data_tensor)
-def test_ge_tensor_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_ge_tensor_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
GreaterEqual.aten_op_tensor,
@@ -110,8 +111,8 @@ def test_ge_tensor_tosa_BI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_ge_scalar_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_ge_scalar_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
GreaterEqual.aten_op_tensor,
@@ -122,7 +123,7 @@ def test_ge_scalar_tosa_BI(test_module):
@common.parametrize("test_module", test_data_tensor)
@common.XfailIfNoCorstone300
-def test_ge_tensor_u55_BI(test_module):
+def test_ge_tensor_u55_INT(test_module):
# GREATER_EQUAL is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -136,7 +137,7 @@ def test_ge_tensor_u55_BI(test_module):
@common.parametrize("test_module", test_data_scalar)
@common.XfailIfNoCorstone300
-def test_ge_scalar_u55_BI(test_module):
+def test_ge_scalar_u55_INT(test_module):
# GREATER_EQUAL is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -155,8 +156,8 @@ def test_ge_scalar_u55_BI(test_module):
xfails={"ge_tensor_rank4_randn": "MLETORCH-847: Boolean eq result unstable on U85"},
)
@common.XfailIfNoCorstone320
-def test_ge_tensor_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_ge_tensor_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
GreaterEqual.aten_op_tensor,
@@ -172,8 +173,8 @@ def test_ge_tensor_u85_BI(test_module):
xfails={"ge_scalar_rank4_randn": "MLETORCH-847: Boolean eq result unstable on U85"},
)
@common.XfailIfNoCorstone320
-def test_ge_scalar_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_ge_scalar_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
GreaterEqual.aten_op_tensor,
@@ -181,3 +182,55 @@ def test_ge_scalar_u85_BI(test_module):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_ge_tensor_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ GreaterEqual.aten_op_tensor,
+ GreaterEqual.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_ge_tensor_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ GreaterEqual.aten_op_tensor,
+ GreaterEqual.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_ge_scalar_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ GreaterEqual.aten_op_scalar,
+ GreaterEqual.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_ge_scalar_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ GreaterEqual.aten_op_tensor,
+ GreaterEqual.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_gelu.py b/backends/arm/test/ops/test_gelu.py
index 6ac9b5dabf5..264f6b95e71 100644
--- a/backends/arm/test/ops/test_gelu.py
+++ b/backends/arm/test/ops/test_gelu.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor]
@@ -81,9 +82,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", Gelu.test_data)
-def test_gelu_tosa_MI(test_data: input_t1):
+def test_gelu_tosa_FP(test_data: input_t1):
approximate, test_data = test_data()
- TosaPipelineMI[input_t1](
+ TosaPipelineFP[input_t1](
Gelu(approximate),
(test_data,),
Gelu.aten_op,
@@ -93,9 +94,9 @@ def test_gelu_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", Gelu.test_data)
-def test_gelu_tosa_BI(test_data: input_t1):
+def test_gelu_tosa_INT(test_data: input_t1):
approximate, test_data = test_data()
- TosaPipelineBI[input_t1](
+ TosaPipelineINT[input_t1](
Gelu(approximate),
(test_data,),
Gelu.aten_op,
@@ -105,9 +106,9 @@ def test_gelu_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", Gelu.test_data)
@common.XfailIfNoCorstone300
-def test_gelu_u55_BI(test_data: input_t1):
+def test_gelu_u55_INT(test_data: input_t1):
approximate, test_data = test_data()
- EthosU55PipelineBI[input_t1](
+ EthosU55PipelineINT[input_t1](
Gelu(approximate),
(test_data,),
Gelu.aten_op,
@@ -117,11 +118,39 @@ def test_gelu_u55_BI(test_data: input_t1):
@common.parametrize("test_data", Gelu.test_data)
@common.XfailIfNoCorstone320
-def test_gelu_u85_BI(test_data: input_t1):
+def test_gelu_u85_INT(test_data: input_t1):
approximate, test_data = test_data()
- EthosU85PipelineBI[input_t1](
+ EthosU85PipelineINT[input_t1](
Gelu(approximate),
(test_data,),
Gelu.aten_op,
Gelu.exir_op,
).run()
+
+
+@common.parametrize("test_data", Gelu.test_data)
+@common.SkipIfNoModelConverter
+def test_gelu_vgf_FP(test_data: input_t1):
+ approximate, data = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Gelu(approximate),
+ (data,),
+ Gelu.aten_op,
+ Gelu.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Gelu.test_data)
+@common.SkipIfNoModelConverter
+def test_gelu_vgf_INT(test_data: input_t1):
+ approximate, data = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Gelu(approximate),
+ (data,),
+ Gelu.aten_op,
+ Gelu.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_glu.py b/backends/arm/test/ops/test_glu.py
new file mode 100644
index 00000000000..c19fb892c92
--- /dev/null
+++ b/backends/arm/test/ops/test_glu.py
@@ -0,0 +1,130 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+aten_op = "torch.ops.aten.glu.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten__glu_default"
+
+
+input_t1 = Tuple[torch.Tensor]
+
+test_data_suite = {
+ "zeros": [torch.zeros(10, 10, 2), -1],
+ "ones": [torch.ones(10, 10, 2), -1],
+ "rand": [torch.rand(10, 10, 2) - 0.5, -1],
+ "randn_pos": [torch.randn(10, 2) + 10, -1],
+ "randn_neg": [torch.randn(10, 2) - 10, -1],
+ "ramp": [torch.linspace(-16, 15.8, 160).reshape(-1, 2), -1],
+ "zeros_custom_dim": [torch.zeros(7, 10, 5), 1],
+ "rand_custom_dim": [torch.rand(10, 3, 3) - 0.5, 0],
+}
+
+
+class Glu(torch.nn.Module):
+
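+    # F.glu splits the input in two along dim and returns first_half * sigmoid(second_half).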
+ def forward(self, a: torch.Tensor, dim: int) -> torch.Tensor:
+ return F.glu(a, dim=dim)
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+def test_glu_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
+ Glu(),
+ (*test_data,),
+ aten_op,
+ exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+def test_glu_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
+ Glu(),
+ (*test_data,),
+ aten_op=[],
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+@common.XfailIfNoCorstone300
+def test_glu_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
+ Glu(),
+ (*test_data,),
+ aten_ops=[],
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+@common.XfailIfNoCorstone320
+def test_glu_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
+ Glu(),
+ (*test_data,),
+ aten_ops=[],
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+@common.SkipIfNoModelConverter
+def test_glu_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Glu(),
+ (*test_data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+@common.SkipIfNoModelConverter
+def test_glu_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Glu(),
+ (*test_data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_group_norm.py b/backends/arm/test/ops/test_group_norm.py
index 9c5517d9dae..5fa4cd328de 100644
--- a/backends/arm/test/ops/test_group_norm.py
+++ b/backends/arm/test/ops/test_group_norm.py
@@ -6,10 +6,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -61,10 +62,10 @@ def forward(
@common.parametrize("test_data", test_data_suite)
-def test_native_group_norm_tosa_MI(test_data):
+def test_native_group_norm_tosa_FP(test_data):
aten_op = "torch.ops.aten.group_norm.default"
exir_op = "executorch_exir_dialects_edge__ops_aten_native_group_norm_default"
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
test_data[1],
test_data[0],
aten_op=aten_op,
@@ -84,10 +85,10 @@ def test_native_group_norm_tosa_MI(test_data):
},
strict=False,
)
-def test_native_group_norm_tosa_BI(test_data):
+def test_native_group_norm_tosa_INT(test_data):
aten_op = "torch.ops.aten.sub.Tensor" # 'sub' op arbitrarily chosen to confirm groupnorm was decomposed
exir_op = "executorch_exir_dialects_edge__ops_aten_native_group_norm_default"
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
test_data[1],
test_data[0],
aten_op=aten_op,
@@ -109,8 +110,8 @@ def test_native_group_norm_tosa_BI(test_data):
strict=False,
)
@common.XfailIfNoCorstone300
-def test_native_group_norm_u55_BI(test_data):
- pipeline = EthosU55PipelineBI[input_t](
+def test_native_group_norm_u55_INT(test_data):
+ pipeline = EthosU55PipelineINT[input_t](
test_data[1],
test_data[0],
"torch.ops.aten.sub.Tensor", # 'sub' op arbitrarily chosen to confirm groupnorm was decomposed
@@ -133,8 +134,8 @@ def test_native_group_norm_u55_BI(test_data):
strict=False,
)
@common.XfailIfNoCorstone320
-def test_native_group_norm_u85_BI(test_data):
- pipeline = EthosU85PipelineBI[input_t](
+def test_native_group_norm_u85_INT(test_data):
+ pipeline = EthosU85PipelineINT[input_t](
test_data[1],
test_data[0],
"torch.ops.aten.sub.Tensor", # 'sub' op arbitrarily chosen to confirm groupnorm was decomposed
@@ -143,3 +144,56 @@ def test_native_group_norm_u85_BI(test_data):
)
pipeline.change_args("run_method_and_compare_outputs", atol=1, qtol=1)
pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+ xfails={
+ "randn_1_12_8_6_groups_12": "MLETORCH-925: Fix numerical issue",
+ "rand_6_8_10_12_groups_1": "MLETORCH-925: Fix numerical issue",
+ "rand_6_8_10_12_groups_4_no_affine": "MLETORCH-925: Fix numerical issue",
+ "rand_6_8_10_12_groups_8": "MLETORCH-925: Fix numerical issue",
+ },
+ strict=False,
+)
+@common.SkipIfNoModelConverter
+def test_native_group_norm_vgf_FP(test_data):
+ aten_op = "torch.ops.aten.group_norm.default"
+ exir_op = "executorch_exir_dialects_edge__ops_aten_native_group_norm_default"
+    inp, model = test_data
+    pipeline = VgfPipeline[input_t](
+        model,
+        inp,
+ aten_op=aten_op,
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+ xfails={
+ "randn_1_12_8_6_groups_12": "MLETORCH-925: Fix numerical issue",
+ "rand_6_8_10_12_groups_1": "MLETORCH-925: Fix numerical issue",
+ "rand_6_8_10_12_groups_4_no_affine": "MLETORCH-925: Fix numerical issue",
+ "rand_6_8_10_12_groups_8": "MLETORCH-925: Fix numerical issue",
+ },
+ strict=False,
+)
+@common.SkipIfNoModelConverter
+def test_native_group_norm_vgf_INT(test_data):
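+    # 'sub' op arbitrarily chosen to confirm groupnorm was decomposed, as in the TOSA INT test above.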
+ aten_op = "torch.ops.aten.sub.Tensor"
+ exir_op = "executorch_exir_dialects_edge__ops_aten_native_group_norm_default"
+    inp, model = test_data
+    pipeline = VgfPipeline[input_t](
+        model,
+        inp,
+ aten_op=aten_op,
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+INT",
+ atol=0.1, # TODO: "MLETORCH-925: Fix numerical issue for aten.native_group_norm"
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_gt.py b/backends/arm/test/ops/test_gt.py
index 0a1b97928fd..83c85e5f9fc 100644
--- a/backends/arm/test/ops/test_gt.py
+++ b/backends/arm/test/ops/test_gt.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -78,8 +79,8 @@ def get_inputs(self):
@common.parametrize("test_module", test_data_tensor)
-def test_gt_tensor_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_gt_tensor_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
Greater.aten_op_tensor,
@@ -89,8 +90,8 @@ def test_gt_tensor_tosa_MI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_gt_scalar_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_gt_scalar_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
Greater.aten_op_scalar,
@@ -100,8 +101,8 @@ def test_gt_scalar_tosa_MI(test_module):
@common.parametrize("test_module", test_data_tensor)
-def test_gt_tensor_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_gt_tensor_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Greater.aten_op_tensor,
@@ -111,8 +112,8 @@ def test_gt_tensor_tosa_BI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_gt_scalar_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_gt_scalar_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Greater.aten_op_tensor,
@@ -123,7 +124,7 @@ def test_gt_scalar_tosa_BI(test_module):
@common.parametrize("test_module", test_data_tensor)
@common.XfailIfNoCorstone300
-def test_gt_tensor_u55_BI(test_module):
+def test_gt_tensor_u55_INT(test_module):
# Greater is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -137,7 +138,7 @@ def test_gt_tensor_u55_BI(test_module):
@common.parametrize("test_module", test_data_scalar)
@common.XfailIfNoCorstone300
-def test_gt_scalar_u55_BI(test_module):
+def test_gt_scalar_u55_INT(test_module):
# Greater is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -158,8 +159,8 @@ def test_gt_scalar_u55_BI(test_module):
},
)
@common.XfailIfNoCorstone320
-def test_gt_tensor_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_gt_tensor_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Greater.aten_op_tensor,
@@ -177,8 +178,8 @@ def test_gt_tensor_u85_BI(test_module):
},
)
@common.XfailIfNoCorstone320
-def test_gt_scalar_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_gt_scalar_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
Greater.aten_op_tensor,
@@ -186,3 +187,55 @@ def test_gt_scalar_u85_BI(test_module):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_gt_tensor_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Greater.aten_op_tensor,
+ Greater.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_gt_scalar_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Greater.aten_op_scalar,
+ Greater.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_gt_tensor_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Greater.aten_op_tensor,
+ Greater.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_gt_scalar_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ Greater.aten_op_tensor,
+ Greater.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_hardsigmoid.py b/backends/arm/test/ops/test_hardsigmoid.py
index 399c6088e89..5f591c15617 100644
--- a/backends/arm/test/ops/test_hardsigmoid.py
+++ b/backends/arm/test/ops/test_hardsigmoid.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.hardsigmoid.default"
@@ -40,8 +41,8 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_hardsigmoid_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_hardsigmoid_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Hardsigmoid(),
(test_data(),),
aten_op,
@@ -51,8 +52,8 @@ def test_hardsigmoid_tosa_MI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_hardsigmoid_tosa_BI(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_hardsigmoid_tosa_INT(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Hardsigmoid(),
(test_data(),),
aten_op,
@@ -63,8 +64,8 @@ def test_hardsigmoid_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_hardsigmoid_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_hardsigmoid_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Hardsigmoid(),
(test_data(),),
aten_op,
@@ -77,8 +78,8 @@ def test_hardsigmoid_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_hardsigmoid_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_hardsigmoid_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Hardsigmoid(),
(test_data(),),
aten_op,
@@ -87,3 +88,25 @@ def test_hardsigmoid_u85_BI(test_data: torch.Tensor):
use_to_edge_transform_and_lower=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_hardsigmoid_vgf_FP(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Hardsigmoid(), (test_data(),), aten_op, exir_op=[], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_hardsigmoid_vgf_INT(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Hardsigmoid(),
+ (test_data(),),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_hardswish.py b/backends/arm/test/ops/test_hardswish.py
index bd61346e3db..00db0cb296b 100644
--- a/backends/arm/test/ops/test_hardswish.py
+++ b/backends/arm/test/ops/test_hardswish.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.hardswish.default"
@@ -42,21 +43,21 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_hardswish_tosa_MI(test_data):
- pipeline = TosaPipelineMI[input_t1](Hardswish(), (test_data(),), aten_op, exir_op)
+def test_hardswish_tosa_FP(test_data):
+ pipeline = TosaPipelineFP[input_t1](Hardswish(), (test_data(),), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_hardswish_tosa_BI(test_data):
- pipeline = TosaPipelineBI[input_t1](Hardswish(), (test_data(),), aten_op, exir_op)
+def test_hardswish_tosa_INT(test_data):
+ pipeline = TosaPipelineINT[input_t1](Hardswish(), (test_data(),), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_hardswish_u55_BI(test_data):
- EthosU55PipelineBI[input_t1](
+def test_hardswish_u55_INT(test_data):
+ EthosU55PipelineINT[input_t1](
Hardswish(),
(test_data(),),
aten_op,
@@ -68,8 +69,8 @@ def test_hardswish_u55_BI(test_data):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_hardswish_u85_BI(test_data):
- EthosU85PipelineBI[input_t1](
+def test_hardswish_u85_INT(test_data):
+ EthosU85PipelineINT[input_t1](
Hardswish(),
(test_data(),),
aten_op,
@@ -77,3 +78,25 @@ def test_hardswish_u85_BI(test_data):
run_on_fvp=True,
use_to_edge_transform_and_lower=True,
).run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_hardswish_vgf_FP(test_data):
+ pipeline = VgfPipeline[input_t1](
+ Hardswish(), (test_data(),), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_hardswish_vgf_INT(test_data):
+ pipeline = VgfPipeline[input_t1](
+ Hardswish(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_hardtanh.py b/backends/arm/test/ops/test_hardtanh.py
index 5c8cfffbb2d..28f7e717351 100644
--- a/backends/arm/test/ops/test_hardtanh.py
+++ b/backends/arm/test/ops/test_hardtanh.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
test_data_suite = {
@@ -46,14 +47,14 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_hardtanh_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t](HardTanh(), (test_data(),), aten_op, exir_op)
+def test_hardtanh_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t](HardTanh(), (test_data(),), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_hardtanh_tosa_BI(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t](
+def test_hardtanh_tosa_INT(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t](
HardTanh(),
(test_data(),),
aten_op,
@@ -64,8 +65,8 @@ def test_hardtanh_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_hardtanh_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t](
+def test_hardtanh_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t](
HardTanh(),
(test_data(),),
aten_op,
@@ -77,8 +78,8 @@ def test_hardtanh_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_hardtanh_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t](
+def test_hardtanh_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t](
HardTanh(),
(test_data(),),
aten_op,
@@ -86,3 +87,25 @@ def test_hardtanh_u85_BI(test_data: torch.Tensor):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_hardtanh_vgf_FP(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t](
+ HardTanh(), (test_data(),), aten_op, exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_hardtanh_vgf_INT(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t](
+ HardTanh(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_index_select.py b/backends/arm/test/ops/test_index_select.py
index a3045e421aa..95ebaa62a38 100644
--- a/backends/arm/test/ops/test_index_select.py
+++ b/backends/arm/test/ops/test_index_select.py
@@ -9,9 +9,13 @@
import pytest
import torch
+
+from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ OpNotSupportedPipeline,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -78,19 +82,19 @@ def forward(self, input_: torch.Tensor, dim, index_: torch.Tensor):
@pytest.mark.parametrize("test_data", list(test_data.values()))
-def test_index_select_tosa_MI(test_data: input_params):
+def test_index_select_tosa_FP(test_data: input_params):
op, test_input = test_data
- pipeline = TosaPipelineMI[input_params](
+ pipeline = TosaPipelineFP[input_params](
op, test_input, op.aten_op, op.exir_op, use_to_edge_transform_and_lower=True
)
pipeline.run()
@pytest.mark.parametrize("test_data", list(test_data.values())[:-1])
-def test_index_select_tosa_BI(test_data: input_params):
+def test_index_select_tosa_INT(test_data: input_params):
op, test_input = test_data
- pipeline = TosaPipelineBI[input_params](
+ pipeline = TosaPipelineINT[input_params](
op,
test_input,
op.aten_op,
@@ -101,10 +105,10 @@ def test_index_select_tosa_BI(test_data: input_params):
@pytest.mark.parametrize("test_data", list(test_data.values())[-1:])
-def test_index_select_tosa_BI_rand(test_data: input_params):
+def test_index_select_tosa_INT_rand(test_data: input_params):
op, test_input = test_data
- pipeline = TosaPipelineBI[input_params](
+ pipeline = TosaPipelineINT[input_params](
op,
test_input,
op.aten_op,
@@ -115,3 +119,63 @@ def test_index_select_tosa_BI_rand(test_data: input_params):
"run_method_and_compare_outputs", inputs=test_input, atol=0.9, rtol=0.2, qtol=1
)
pipeline.run()
+
+
+@pytest.mark.parametrize("test_data", list(test_data.values())[-1:])
+def test_index_select_u55_INT_not_delegated(test_data: input_params):
+ op, test_input = test_data
+
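+    # index_select is expected to stay un-delegated on U55; exactly one instance
+    # of the exir op should remain in the graph.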
+ pipeline = OpNotSupportedPipeline[input_params](
+ op,
+ test_input,
+ {op.exir_op: 1},
+ quantize=True,
+ u55_subset=True,
+ )
+ pipeline.run()
+
+
+@pytest.mark.parametrize("test_data", list(test_data.values()))
+@common.SkipIfNoModelConverter
+def test_index_select_vgf_FP(test_data: input_params):
+ op, inp = test_data
+ pipeline = VgfPipeline[input_params](
+ op,
+ inp,
+ op.aten_op,
+ op.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@pytest.mark.parametrize("test_data", list(test_data.values())[:-1])
+@common.SkipIfNoModelConverter
+def test_index_select_vgf_INT(test_data: input_params):
+ op, inp = test_data
+ pipeline = VgfPipeline[input_params](
+ op,
+ inp,
+ op.aten_op,
+ op.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@pytest.mark.parametrize("test_data", list(test_data.values())[-1:])
+@common.SkipIfNoModelConverter
+def test_index_select_vgf_INT_rand(test_data: input_params):
+ op, inp = test_data
+ pipeline = VgfPipeline[input_params](
+ op,
+ inp,
+ op.aten_op,
+ op.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ # TODO: MLETORCH-1136 Change args of run_method_and_compare_outputs of the vgf tests
+ # pipeline.change_args(
+ # "run_method_and_compare_outputs", inputs=test_input, atol=0.9, rtol=0.2, qtol=1
+ # )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_index_tensor.py b/backends/arm/test/ops/test_index_tensor.py
index f1f6f5171d8..557846922b8 100644
--- a/backends/arm/test/ops/test_index_tensor.py
+++ b/backends/arm/test/ops/test_index_tensor.py
@@ -10,8 +10,9 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ OpNotSupportedPipeline,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
@@ -102,11 +103,11 @@ def forward(
"test_4d_ellipsis_middle": "Ellipsis before index unsupported",
},
)
-def test_index_tensor_tosa_MI_ellipsis(test_data: input_params):
+def test_index_tensor_tosa_FP_ellipsis(test_data: input_params):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineMI[input_params](
+ TosaPipelineFP[input_params](
IndexTensor_Ellipsis(),
test_input,
IndexTensorTestCommon.aten_op,
@@ -126,11 +127,11 @@ def test_index_tensor_tosa_MI_ellipsis(test_data: input_params):
"test_4d_ellipsis_middle": "Ellipsis before index unsupported",
},
)
-def test_index_tensor_tosa_BI_ellipsis(test_data: input_params):
+def test_index_tensor_tosa_INT_ellipsis(test_data: input_params):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineBI[input_params](
+ TosaPipelineINT[input_params](
IndexTensor_Ellipsis(),
test_input,
IndexTensorTestCommon.aten_op,
@@ -216,11 +217,11 @@ def forward(
"test_4d_slice_middle": "Slice before index unsupported",
},
)
-def test_index_tensor_tosa_MI_slice(test_data: input_params_slice):
+def test_index_tensor_tosa_FP_slice(test_data: input_params_slice):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineMI[input_params_slice](
+ TosaPipelineFP[input_params_slice](
IndexTensor_Slice(),
test_input,
IndexTensorTestCommon.aten_op,
@@ -241,11 +242,11 @@ def test_index_tensor_tosa_MI_slice(test_data: input_params_slice):
"test_4d_slice_middle": "Slice before index unsupported",
},
)
-def test_index_tensor_tosa_BI_slice(test_data: input_params_slice):
+def test_index_tensor_tosa_INT_slice(test_data: input_params_slice):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineBI[input_params_slice](
+ TosaPipelineINT[input_params_slice](
IndexTensor_Slice(),
test_input,
IndexTensorTestCommon.aten_op,
@@ -383,11 +384,11 @@ def forward(self, input_: torch.Tensor, indices: Tuple[None | torch.Tensor]):
@common.parametrize("test_data", IndexTensor.test_data)
-def test_index_tensor_tosa_MI(test_data: input_params):
+def test_index_tensor_tosa_FP(test_data: input_params):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineMI[input_params](
+ TosaPipelineFP[input_params](
IndexTensor(),
test_input,
IndexTensorTestCommon.aten_op,
@@ -399,11 +400,11 @@ def test_index_tensor_tosa_MI(test_data: input_params):
@common.parametrize("test_data", IndexTensor.test_data)
-def test_index_tensor_tosa_BI(test_data: input_params):
+def test_index_tensor_tosa_INT(test_data: input_params):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineBI[input_params](
+ TosaPipelineINT[input_params](
IndexTensor(),
test_input,
IndexTensorTestCommon.aten_op,
@@ -423,11 +424,11 @@ def test_index_tensor_tosa_BI(test_data: input_params):
"test_3d_3_idx_with_none_middle": "None (Unsqueeze) unsupported",
},
)
-def test_index_tensor_tosa_MI_none(test_data: input_params):
+def test_index_tensor_tosa_FP_none(test_data: input_params):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineMI[input_params](
+ TosaPipelineFP[input_params](
IndexTensor(),
test_input,
IndexTensorTestCommon.aten_op,
@@ -449,14 +450,29 @@ def test_index_tensor_tosa_MI_none(test_data: input_params):
"test_3d_3_idx_with_none_middle": "None (Unsqueeze) unsupported",
},
)
-def test_index_tensor_tosa_BI_none(test_data: input_params):
+def test_index_tensor_tosa_INT_none(test_data: input_params):
test_input = test_data
with torch.no_grad():
(
- TosaPipelineBI[input_params](
+ TosaPipelineINT[input_params](
IndexTensor(),
test_input,
IndexTensorTestCommon.aten_op,
IndexTensorTestCommon.exir_op,
).run()
)
+
+
+@common.parametrize("test_data", IndexTensor.test_data)
+@common.XfailIfNoCorstone300
+def test_index_tensor_u55_INT_not_delegated(test_data: input_params):
+ """Ethos-U55 backend BI pipeline test for index.Tensor"""
+ test_input = test_data
+ with torch.no_grad():
+ OpNotSupportedPipeline[input_params](
+ IndexTensor(),
+ test_input,
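+            # Maps each exir op to the number of instances expected to remain un-delegated.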
+ {IndexTensorTestCommon.exir_op: 1},
+ quantize=True,
+ u55_subset=True,
+ ).run()
diff --git a/backends/arm/test/ops/test_layer_norm.py b/backends/arm/test/ops/test_layer_norm.py
index 8d31ef992cb..2c9b83dc7e7 100644
--- a/backends/arm/test/ops/test_layer_norm.py
+++ b/backends/arm/test/ops/test_layer_norm.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -64,9 +65,9 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_native_layer_norm_tosa_MI(test_data):
+def test_native_layer_norm_tosa_FP(test_data):
test_data, model = test_data()
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
model,
test_data,
"torch.ops.aten.layer_norm.default",
@@ -75,9 +76,9 @@ def test_native_layer_norm_tosa_MI(test_data):
@common.parametrize("test_data", test_data_suite)
-def test_native_layer_norm_tosa_BI(test_data):
+def test_native_layer_norm_tosa_INT(test_data):
test_data, model = test_data()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
test_data,
"torch.ops.aten.sub.Tensor", # Just check for sub op included in the layernorm decomposition
@@ -88,9 +89,9 @@ def test_native_layer_norm_tosa_BI(test_data):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_native_layer_norm_u55_BI(test_data):
+def test_native_layer_norm_u55_INT(test_data):
test_data, model = test_data()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
test_data,
"torch.ops.aten.sub.Tensor", # Just check for sub op included in the layernorm decomposition
@@ -102,9 +103,9 @@ def test_native_layer_norm_u55_BI(test_data):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_native_layer_norm_u85_BI(test_data):
+def test_native_layer_norm_u85_INT(test_data):
test_data, model = test_data()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
test_data,
"torch.ops.aten.sub.Tensor", # Just check for sub op included in the layernorm decomposition
@@ -112,3 +113,29 @@ def test_native_layer_norm_u85_BI(test_data):
symmetric_io_quantization=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_layer_norm_vgf_FP(test_data):
+ test_input, model = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ test_input,
+ "torch.ops.aten.layer_norm.default",
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_native_layer_norm_vgf_INT(test_data):
+ test_input, model = test_data()
+ pipeline = VgfPipeline[input_t](
+ model,
+ test_input,
+ "torch.ops.aten.sub.Tensor",
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_le.py b/backends/arm/test/ops/test_le.py
index b48bad8248b..6cb185ecb92 100644
--- a/backends/arm/test/ops/test_le.py
+++ b/backends/arm/test/ops/test_le.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -78,8 +79,8 @@ def get_inputs(self):
@common.parametrize("test_module", test_data_tensor)
-def test_le_tensor_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_le_tensor_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
LessEqual.aten_op_tensor,
@@ -89,8 +90,8 @@ def test_le_tensor_tosa_MI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_le_scalar_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_le_scalar_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
LessEqual.aten_op_scalar,
@@ -100,8 +101,8 @@ def test_le_scalar_tosa_MI(test_module):
@common.parametrize("test_module", test_data_tensor)
-def test_le_tensor_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_le_tensor_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessEqual.aten_op_tensor,
@@ -111,8 +112,8 @@ def test_le_tensor_tosa_BI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_le_scalar_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_le_scalar_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessEqual.aten_op_tensor,
@@ -123,7 +124,7 @@ def test_le_scalar_tosa_BI(test_module):
@common.parametrize("test_module", test_data_tensor)
@common.XfailIfNoCorstone300
-def test_le_tensor_u55_BI_not_delegated(test_module):
+def test_le_tensor_u55_INT_not_delegated(test_module):
    # GREATER_EQUAL is not supported on U55. LE uses the GREATER_EQUAL TOSA operator.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -137,7 +138,7 @@ def test_le_tensor_u55_BI_not_delegated(test_module):
@common.parametrize("test_module", test_data_scalar)
@common.XfailIfNoCorstone300
-def test_le_scalar_u55_BI_not_delegated(test_module):
+def test_le_scalar_u55_INT_not_delegated(test_module):
    # GREATER_EQUAL is not supported on U55. LE uses the GREATER_EQUAL TOSA operator.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -159,8 +160,8 @@ def test_le_scalar_u55_BI_not_delegated(test_module):
},
)
@common.XfailIfNoCorstone320
-def test_le_tensor_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_le_tensor_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessEqual.aten_op_tensor,
@@ -179,8 +180,8 @@ def test_le_tensor_u85_BI(test_module):
},
)
@common.XfailIfNoCorstone320
-def test_le_scalar_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_le_scalar_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessEqual.aten_op_tensor,
@@ -189,3 +190,55 @@ def test_le_scalar_u85_BI(test_module):
use_to_edge_transform_and_lower=True,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_le_tensor_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessEqual.aten_op_tensor,
+ LessEqual.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_le_tensor_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessEqual.aten_op_tensor,
+ LessEqual.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_le_scalar_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessEqual.aten_op_scalar,
+ LessEqual.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_le_scalar_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessEqual.aten_op_tensor,
+ LessEqual.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_leaky_relu.py b/backends/arm/test/ops/test_leaky_relu.py
index a83c2812bf0..c18255a73c0 100644
--- a/backends/arm/test/ops/test_leaky_relu.py
+++ b/backends/arm/test/ops/test_leaky_relu.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.leaky_relu.default"
@@ -37,9 +38,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", LeakyReLU.test_data)
-def test_leaky_relu_tosa_MI(test_data):
+def test_leaky_relu_tosa_FP(test_data):
data, slope = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
LeakyReLU(slope),
data,
[],
@@ -52,9 +53,9 @@ def test_leaky_relu_tosa_MI(test_data):
@common.parametrize("test_data", LeakyReLU.test_data)
-def test_leaky_relu_tosa_BI(test_data):
+def test_leaky_relu_tosa_INT(test_data):
data, slope = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
LeakyReLU(slope),
data,
[],
@@ -66,9 +67,9 @@ def test_leaky_relu_tosa_BI(test_data):
@common.parametrize("test_data", LeakyReLU.test_data)
@common.XfailIfNoCorstone300
-def test_leaky_relu_u55_BI(test_data):
+def test_leaky_relu_u55_INT(test_data):
data, slope = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
LeakyReLU(slope),
data,
[],
@@ -81,9 +82,9 @@ def test_leaky_relu_u55_BI(test_data):
@common.parametrize("test_data", LeakyReLU.test_data)
@common.XfailIfNoCorstone320
-def test_leaky_relu_u85_BI(test_data):
+def test_leaky_relu_u85_INT(test_data):
data, slope = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
LeakyReLU(slope),
data,
[],
@@ -92,3 +93,35 @@ def test_leaky_relu_u85_BI(test_data):
)
pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
pipeline.run()
+
+
+@common.parametrize("test_data", LeakyReLU.test_data)
+@common.SkipIfNoModelConverter
+def test_leaky_relu_vgf_FP(test_data):
+ data, slope = test_data()
+ pipeline = VgfPipeline[input_t1](
+ LeakyReLU(slope),
+ data,
+ [],
+ use_to_edge_transform_and_lower=True,
+ tosa_version="TOSA-1.0+FP",
+ )
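+    # Verify the aten leaky_relu op has been fully lowered away after to_edge_transform_and_lower.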
+ pipeline.add_stage_after(
+ "to_edge_transform_and_lower", pipeline.tester.check_not, [aten_op]
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", LeakyReLU.test_data)
+@common.SkipIfNoModelConverter
+def test_leaky_relu_vgf_INT(test_data):
+ data, slope = test_data()
+ pipeline = VgfPipeline[input_t1](
+ LeakyReLU(slope),
+ data,
+ [],
+ use_to_edge_transform_and_lower=True,
+ tosa_version="TOSA-1.0+INT",
+ )
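+    # For INT, leaky_relu should be decomposed by the time quantization completes,
+    # so check after the quantize stage (mirrors the U85 test above).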
+ pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_linalg_vector_norm.py b/backends/arm/test/ops/test_linalg_vector_norm.py
index 27e4bef97e6..1777cffb0a7 100644
--- a/backends/arm/test/ops/test_linalg_vector_norm.py
+++ b/backends/arm/test/ops/test_linalg_vector_norm.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = Tuple[torch.Tensor]
@@ -60,29 +61,29 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_module", test_modules)
-def test_vector_norm_tosa_MI(test_module):
+def test_vector_norm_tosa_FP(test_module):
model, input_tensor = test_module
    # We decompose LinalgVectorNorm before the quantize stage to have annotations
- # with q/dq nodes. In case of MI, this operator will be decomposed
+ # with q/dq nodes. In case of FP, this operator will be decomposed
# by global decompositions.
aten_op = "torch.ops.aten.linalg_vector_norm.default"
    # Should not find this op
exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
- pipeline = TosaPipelineMI[input_t](model, input_tensor, aten_op, exir_op)
+ pipeline = TosaPipelineFP[input_t](model, input_tensor, aten_op, exir_op)
pipeline.run()
@common.parametrize("test_module", test_modules)
-def test_vector_norm_tosa_BI(test_module):
+def test_vector_norm_tosa_INT(test_module):
model, input_tensor = test_module
    # Should not find this op
exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
model,
input_tensor,
aten_op_q_decomposed_q,
@@ -94,10 +95,10 @@ def test_vector_norm_tosa_BI(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone300
-def test_vector_norm_u55_BI_fvp(test_module):
+def test_vector_norm_u55_INT_fvp(test_module):
model, input_tensor = test_module
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
model,
input_tensor,
aten_op_q_decomposed_q,
@@ -111,11 +112,11 @@ def test_vector_norm_u55_BI_fvp(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone320
-def test_vector_norm_u85_BI_fvp(test_module):
+def test_vector_norm_u85_INT_fvp(test_module):
model, input_tensor = test_module
    # The op should be decomposed and annotated in the DecomposeLinalgVectorNorm pass.
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
model,
input_tensor,
aten_op_q_decomposed_q,
@@ -125,3 +126,37 @@ def test_vector_norm_u85_BI_fvp(test_module):
)
pipeline.pop_stage("check_not.exir")
pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_vector_norm_vgf_FP(test_module):
+ model, input_tensor = test_module
+    # FP VGF flow: the op is decomposed globally, so the exir op should not be found after lowering.
+ aten_op = "torch.ops.aten.linalg_vector_norm.default"
+ exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
+ pipeline = VgfPipeline[input_t](
+ model,
+ input_tensor,
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_vector_norm_vgf_INT(test_module):
+ model, input_tensor = test_module
+    # Should not find this op
+ exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
+
+ pipeline = VgfPipeline[input_t](
+ model,
+ input_tensor,
+ aten_op_q_decomposed_q,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py
index 14f65a07192..57ce490dae8 100644
--- a/backends/arm/test/ops/test_linear.py
+++ b/backends/arm/test/ops/test_linear.py
@@ -14,17 +14,18 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.linear.default"
input_t1 = Tuple[torch.Tensor]
-test_data_rank1_MI = {
+test_data_rank1_FP = {
# test_name: (test_data, out_features, has_bias)
"model_linear_rank1_zeros": lambda: (
torch.zeros(10),
@@ -58,7 +59,7 @@
),
}
-test_data_rank4_MI = {
+test_data_rank4_FP = {
# test_name: (test_data, out_features, has_bias)
"model_linear_rank4_zeros": lambda: (
torch.zeros(5, 10, 25, 20),
@@ -93,16 +94,16 @@
}
# Generate a new test set paired with per_channel_quant=True/False.
-test_data_rank1_BI = {
+test_data_rank1_INT = {
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (*v(), q))
- for (k, v) in test_data_rank1_MI.items()
+ for (k, v) in test_data_rank1_FP.items()
for q in [True, False]
}
# Generate a new test set paired with per_channel_quant=True/False.
-test_data_rank4_BI = {
+test_data_rank4_INT = {
f"{k},per_channel_quant={q}": (lambda v=v, q=q: (*v(), q))
- for (k, v) in test_data_rank4_MI.items()
+ for (k, v) in test_data_rank4_FP.items()
for q in [True, False]
}
@@ -125,11 +126,11 @@ def forward(self, x):
return self.fc(x)
-@common.parametrize("test_data", test_data_rank1_MI | test_data_rank4_MI)
-def test_linear_tosa_MI(test_data: torch.Tensor):
+@common.parametrize("test_data", test_data_rank1_FP | test_data_rank4_FP)
+def test_linear_tosa_FP(test_data: torch.Tensor):
test_data, out_features, has_bias = test_data()
in_features = test_data.shape[-1]
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Linear(
in_features=in_features,
out_features=out_features,
@@ -143,11 +144,11 @@ def test_linear_tosa_MI(test_data: torch.Tensor):
@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness.
-@common.parametrize("test_data", test_data_rank1_BI | test_data_rank4_BI)
-def test_linear_tosa_BI(test_data: torch.Tensor):
+@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+def test_linear_tosa_INT(test_data: torch.Tensor):
test_data, out_features, has_bias, per_channel_quantization = test_data()
in_features = test_data.shape[-1]
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
Linear(
in_features=in_features,
out_features=out_features,
@@ -162,12 +163,12 @@ def test_linear_tosa_BI(test_data: torch.Tensor):
pipeline.run()
-@common.parametrize("test_data", test_data_rank1_BI)
+@common.parametrize("test_data", test_data_rank1_INT)
@common.XfailIfNoCorstone300
-def test_linear_u55_BI(test_data: torch.Tensor):
+def test_linear_u55_INT(test_data: torch.Tensor):
test_data, out_features, has_bias, per_channel_quantization = test_data()
in_features = test_data.shape[-1]
- EthosU55PipelineBI[input_t1](
+ EthosU55PipelineINT[input_t1](
Linear(
in_features=in_features,
out_features=out_features,
@@ -198,14 +199,14 @@ def test_linear_u55_BI(test_data: torch.Tensor):
@common.parametrize(
"test_data",
- test_data_rank1_BI | test_data_rank4_BI,
+ test_data_rank1_INT | test_data_rank4_INT,
x_fail,
)
@common.XfailIfNoCorstone320
-def test_linear_u85_BI(test_data: torch.Tensor):
+def test_linear_u85_INT(test_data: torch.Tensor):
test_data, out_features, has_bias, per_channel_quantization = test_data()
in_features = test_data.shape[-1]
- EthosU85PipelineBI[input_t1](
+ EthosU85PipelineINT[input_t1](
Linear(
in_features=in_features,
out_features=out_features,
@@ -218,3 +219,42 @@ def test_linear_u85_BI(test_data: torch.Tensor):
per_channel_quantization=per_channel_quantization,
use_to_edge_transform_and_lower=True,
).run()
+
+
+@common.parametrize("test_data", test_data_rank1_FP | test_data_rank4_FP)
+@common.SkipIfNoModelConverter
+def test_linear_vgf_FP(test_data: torch.Tensor):
+ test_data, out_features, has_bias = test_data()
+ in_features = test_data.shape[-1]
+ pipeline = VgfPipeline[input_t1](
+ Linear(
+ in_features=in_features,
+ out_features=out_features,
+ bias=has_bias,
+ ),
+ (test_data,),
+ aten_op=aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.SkipIfNoModelConverter
+def test_linear_vgf_INT(test_data: torch.Tensor):
+ test_data, out_features, has_bias, per_channel_quantization = test_data()
+ in_features = test_data.shape[-1]
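+    # per_channel_quantization toggles per-channel vs. per-tensor weight quantization
+    # (both variants are generated in the parametrized test set above).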
+ pipeline = VgfPipeline[input_t1](
+ Linear(
+ in_features=in_features,
+ out_features=out_features,
+ bias=has_bias,
+ ),
+ (test_data,),
+ aten_op=aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ per_channel_quantization=per_channel_quantization,
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_log.py b/backends/arm/test/ops/test_log.py
index 0ca4510681d..1ed5c57f1ab 100644
--- a/backends/arm/test/ops/test_log.py
+++ b/backends/arm/test/ops/test_log.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.log.default"
@@ -40,21 +41,21 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", test_data_suite)
-def test_log_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](Log(), (test_data(),), aten_op, exir_op)
+def test_log_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](Log(), (test_data(),), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_log_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Log(), (test_data(),), aten_op, exir_op)
+def test_log_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](Log(), (test_data(),), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_log_u55_BI(test_data: input_t1):
- EthosU55PipelineBI[input_t1](
+def test_log_u55_INT(test_data: input_t1):
+ EthosU55PipelineINT[input_t1](
Log(),
(test_data(),),
aten_op,
@@ -65,11 +66,37 @@ def test_log_u55_BI(test_data: input_t1):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_log_u85_BI(test_data: input_t1):
- EthosU85PipelineBI[input_t1](
+def test_log_u85_INT(test_data: input_t1):
+ EthosU85PipelineINT[input_t1](
Log(),
(test_data(),),
aten_op,
exir_op,
run_on_fvp=True,
).run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_log_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Log(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_log_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Log(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_logical.py b/backends/arm/test/ops/test_logical.py
index 1a056e31b3c..2b160ce7b50 100644
--- a/backends/arm/test/ops/test_logical.py
+++ b/backends/arm/test/ops/test_logical.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -80,9 +81,14 @@ def forward(self, tensor: torch.Tensor):
return torch.logical_not(tensor)
+#################
+## logical_and ##
+#################
+
+
@common.parametrize("test_data", And().test_data)
-def test_logical_and_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_logical_and_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
And(),
test_data(),
And().aten_op,
@@ -95,8 +101,8 @@ def test_logical_and_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", And().test_data)
-def test_logical_and_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_logical_and_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
And(),
test_data(),
And().aten_op,
@@ -111,7 +117,7 @@ def test_logical_and_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", And().test_data)
-def test_logical_and_u55_BI_not_delegated(test_data: input_t2):
+def test_logical_and_u55_INT_not_delegated(test_data: input_t2):
# Tests that we don't delegate these ops since they are not supported on U55.
pipeline = OpNotSupportedPipeline[input_t2](
And(),
@@ -125,8 +131,8 @@ def test_logical_and_u55_BI_not_delegated(test_data: input_t2):
@common.parametrize("test_data", And().test_data)
@common.XfailIfNoCorstone320
-def test_logical_and_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_logical_and_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
And(),
test_data(),
And().aten_op,
@@ -141,9 +147,42 @@ def test_logical_and_u85_BI(test_data: input_t2):
pipeline.run()
+@common.parametrize("test_data", And().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_and_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ And(),
+ test_data(),
+ And().aten_op,
+ And().exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", And().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_and_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ And(),
+ test_data(),
+ And().aten_op,
+ And().exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+#################
+## logical_xor ##
+#################
+
+
@common.parametrize("test_data", Xor().test_data)
-def test_logical_xor_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_logical_xor_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
Xor(),
test_data(),
Xor().aten_op,
@@ -156,8 +195,8 @@ def test_logical_xor_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", Xor().test_data)
-def test_logical_xor_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_logical_xor_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
Xor(),
test_data(),
Xor().aten_op,
@@ -172,7 +211,7 @@ def test_logical_xor_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", Xor().test_data)
-def test_logical_xor_u55_BI_not_delegated(test_data: input_t2):
+def test_logical_xor_u55_INT_not_delegated(test_data: input_t2):
# Tests that we don't delegate these ops since they are not supported on U55.
pipeline = OpNotSupportedPipeline[input_t2](
Xor(),
@@ -186,8 +225,8 @@ def test_logical_xor_u55_BI_not_delegated(test_data: input_t2):
@common.parametrize("test_data", Xor().test_data)
@common.XfailIfNoCorstone320
-def test_logical_xor_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_logical_xor_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
Xor(),
test_data(),
Xor().aten_op,
@@ -202,9 +241,42 @@ def test_logical_xor_u85_BI(test_data: input_t2):
pipeline.run()
+@common.parametrize("test_data", Xor().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_xor_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Xor(),
+ test_data(),
+ Xor().aten_op,
+ Xor().exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Xor().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_xor_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Xor(),
+ test_data(),
+ Xor().aten_op,
+ Xor().exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+################
+## logical_or ##
+################
+
+
@common.parametrize("test_data", Or().test_data)
-def test_logical_or_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_logical_or_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
Or(),
test_data(),
Or().aten_op,
@@ -217,8 +289,8 @@ def test_logical_or_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", Or().test_data)
-def test_logical_or_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_logical_or_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
Or(),
test_data(),
Or().aten_op,
@@ -233,7 +305,7 @@ def test_logical_or_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", Or().test_data)
-def test_logical_or_u55_BI_not_delegated(test_data: input_t2):
+def test_logical_or_u55_INT_not_delegated(test_data: input_t2):
# Tests that we don't delegate these ops since they are not supported on U55.
pipeline = OpNotSupportedPipeline[input_t2](
Or(),
@@ -247,8 +319,8 @@ def test_logical_or_u55_BI_not_delegated(test_data: input_t2):
@common.parametrize("test_data", Or().test_data)
@common.XfailIfNoCorstone320
-def test_logical_or_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_logical_or_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
Or(),
test_data(),
Or().aten_op,
@@ -263,9 +335,42 @@ def test_logical_or_u85_BI(test_data: input_t2):
pipeline.run()
+@common.parametrize("test_data", Or().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_or_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Or(),
+ test_data(),
+ Or().aten_op,
+ Or().exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Or().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_or_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Or(),
+ test_data(),
+ Or().aten_op,
+ Or().exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+#################
+## logical_not ##
+#################
+
+
@common.parametrize("test_data", Not().test_data)
-def test_logical_not_tosa_MI(test_data: input_t2):
- pipeline = TosaPipelineMI[input_t2](
+def test_logical_not_tosa_FP(test_data: input_t2):
+ pipeline = TosaPipelineFP[input_t2](
Not(),
test_data(),
Not().aten_op,
@@ -278,8 +383,8 @@ def test_logical_not_tosa_MI(test_data: input_t2):
@common.parametrize("test_data", Not().test_data)
-def test_logical_not_tosa_BI(test_data: input_t2):
- pipeline = TosaPipelineBI[input_t2](
+def test_logical_not_tosa_INT(test_data: input_t2):
+ pipeline = TosaPipelineINT[input_t2](
Not(),
test_data(),
Not().aten_op,
@@ -294,7 +399,7 @@ def test_logical_not_tosa_BI(test_data: input_t2):
@common.parametrize("test_data", Not().test_data)
-def test_logical_not_u55_BI_not_delegated(test_data: input_t2):
+def test_logical_not_u55_INT_not_delegated(test_data: input_t2):
# Tests that we don't delegate these ops since they are not supported on U55.
pipeline = OpNotSupportedPipeline[input_t2](
Not(),
@@ -308,8 +413,8 @@ def test_logical_not_u55_BI_not_delegated(test_data: input_t2):
@common.parametrize("test_data", Not().test_data)
@common.XfailIfNoCorstone320
-def test_logical_not_u85_BI(test_data: input_t2):
- pipeline = EthosU85PipelineBI[input_t2](
+def test_logical_not_u85_INT(test_data: input_t2):
+ pipeline = EthosU85PipelineINT[input_t2](
Not(),
test_data(),
Not().aten_op,
@@ -322,3 +427,31 @@ def test_logical_not_u85_BI(test_data: input_t2):
pipeline.pop_stage("quantize")
pipeline.pop_stage("check.quant_nodes")
pipeline.run()
+
+
+@common.parametrize("test_data", Not().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_not_vgf_FP(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Not(),
+ test_data(),
+ Not().aten_op,
+ Not().exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Not().test_data)
+@common.SkipIfNoModelConverter
+def test_logical_not_vgf_INT(test_data: input_t2):
+ pipeline = VgfPipeline[input_t2](
+ Not(),
+ test_data(),
+ Not().aten_op,
+ Not().exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("quantize")
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_logit.py b/backends/arm/test/ops/test_logit.py
new file mode 100644
index 00000000000..8915c151bb9
--- /dev/null
+++ b/backends/arm/test/ops/test_logit.py
@@ -0,0 +1,119 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+aten_op = "torch.ops.aten.logit.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten__logit_default"
+
+input_t1 = Tuple[torch.Tensor]
+
+test_data_suite = {
+ "zeros": [torch.zeros((10, 10, 10)), None],
+ "ones": [torch.ones((10, 10, 10)), None],
+ "uniform_valid": [torch.rand((10, 10, 10)), None],
+ "near_zero": [torch.full((10, 10), 1e-8), None],
+ "near_one": [torch.full((10, 10), 1 - 1e-8), None],
+ "mixed": [torch.tensor([0.0, 1e-5, 0.5, 1 - 1e-5, 1.0]), None],
+ "multi_dim": [torch.rand((2, 3, 4)), None],
+ "eps": [torch.zeros((10, 10, 10)), 1e-6],
+ "invalid_neg": [torch.full((5,), -0.1), 1e-6],
+ "invalid_gt1": [torch.full((5,), 1.1), 1e-6],
+}
+
+
+class Logit(torch.nn.Module):
+
+ def forward(self, x: torch.Tensor, eps: torch.float32):
+ return torch.logit(x, eps=eps)
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_logit_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
+ Logit(),
+ (*test_data,),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_logit_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
+ Logit(),
+ (*test_data,),
+ aten_op=[],
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone300
+@common.parametrize("test_data", test_data_suite)
+def test_logit_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
+ Logit(),
+ (*test_data,),
+ aten_ops=[],
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.XfailIfNoCorstone320
+@common.parametrize("test_data", test_data_suite)
+def test_logit_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
+ Logit(),
+ (*test_data,),
+ aten_ops=[],
+ exir_ops=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+@common.SkipIfNoModelConverter
+def test_logit_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Logit(),
+ (*test_data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ test_data_suite,
+)
+@common.SkipIfNoModelConverter
+def test_logit_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Logit(),
+ (*test_data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_logsoftmax.py b/backends/arm/test/ops/test_logsoftmax.py
index 50132ba8211..b1b934fbcc8 100644
--- a/backends/arm/test/ops/test_logsoftmax.py
+++ b/backends/arm/test/ops/test_logsoftmax.py
@@ -10,10 +10,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.log_softmax.default" # Used for checking that we do not have log_softmax in the graph
@@ -43,9 +44,9 @@ def forward(self, x):
@common.parametrize("test_data", LogSoftmax.test_data)
-def test_log_softmax_tosa_MI(test_data):
+def test_log_softmax_tosa_FP(test_data):
data, dim = test_data()
- pipeline = TosaPipelineMI[input_t1](LogSoftmax(dim), data, [])
+ pipeline = TosaPipelineFP[input_t1](LogSoftmax(dim), data, [])
pipeline.add_stage_after(
"to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op]
)
@@ -55,9 +56,9 @@ def test_log_softmax_tosa_MI(test_data):
@pytest.mark.flaky(reruns=5)
@common.parametrize("test_data", LogSoftmax.test_data)
-def test_log_softmax_tosa_BI(test_data):
+def test_log_softmax_tosa_INT(test_data):
data, dim = test_data()
- pipeline = TosaPipelineBI[input_t1](LogSoftmax(dim), data, [])
+ pipeline = TosaPipelineINT[input_t1](LogSoftmax(dim), data, [])
pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
pipeline.run()
@@ -71,9 +72,9 @@ def test_log_softmax_tosa_BI(test_data):
},
)
@common.XfailIfNoCorstone300()
-def test_log_softmax_u55_BI(test_data):
+def test_log_softmax_u55_INT(test_data):
data, dim = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
LogSoftmax(dim),
data,
[],
@@ -92,9 +93,9 @@ def test_log_softmax_u55_BI(test_data):
},
)
@common.XfailIfNoCorstone320
-def test_log_softmax_u85_BI(test_data):
+def test_log_softmax_u85_INT(test_data):
data, dim = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
LogSoftmax(dim),
data,
[],
@@ -103,3 +104,33 @@ def test_log_softmax_u85_BI(test_data):
pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
pipeline.run()
+
+
+@common.parametrize("test_data", LogSoftmax.test_data)
+@common.SkipIfNoModelConverter
+def test_log_softmax_vgf_FP(test_data):
+ data, dim = test_data()
+ pipeline = VgfPipeline[input_t1](
+ LogSoftmax(dim), data, [], [], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.add_stage_after(
+ "to_edge_transform_and_lower", pipeline.tester.check_not, [aten_op]
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", LogSoftmax.test_data)
+@common.SkipIfNoModelConverter
+def test_log_softmax_vgf_INT(test_data):
+ data, dim = test_data()
+ pipeline = VgfPipeline[input_t1](
+ LogSoftmax(dim),
+ data,
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+ # TODO: MLETORCH-1136 Change args of run_method_and_compare_outputs of the vgf tests
+ # pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_lshift.py b/backends/arm/test/ops/test_lshift.py
index e74e80deeed..bab364a4528 100644
--- a/backends/arm/test/ops/test_lshift.py
+++ b/backends/arm/test/ops/test_lshift.py
@@ -10,18 +10,19 @@
XfailIfNoCorstone320,
)
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
scalar_input_t = tuple[torch.Tensor, int]
class LshiftScalar(torch.nn.Module):
- torch_op_MI = "torch.ops.aten.__lshift__.Scalar"
- torch_op_BI = "torch.ops.aten.bitwise_left_shift.Tensor"
+ torch_op_FP = "torch.ops.aten.__lshift__.Scalar"
+ torch_op_INT = "torch.ops.aten.bitwise_left_shift.Tensor"
exir_op = "executorch_exir_dialects_edge__ops_aten_bitwise_left_shift_Tensor"
test_data = {
"randint_neg_8_int8": (
@@ -67,22 +68,27 @@ def forward(self, x: torch.Tensor, shift: torch.Tensor):
return x.bitwise_left_shift(shift)
+##################
+## LshiftScalar ##
+##################
+
+
@common.parametrize("test_data", LshiftScalar.test_data)
-def test_lshift_scalar_tosa_MI_scalar(test_data):
- TosaPipelineMI[scalar_input_t](
+def test_bitwise_left_shift_scalar_tosa_FP_scalar(test_data):
+ TosaPipelineFP[scalar_input_t](
LshiftScalar(),
test_data,
- LshiftScalar.torch_op_MI,
+ LshiftScalar.torch_op_FP,
LshiftScalar.exir_op,
).run()
@common.parametrize("test_data", LshiftScalar.test_data)
-def test_bitwise_left_shift_tensor_tosa_BI_scalar(test_data):
- pipeline = TosaPipelineBI[scalar_input_t](
+def test_bitwise_left_shift_tensor_tosa_INT_scalar(test_data):
+ pipeline = TosaPipelineINT[scalar_input_t](
LshiftScalar(),
test_data,
- LshiftScalar.torch_op_BI,
+ LshiftScalar.torch_op_INT,
LshiftScalar.exir_op,
)
pipeline.pop_stage("check.quant_nodes")
@@ -91,11 +97,11 @@ def test_bitwise_left_shift_tensor_tosa_BI_scalar(test_data):
@common.parametrize("test_data", LshiftScalar.test_data)
@XfailIfNoCorstone300
-def test_bitwise_left_shift_tensor_u55_BI_scalar(test_data):
- pipeline = EthosU55PipelineBI[scalar_input_t](
+def test_bitwise_left_shift_tensor_u55_INT_scalar(test_data):
+ pipeline = EthosU55PipelineINT[scalar_input_t](
LshiftScalar(),
test_data,
- LshiftScalar.torch_op_BI,
+ LshiftScalar.torch_op_INT,
LshiftScalar.exir_op,
run_on_fvp=True,
)
@@ -105,11 +111,11 @@ def test_bitwise_left_shift_tensor_u55_BI_scalar(test_data):
@common.parametrize("test_data", LshiftScalar.test_data)
@XfailIfNoCorstone320
-def test_bitwise_left_shift_tensor_u85_BI_scalar(test_data):
- pipeline = EthosU85PipelineBI[scalar_input_t](
+def test_bitwise_left_shift_tensor_u85_INT_scalar(test_data):
+ pipeline = EthosU85PipelineINT[scalar_input_t](
LshiftScalar(),
test_data,
- LshiftScalar.torch_op_BI,
+ LshiftScalar.torch_op_INT,
LshiftScalar.exir_op,
run_on_fvp=True,
)
@@ -117,9 +123,41 @@ def test_bitwise_left_shift_tensor_u85_BI_scalar(test_data):
pipeline.run()
+@common.parametrize("test_data", LshiftScalar.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_left_shift_scalar_vgf_FP_scalar(test_data: scalar_input_t):
+ pipeline = VgfPipeline[scalar_input_t](
+ LshiftScalar(),
+ test_data,
+ LshiftScalar.torch_op_FP,
+ LshiftScalar.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", LshiftScalar.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_left_shift_tensor_vgf_INT_scalar(test_data: scalar_input_t):
+ pipeline = VgfPipeline[scalar_input_t](
+ LshiftScalar(),
+ test_data,
+ LshiftScalar.torch_op_INT,
+ LshiftScalar.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+##################
+## LshiftTensor ##
+##################
+
+
@common.parametrize("test_data", LshiftTensor.test_data)
-def test_lshift_scalar_tosa_MI(test_data):
- TosaPipelineMI[scalar_input_t](
+def test_bitwise_left_shift_tensor_tosa_FP(test_data):
+ TosaPipelineFP[scalar_input_t](
LshiftTensor(),
test_data,
LshiftTensor.torch_op,
@@ -128,8 +166,8 @@ def test_lshift_scalar_tosa_MI(test_data):
@common.parametrize("test_data", LshiftTensor.test_data)
-def test_bitwise_left_shift_tensor_tosa_BI(test_data):
- pipeline = TosaPipelineBI[scalar_input_t](
+def test_bitwise_left_shift_tensor_tosa_INT(test_data):
+ pipeline = TosaPipelineINT[scalar_input_t](
LshiftTensor(),
test_data,
LshiftTensor.torch_op,
@@ -141,8 +179,8 @@ def test_bitwise_left_shift_tensor_tosa_BI(test_data):
@common.parametrize("test_data", LshiftTensor.test_data)
@XfailIfNoCorstone300
-def test_bitwise_left_shift_tensor_u55_BI(test_data):
- pipeline = EthosU55PipelineBI[scalar_input_t](
+def test_bitwise_left_shift_tensor_u55_INT(test_data):
+ pipeline = EthosU55PipelineINT[scalar_input_t](
LshiftTensor(),
test_data,
LshiftTensor.torch_op,
@@ -155,8 +193,8 @@ def test_bitwise_left_shift_tensor_u55_BI(test_data):
@common.parametrize("test_data", LshiftTensor.test_data)
@XfailIfNoCorstone320
-def test_bitwise_left_shift_tensor_u85_BI(test_data):
- pipeline = EthosU85PipelineBI[scalar_input_t](
+def test_bitwise_left_shift_tensor_u85_INT(test_data):
+ pipeline = EthosU85PipelineINT[scalar_input_t](
LshiftTensor(),
test_data,
LshiftTensor.torch_op,
@@ -165,3 +203,30 @@ def test_bitwise_left_shift_tensor_u85_BI(test_data):
)
pipeline.pop_stage("check.quant_nodes")
pipeline.run()
+
+
+@common.parametrize("test_data", LshiftTensor.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_left_shift_tensor_vgf_FP(test_data: tensor_input_t):
+ pipeline = VgfPipeline[tensor_input_t](
+ LshiftTensor(),
+ test_data,
+ LshiftTensor.torch_op,
+ LshiftTensor.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", LshiftTensor.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_left_shift_tensor_vgf_INT(test_data: tensor_input_t):
+ pipeline = VgfPipeline[tensor_input_t](
+ LshiftTensor(),
+ test_data,
+ LshiftTensor.torch_op,
+ LshiftTensor.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_lt.py b/backends/arm/test/ops/test_lt.py
index 92298ca70fa..86d903e3f88 100644
--- a/backends/arm/test/ops/test_lt.py
+++ b/backends/arm/test/ops/test_lt.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -78,8 +79,8 @@ def get_inputs(self):
@common.parametrize("test_module", test_data_tensor)
-def test_lt_tensor_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_lt_tensor_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
LessThan.aten_op_tensor,
@@ -89,8 +90,8 @@ def test_lt_tensor_tosa_MI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_lt_scalar_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_lt_scalar_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
LessThan.aten_op_scalar,
@@ -100,8 +101,8 @@ def test_lt_scalar_tosa_MI(test_module):
@common.parametrize("test_module", test_data_tensor)
-def test_lt_tensor_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_lt_tensor_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessThan.aten_op_tensor,
@@ -111,8 +112,8 @@ def test_lt_tensor_tosa_BI(test_module):
@common.parametrize("test_module", test_data_scalar)
-def test_lt_scalar_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_lt_scalar_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessThan.aten_op_tensor,
@@ -123,7 +124,7 @@ def test_lt_scalar_tosa_BI(test_module):
@common.parametrize("test_module", test_data_tensor)
@common.XfailIfNoCorstone300
-def test_lt_tensor_u55_BI_not_delegated(test_module):
+def test_lt_tensor_u55_INT_not_delegated(test_module):
# LessThan is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -137,7 +138,7 @@ def test_lt_tensor_u55_BI_not_delegated(test_module):
@common.parametrize("test_module", test_data_scalar)
@common.XfailIfNoCorstone300
-def test_lt_scalar_u55_BI_not_delegated(test_module):
+def test_lt_scalar_u55_INT_not_delegated(test_module):
# LessThan is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module(),
@@ -158,8 +159,8 @@ def test_lt_scalar_u55_BI_not_delegated(test_module):
},
)
@common.XfailIfNoCorstone320
-def test_lt_tensor_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_lt_tensor_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessThan.aten_op_tensor,
@@ -177,8 +178,8 @@ def test_lt_tensor_u85_BI(test_module):
},
)
@common.XfailIfNoCorstone320
-def test_lt_scalar_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_lt_scalar_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
LessThan.aten_op_tensor,
@@ -186,3 +187,55 @@ def test_lt_scalar_u85_BI(test_module):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_lt_tensor_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessThan.aten_op_tensor,
+ LessThan.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_lt_scalar_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessThan.aten_op_scalar,
+ LessThan.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_lt_tensor_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessThan.aten_op_tensor,
+ LessThan.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_lt_scalar_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ LessThan.aten_op_tensor,
+ LessThan.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_masked_fill.py b/backends/arm/test/ops/test_masked_fill.py
index bfd5c8857c7..3aab19925ec 100644
--- a/backends/arm/test/ops/test_masked_fill.py
+++ b/backends/arm/test/ops/test_masked_fill.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -99,16 +100,16 @@ def forward(
@common.parametrize("test_module", test_modules)
-def test_masked_fill_scalar_tosa_MI(test_module):
+def test_masked_fill_scalar_tosa_FP(test_module):
module, inputs = test_module()
- pipeline = TosaPipelineMI[input_t](module, inputs, aten_op=[])
+ pipeline = TosaPipelineFP[input_t](module, inputs, aten_op=[])
pipeline.run()
@common.parametrize("test_module", test_modules)
-def test_masked_fill_scalar_tosa_BI(test_module):
+def test_masked_fill_scalar_tosa_INT(test_module):
module, inputs = test_module()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
module,
inputs,
aten_op=[],
@@ -118,7 +119,7 @@ def test_masked_fill_scalar_tosa_BI(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone300
-def test_masked_fill_scalar_u55_BI(test_module):
+def test_masked_fill_scalar_u55_INT(test_module):
module, inputs = test_module()
pipeline = OpNotSupportedPipeline[input_t](
module,
@@ -133,12 +134,32 @@ def test_masked_fill_scalar_u55_BI(test_module):
@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone320
-def test_masked_fill_scalar_u85_BI(test_module):
+def test_masked_fill_scalar_u85_INT(test_module):
module, inputs = test_module()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
module,
inputs,
aten_ops=[],
exir_ops=exir_op,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_masked_fill_scalar_vgf_FP(test_module):
+ module, inputs = test_module()
+ pipeline = VgfPipeline[input_t](
+ module, inputs, aten_op=[], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.SkipIfNoModelConverter
+def test_masked_fill_scalar_vgf_INT(test_module):
+ module, inputs = test_module()
+ pipeline = VgfPipeline[input_t](
+ module, inputs, aten_op=[], tosa_version="TOSA-1.0+INT"
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_matmul.py b/backends/arm/test/ops/test_matmul.py
index 11a4786c4af..d1a21684325 100644
--- a/backends/arm/test/ops/test_matmul.py
+++ b/backends/arm/test/ops/test_matmul.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op_mm = "torch.ops.aten.matmul.default"
@@ -60,38 +61,38 @@ def forward(self, x1: torch.Tensor, x2: torch.Tensor, x3: torch.Tensor):
@common.parametrize("test_data", MatMul.test_data_generators)
-def test_matmul_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](MatMul(), test_data(), aten_op_mm, exir_op_mm)
+def test_matmul_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](MatMul(), test_data(), aten_op_mm, exir_op_mm)
pipeline.run()
@common.parametrize("test_data", MatMulSingleInput.test_data_generators)
-def test_matmul_single_input_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_matmul_single_input_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
MatMulSingleInput(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", MatMulCombo.test_data_generators)
-def test_matmul_combo_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_matmul_combo_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
MatMulCombo(), test_data(), aten_op_mm, exir_op_mm
)
pipeline.run()
@common.parametrize("test_data", MatMul.test_data_generators)
-def test_matmul_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_matmul_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
MatMul(), test_data(), aten_op_mm, exir_op_mm, qtol=1
)
pipeline.run()
@common.parametrize("test_data", MatMulSingleInput.test_data_generators)
-def test_matmul_single_input_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_matmul_single_input_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
MatMulSingleInput(),
test_data(),
aten_op_mm,
@@ -102,8 +103,8 @@ def test_matmul_single_input_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", MatMulCombo.test_data_generators)
-def test_matmul_combo_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_matmul_combo_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
MatMulCombo(),
test_data(),
aten_op_mm,
@@ -115,8 +116,8 @@ def test_matmul_combo_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", MatMul.test_data_generators)
@common.XfailIfNoCorstone300
-def test_matmul_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_matmul_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
MatMul(),
test_data(),
aten_op_mm,
@@ -129,8 +130,8 @@ def test_matmul_u55_BI(test_data: input_t1):
@common.parametrize("test_data", MatMulSingleInput.test_data_generators)
@common.XfailIfNoCorstone300
-def test_matmul_single_input_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_matmul_single_input_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
MatMulSingleInput(),
test_data(),
aten_op_mm,
@@ -143,8 +144,8 @@ def test_matmul_single_input_u55_BI(test_data: input_t1):
@common.parametrize("test_data", MatMulCombo.test_data_generators)
@common.XfailIfNoCorstone300
-def test_matmul_combo_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_matmul_combo_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
MatMulCombo(),
test_data(),
aten_op_mm,
@@ -157,8 +158,8 @@ def test_matmul_combo_u55_BI(test_data: input_t1):
@common.parametrize("test_data", MatMul.test_data_generators)
@common.XfailIfNoCorstone320
-def test_matmul_u85_BI(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_matmul_u85_INT(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
MatMul(),
test_data(),
aten_op_mm,
@@ -171,8 +172,8 @@ def test_matmul_u85_BI(test_data: input_t1):
@common.parametrize("test_data", MatMulSingleInput.test_data_generators)
@common.XfailIfNoCorstone320
-def test_matmul_single_input_u85_BI(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_matmul_single_input_u85_INT(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
MatMulSingleInput(),
test_data(),
aten_op_mm,
@@ -185,8 +186,8 @@ def test_matmul_single_input_u85_BI(test_data: input_t1):
@common.parametrize("test_data", MatMulCombo.test_data_generators)
@common.XfailIfNoCorstone320
-def test_matmul_combo_u85_BI(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_matmul_combo_u85_INT(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
MatMulCombo(),
test_data(),
aten_op_mm,
@@ -195,3 +196,73 @@ def test_matmul_combo_u85_BI(test_data: input_t1):
use_to_edge_transform_and_lower=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", MatMul.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_matmul_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ MatMul(), test_data(), aten_op_mm, exir_op_mm, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", MatMulSingleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_matmul_single_input_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ MatMulSingleInput(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", MatMulCombo.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_matmul_combo_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ MatMulCombo(), test_data(), aten_op_mm, exir_op_mm, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", MatMul.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_matmul_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ MatMul(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", MatMulSingleInput.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_matmul_single_input_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ MatMulSingleInput(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", MatMulCombo.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_matmul_combo_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ MatMulCombo(),
+ test_data(),
+ aten_op_mm,
+ exir_op_mm,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
index b2aa263de39..6b75c2b7d0a 100644
--- a/backends/arm/test/ops/test_max_pool.py
+++ b/backends/arm/test/ops/test_max_pool.py
@@ -13,10 +13,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
test_data_suite = {
@@ -114,18 +115,18 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_max_pool2d_tosa_MI(test_data: torch.Tensor):
+def test_max_pool2d_tosa_FP(test_data: torch.Tensor):
test_data, model_params = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
MaxPool2d(*model_params), (test_data,), aten_op, exir_op
)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_max_pool2d_tosa_BI(test_data: torch.Tensor):
+def test_max_pool2d_tosa_INT(test_data: torch.Tensor):
test_data, model_params = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
MaxPool2d(*model_params),
(test_data,),
aten_op,
@@ -136,9 +137,9 @@ def test_max_pool2d_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_max_pool2d_u55_BI(test_data: torch.Tensor):
+def test_max_pool2d_u55_INT(test_data: torch.Tensor):
test_data, model_params = test_data()
- EthosU55PipelineBI[input_t1](
+ EthosU55PipelineINT[input_t1](
MaxPool2d(*model_params),
(test_data,),
aten_op,
@@ -149,9 +150,9 @@ def test_max_pool2d_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_max_pool2d_u85_BI(test_data: torch.Tensor):
+def test_max_pool2d_u85_INT(test_data: torch.Tensor):
test_data, model_params = test_data()
- EthosU85PipelineBI[input_t1](
+ EthosU85PipelineINT[input_t1](
MaxPool2d(*model_params),
(test_data,),
aten_op,
@@ -161,9 +162,9 @@ def test_max_pool2d_u85_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_mult_batches)
-def test_max_pool2d_tosa_MI_mult_batches(test_data: torch.Tensor):
+def test_max_pool2d_tosa_FP_mult_batches(test_data: torch.Tensor):
test_data, model_params = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
MaxPool2d(*model_params),
(test_data,),
aten_op,
@@ -173,9 +174,9 @@ def test_max_pool2d_tosa_MI_mult_batches(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_mult_batches)
-def test_max_pool2d_tosa_BI_mult_batches(test_data: torch.Tensor):
+def test_max_pool2d_tosa_INT_mult_batches(test_data: torch.Tensor):
test_data, model_params = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
MaxPool2d(*model_params),
(test_data,),
aten_op,
@@ -189,9 +190,9 @@ def test_max_pool2d_tosa_BI_mult_batches(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_mult_batches, x_fail)
@common.XfailIfNoCorstone300
-def test_max_pool2d_u55_BI_mult_batches(test_data: torch.Tensor):
+def test_max_pool2d_u55_INT_mult_batches(test_data: torch.Tensor):
test_data, model_params = test_data()
- EthosU55PipelineBI[input_t1](
+ EthosU55PipelineINT[input_t1](
MaxPool2d(*model_params),
(test_data,),
aten_op,
@@ -203,9 +204,9 @@ def test_max_pool2d_u55_BI_mult_batches(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_mult_batches, x_fail)
@common.XfailIfNoCorstone320
-def test_max_pool2d_u85_BI_mult_batches(test_data: torch.Tensor):
+def test_max_pool2d_u85_INT_mult_batches(test_data: torch.Tensor):
test_data, model_params = test_data()
- EthosU85PipelineBI[input_t1](
+ EthosU85PipelineINT[input_t1](
MaxPool2d(*model_params),
(test_data,),
aten_op,
@@ -224,9 +225,9 @@ def test_max_pool2d_u85_BI_mult_batches(test_data: torch.Tensor):
@common.parametrize("test_data", reject_data_suite)
@common.XfailIfNoCorstone300
-def test_max_pool2d_u55_BI_failure_set(test_data: Tuple):
+def test_max_pool2d_u55_INT_failure_set(test_data: Tuple):
module, test_data = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
module,
(test_data,),
aten_op,
@@ -246,12 +247,12 @@ def test_max_pool2d_u55_BI_failure_set(test_data: Tuple):
@common.parametrize("test_data", dilation_test_data)
-def test_max_pool2d_tosa_MI_dilation(test_data):
+def test_max_pool2d_tosa_FP_dilation(test_data):
"""
- TOSA MI pipeline with dilation > 1 (and dilation=1 sanity cases).
+ TOSA FP pipeline with dilation > 1 (and dilation=1 sanity cases).
"""
data, model_params = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
MaxPool2d(*model_params),
(data,),
aten_op,
@@ -261,12 +262,12 @@ def test_max_pool2d_tosa_MI_dilation(test_data):
@common.parametrize("test_data", dilation_test_data)
-def test_max_pool2d_tosa_BI_dilation(test_data):
+def test_max_pool2d_tosa_INT_dilation(test_data):
"""
- TOSA BI pipeline with dilation > 1 (and dilation=1 sanity cases).
+ TOSA INT pipeline with dilation > 1 (and dilation=1 sanity cases).
"""
data, model_params = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
MaxPool2d(*model_params),
(data,),
aten_op,
@@ -274,3 +275,94 @@ def test_max_pool2d_tosa_BI_dilation(test_data):
symmetric_io_quantization=True,
)
pipeline.run()
+
+
+# VGF tests
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_max_pool2d_vgf_FP(test_data: torch.Tensor):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ MaxPool2d(*model_params),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_max_pool2d_vgf_INT(test_data: torch.Tensor):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ MaxPool2d(*model_params),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_mult_batches)
+@common.SkipIfNoModelConverter
+def test_max_pool2d_vgf_FP_mult_batches(test_data: torch.Tensor):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ MaxPool2d(*model_params),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_mult_batches)
+@common.SkipIfNoModelConverter
+def test_max_pool2d_vgf_INT_mult_batches(test_data: torch.Tensor):
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ MaxPool2d(*model_params),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", dilation_test_data)
+@common.SkipIfNoModelConverter
+def test_max_pool2d_vgf_FP_dilation(test_data: torch.Tensor):
+ """
+ VGF FP pipeline with dilation > 1 (and dilation=1 sanity cases).
+ """
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ MaxPool2d(*model_params),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", dilation_test_data)
+@common.SkipIfNoModelConverter
+def test_max_pool2d_vgf_INT_dilation(test_data: torch.Tensor):
+ """
+ VGF INT pipeline with dilation > 1 (and dilation=1 sanity cases).
+ """
+ test_data, model_params = test_data()
+ pipeline = VgfPipeline[input_t1](
+ MaxPool2d(*model_params),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py
index adcc7dc9cab..eb0d4b86efc 100644
--- a/backends/arm/test/ops/test_maximum.py
+++ b/backends/arm/test/ops/test_maximum.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
test_t = tuple[torch.Tensor, torch.Tensor]
@@ -44,19 +45,19 @@ def forward(self, x, y):
@common.parametrize("test_data", Maximum.test_parameters)
-def test_maximum_tosa_MI(test_data: Tuple):
- TosaPipelineMI[test_t](Maximum(), test_data(), aten_op).run()
+def test_maximum_tosa_FP(test_data: Tuple):
+ TosaPipelineFP[test_t](Maximum(), test_data(), aten_op).run()
@common.parametrize("test_data", Maximum.test_parameters)
-def test_maximum_tosa_BI(test_data: Tuple):
- TosaPipelineBI[test_t](Maximum(), test_data(), aten_op).run()
+def test_maximum_tosa_INT(test_data: Tuple):
+ TosaPipelineINT[test_t](Maximum(), test_data(), aten_op).run()
@common.parametrize("test_data", Maximum.test_parameters)
@common.XfailIfNoCorstone300
-def test_maximum_u55_BI(test_data: Tuple):
- EthosU55PipelineBI[test_t](
+def test_maximum_u55_INT(test_data: Tuple):
+ EthosU55PipelineINT[test_t](
Maximum(),
test_data(),
aten_op,
@@ -66,10 +67,34 @@ def test_maximum_u55_BI(test_data: Tuple):
@common.parametrize("test_data", Maximum.test_parameters)
@common.XfailIfNoCorstone320
-def test_maximum_u85_BI(test_data: Tuple):
- EthosU85PipelineBI[test_t](
+def test_maximum_u85_INT(test_data: Tuple):
+ EthosU85PipelineINT[test_t](
Maximum(),
test_data(),
aten_op,
run_on_fvp=True,
).run()
+
+
+@common.parametrize("test_data", Maximum.test_parameters)
+@common.SkipIfNoModelConverter
+def test_maximum_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[test_t](
+ Maximum(),
+ test_data(),
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Maximum.test_parameters)
+@common.SkipIfNoModelConverter
+def test_maximum_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[test_t](
+ Maximum(),
+ test_data(),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_mean_dim.py b/backends/arm/test/ops/test_mean_dim.py
index 6803ec44a12..1483b5d82b6 100644
--- a/backends/arm/test/ops/test_mean_dim.py
+++ b/backends/arm/test/ops/test_mean_dim.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = tuple[torch.Tensor]
@@ -37,8 +38,8 @@ def forward(self, x):
@common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
-def test_adaptive_avg_pool2d_tosa_MI(test_data):
- TosaPipelineMI[input_t](
+def test_adaptive_avg_pool2d_tosa_FP(test_data):
+ TosaPipelineFP[input_t](
AdaptiveAveragePool2d(),
test_data(),
AdaptiveAveragePool2d.aten_op,
@@ -47,8 +48,8 @@ def test_adaptive_avg_pool2d_tosa_MI(test_data):
@common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
-def test_adaptive_avg_pool2d_tosa_BI(test_data):
- TosaPipelineBI[input_t](
+def test_adaptive_avg_pool2d_tosa_INT(test_data):
+ TosaPipelineINT[input_t](
AdaptiveAveragePool2d(),
test_data(),
AdaptiveAveragePool2d.aten_op,
@@ -59,8 +60,8 @@ def test_adaptive_avg_pool2d_tosa_BI(test_data):
@common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
@common.XfailIfNoCorstone300
-def test_adaptive_avg_pool2d_u55_BI(test_data):
- EthosU55PipelineBI[input_t](
+def test_adaptive_avg_pool2d_u55_INT(test_data):
+ EthosU55PipelineINT[input_t](
AdaptiveAveragePool2d(),
test_data(),
AdaptiveAveragePool2d.aten_op,
@@ -72,8 +73,8 @@ def test_adaptive_avg_pool2d_u55_BI(test_data):
@common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
@common.XfailIfNoCorstone320
-def test_adaptive_avg_pool2d_u85_BI(test_data):
- EthosU85PipelineBI[input_t](
+def test_adaptive_avg_pool2d_u85_INT(test_data):
+ EthosU85PipelineINT[input_t](
AdaptiveAveragePool2d(),
test_data(),
AdaptiveAveragePool2d.aten_op,
@@ -83,6 +84,33 @@ def test_adaptive_avg_pool2d_u85_BI(test_data):
).run()
+@common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
+@common.SkipIfNoModelConverter
+def test_adaptive_avg_pool2d_vgf_FP(test_data):
+ pipeline = VgfPipeline[input_t](
+ AdaptiveAveragePool2d(),
+ test_data(),
+ AdaptiveAveragePool2d.aten_op,
+ AdaptiveAveragePool2d.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
+@common.SkipIfNoModelConverter
+def test_adaptive_avg_pool2d_vgf_INT(test_data):
+ pipeline = VgfPipeline[input_t](
+ AdaptiveAveragePool2d(),
+ test_data(),
+ AdaptiveAveragePool2d.aten_op,
+ AdaptiveAveragePool2d.exir_op,
+ symmetric_io_quantization=True,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
class MeanDim(torch.nn.Module):
test_data_suite: dict[str, tuple] = {
"rank_1_keepdim": lambda: (
@@ -234,9 +262,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", MeanDim.test_data_suite)
-def test_mean_dim_tosa_MI(test_data):
+def test_mean_dim_tosa_FP(test_data):
test_data, dim, keep_dim = test_data()
- TosaPipelineMI[input_t](
+ TosaPipelineFP[input_t](
MeanDim(dim, keep_dim),
(test_data,),
MeanDim.torch_op,
@@ -245,9 +273,9 @@ def test_mean_dim_tosa_MI(test_data):
@common.parametrize("test_data", MeanDim.test_data_suite)
-def test_mean_dim_tosa_BI(test_data):
+def test_mean_dim_tosa_INT(test_data):
test_data, dim, keep_dim = test_data()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
MeanDim(dim, keep_dim),
(test_data,),
[], # Might be sum, avgpool, or both
@@ -266,9 +294,9 @@ def test_mean_dim_tosa_BI(test_data):
@common.parametrize("test_data", MeanDim.test_data_suite, xfails=xfails, strict=False)
@common.XfailIfNoCorstone300
-def test_mean_dim_u55_BI(test_data):
+def test_mean_dim_u55_INT(test_data):
test_data, dim, keep_dim = test_data()
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
MeanDim(dim, keep_dim),
(test_data,),
[], # Might be sum, avgpool, or both
@@ -286,9 +314,9 @@ def test_mean_dim_u55_BI(test_data):
@common.parametrize("test_data", MeanDim.test_data_suite, xfails=xfails, strict=False)
@common.XfailIfNoCorstone320
-def test_mean_dim_u85_BI(test_data):
+def test_mean_dim_u85_INT(test_data):
test_data, dim, keep_dim = test_data()
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
MeanDim(dim, keep_dim),
(test_data,),
[], # Might be sum, avgpool, or both
@@ -296,3 +324,31 @@ def test_mean_dim_u85_BI(test_data):
symmetric_io_quantization=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", MeanDim.test_data_suite)
+@common.SkipIfNoModelConverter
+def test_mean_dim_vgf_FP(test_data):
+ test_data_val, dim, keep_dim = test_data()
+ pipeline = VgfPipeline[input_t](
+ MeanDim(dim, keep_dim),
+ (test_data_val,),
+ MeanDim.torch_op,
+ MeanDim.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", MeanDim.test_data_suite)
+@common.SkipIfNoModelConverter
+def test_mean_dim_vgf_INT(test_data):
+ test_data_val, dim, keep_dim = test_data()
+ pipeline = VgfPipeline[input_t](
+ MeanDim(dim, keep_dim),
+ (test_data_val,),
+ [], # Might be sum, avgpool, or both
+ symmetric_io_quantization=True,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py
index 27922cda5e0..88ae2c2b8da 100644
--- a/backends/arm/test/ops/test_minimum.py
+++ b/backends/arm/test/ops/test_minimum.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
test_t = tuple[torch.Tensor, torch.Tensor]
@@ -44,19 +45,19 @@ def forward(self, x, y):
@common.parametrize("test_data", Minimum.test_parameters)
-def test_minimum_tosa_MI(test_data: Tuple):
- TosaPipelineMI[test_t](Minimum(), test_data(), aten_op).run()
+def test_minimum_tosa_FP(test_data: Tuple):
+ TosaPipelineFP[test_t](Minimum(), test_data(), aten_op).run()
@common.parametrize("test_data", Minimum.test_parameters)
-def test_minimum_tosa_BI(test_data: Tuple):
- TosaPipelineBI[test_t](Minimum(), test_data(), aten_op).run()
+def test_minimum_tosa_INT(test_data: Tuple):
+ TosaPipelineINT[test_t](Minimum(), test_data(), aten_op).run()
@common.parametrize("test_data", Minimum.test_parameters)
@common.XfailIfNoCorstone300
-def test_minimum_u55_BI(test_data: Tuple):
- EthosU55PipelineBI[test_t](
+def test_minimum_u55_INT(test_data: Tuple):
+ EthosU55PipelineINT[test_t](
Minimum(),
test_data(),
aten_op,
@@ -66,10 +67,29 @@ def test_minimum_u55_BI(test_data: Tuple):
@common.parametrize("test_data", Minimum.test_parameters)
@common.XfailIfNoCorstone320
-def test_minimum_u85_BI(test_data: Tuple):
- EthosU85PipelineBI[test_t](
+def test_minimum_u85_INT(test_data: Tuple):
+ EthosU85PipelineINT[test_t](
Minimum(),
test_data(),
aten_op,
run_on_fvp=True,
).run()
+
+
+@common.parametrize("test_data", Minimum.test_parameters)
+@common.SkipIfNoModelConverter
+def test_minimum_vgf_FP(test_data: test_t):
+ pipeline = VgfPipeline[test_t](Minimum(), test_data(), aten_op)
+ pipeline.run()
+
+
+@common.parametrize("test_data", Minimum.test_parameters)
+@common.SkipIfNoModelConverter
+def test_minimum_vgf_INT(test_data: test_t):
+ pipeline = VgfPipeline[test_t](
+ Minimum(),
+ test_data(),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_mm.py b/backends/arm/test/ops/test_mm.py
index 9c3ce443bfd..1b76baaeff0 100644
--- a/backends/arm/test/ops/test_mm.py
+++ b/backends/arm/test/ops/test_mm.py
@@ -10,10 +10,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
test_t = tuple[torch.Tensor, torch.Tensor]
@@ -35,20 +36,20 @@ def forward(self, x, y):
@common.parametrize("test_data", MM.test_data_generators)
-def test_mm_tosa_MI(test_data: Tuple):
- TosaPipelineMI[test_t](MM(), test_data(), MM.aten_op).run()
+def test_mm_tosa_FP(test_data: Tuple):
+ TosaPipelineFP[test_t](MM(), test_data(), MM.aten_op).run()
@common.parametrize("test_data", MM.test_data_generators)
-def test_mm_tosa_BI(test_data: Tuple):
- TosaPipelineBI[test_t](MM(), test_data(), MM.aten_op, MM.exir_op, qtol=1).run()
+def test_mm_tosa_INT(test_data: Tuple):
+ TosaPipelineINT[test_t](MM(), test_data(), MM.aten_op, MM.exir_op, qtol=1).run()
@common.parametrize("test_data", MM.test_data_generators)
@common.XfailIfNoCorstone300
@pytest.mark.flaky # Investigate flakiness (MLETORCH-870)
-def test_mm_u55_BI(test_data: Tuple):
- EthosU55PipelineBI[test_t](
+def test_mm_u55_INT(test_data: Tuple):
+ EthosU55PipelineINT[test_t](
MM(),
test_data(),
MM.aten_op,
@@ -58,11 +59,33 @@ def test_mm_u55_BI(test_data: Tuple):
@common.parametrize("test_data", MM.test_data_generators)
@common.XfailIfNoCorstone320
-def test_mm_u85_BI(test_data: Tuple):
- EthosU85PipelineBI[test_t](
+def test_mm_u85_INT(test_data: Tuple):
+ EthosU85PipelineINT[test_t](
MM(),
test_data(),
MM.aten_op,
MM.exir_op,
run_on_fvp=True,
).run()
+
+
+@common.parametrize("test_data", MM.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_mm_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[test_t](
+ MM(), test_data(), MM.aten_op, MM.exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", MM.test_data_generators)
+@common.SkipIfNoModelConverter
+def test_mm_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[test_t](
+ MM(),
+ test_data(),
+ MM.aten_op,
+ MM.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_mul.py b/backends/arm/test/ops/test_mul.py
index b061e57287a..b0b7f5f4b7d 100644
--- a/backends/arm/test/ops/test_mul.py
+++ b/backends/arm/test/ops/test_mul.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor, torch.Tensor] # Input x
@@ -107,8 +108,8 @@ def forward(
@common.parametrize("test_data", test_data_suite)
-def test_mul_tensor_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_mul_tensor_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Mul(),
test_data(),
aten_op,
@@ -118,8 +119,8 @@ def test_mul_tensor_tosa_MI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_2)
-def test_mul_tensor_tosa_MI_diff_input_ranks(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_mul_tensor_tosa_FP_diff_input_ranks(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Mul(),
test_data(),
aten_op,
@@ -129,8 +130,8 @@ def test_mul_tensor_tosa_MI_diff_input_ranks(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_int32)
-def test_mul_tensor_tosa_MI_int32(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_mul_tensor_tosa_FP_int32(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Mul(),
test_data(),
aten_op,
@@ -140,8 +141,8 @@ def test_mul_tensor_tosa_MI_int32(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_2)
-def test_mul_tensor_tosa_BI_diff_input_ranks(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_mul_tensor_tosa_INT_diff_input_ranks(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Mul(),
test_data(),
aten_op,
@@ -151,8 +152,8 @@ def test_mul_tensor_tosa_BI_diff_input_ranks(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_mul_tensor_tosa_BI(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_mul_tensor_tosa_INT(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Mul(),
test_data(),
aten_op,
@@ -162,8 +163,8 @@ def test_mul_tensor_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_int32)
-def test_mul_tensor_tosa_BI_int32(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_mul_tensor_tosa_INT_int32(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Mul(),
test_data(),
aten_op,
@@ -175,8 +176,8 @@ def test_mul_tensor_tosa_BI_int32(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_mul_tensor_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_mul_tensor_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Mul(),
test_data(),
aten_op,
@@ -188,8 +189,8 @@ def test_mul_tensor_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_mul_tensor_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_mul_tensor_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Mul(),
test_data(),
aten_op,
@@ -209,8 +210,8 @@ def test_mul_tensor_u85_BI(test_data: torch.Tensor):
},
)
@common.XfailIfNoCorstone300
-def test_mul_tensor_u55_BI_int32(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_mul_tensor_u55_INT_int32(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Mul(),
test_data(),
aten_op,
@@ -231,8 +232,8 @@ def test_mul_tensor_u55_BI_int32(test_data: torch.Tensor):
},
)
@common.XfailIfNoCorstone320
-def test_mul_tensor_u85_BI_int32(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_mul_tensor_u85_INT_int32(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Mul(),
test_data(),
aten_op,
@@ -241,3 +242,45 @@ def test_mul_tensor_u85_BI_int32(test_data: torch.Tensor):
)
pipeline.pop_stage("check.quant_nodes")
pipeline.run()
+
+
+@common.parametrize(
+ "test_data", test_data_suite | test_data_suite_2 | test_data_suite_int32
+)
+@common.SkipIfNoModelConverter
+def test_mul_tensor_vgf_FP(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Mul(),
+ test_data(),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite | test_data_suite_2)
+@common.SkipIfNoModelConverter
+def test_mul_tensor_vgf_INT(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Mul(),
+ test_data(),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_int32)
+@common.SkipIfNoModelConverter
+def test_mul_tensor_vgf_INT_int32(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Mul(),
+ test_data(),
+ aten_op,
+ exir_op=[],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
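The `@common.parametrize` calls above merge several suites with the dict union operator (PEP 584, Python 3.9+), and the int32 VGF test pops the `check.quant_nodes` stage because int32 inputs skip quantization, so no quant nodes exist to assert on. The union semantics are easy to verify in plain Python:

```python
import torch

# Dict union merges suites; on duplicate keys the right-hand operand wins.
suite_a = {"rand": lambda: torch.rand(3), "ones": lambda: torch.ones(3)}
suite_b = {"ones": lambda: 2 * torch.ones(3)}

merged = suite_a | suite_b
assert set(merged) == {"rand", "ones"}
assert float(merged["ones"]().sum()) == 6.0  # suite_b's entry replaced suite_a's
```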
diff --git a/backends/arm/test/ops/test_multihead_attention.py b/backends/arm/test/ops/test_multihead_attention.py
index 8a704ec333c..71cf076a157 100644
--- a/backends/arm/test/ops/test_multihead_attention.py
+++ b/backends/arm/test/ops/test_multihead_attention.py
@@ -7,10 +7,10 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
VgfPipeline,
)
@@ -42,9 +42,9 @@ def forward(self, *args, **kwargs):
"test_data",
test_suite,
)
-def test_multihead_attention_tosa_MI(test_data: input_t1):
+def test_multihead_attention_tosa_FP(test_data: input_t1):
test_data, module = test_data()
- pipeline = TosaPipelineMI(module, (*test_data, *test_data, *test_data), [], [])
+ pipeline = TosaPipelineFP(module, (*test_data, *test_data, *test_data), [], [])
pipeline.run()
@@ -52,9 +52,9 @@ def test_multihead_attention_tosa_MI(test_data: input_t1):
"test_data",
test_suite,
)
-def test_multihead_attention_tosa_BI(test_data):
+def test_multihead_attention_tosa_INT(test_data):
test_data, module = test_data()
- pipeline = TosaPipelineBI(
+ pipeline = TosaPipelineINT(
module,
(*test_data, *test_data, *test_data),
[],
@@ -71,9 +71,9 @@ def test_multihead_attention_tosa_BI(test_data):
)
@pytest.mark.xfail(reason="MLETORCH-1102: Numerical issues on FVP")
@common.XfailIfNoCorstone300
-def test_multihead_attention_u55_BI(test_data: input_t1):
+def test_multihead_attention_u55_INT(test_data: input_t1):
test_data, module = test_data()
- pipeline = EthosU55PipelineBI(
+ pipeline = EthosU55PipelineINT(
module,
(*test_data, *test_data, *test_data),
[],
@@ -93,9 +93,9 @@ def test_multihead_attention_u55_BI(test_data: input_t1):
)
@pytest.mark.xfail(reason="MLETORCH-1102: Numerical issues on FVP")
@common.XfailIfNoCorstone320
-def test_multihead_attention_u85_BI(test_data: input_t1):
+def test_multihead_attention_u85_INT(test_data: input_t1):
test_data, module = test_data()
- pipeline = EthosU85PipelineBI(
+ pipeline = EthosU85PipelineINT(
module,
(*test_data, *test_data, *test_data),
[],
diff --git a/backends/arm/test/ops/test_ne.py b/backends/arm/test/ops/test_ne.py
index 2ceacdb31b9..60f07ad9fdd 100644
--- a/backends/arm/test/ops/test_ne.py
+++ b/backends/arm/test/ops/test_ne.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -85,16 +86,16 @@ def get_inputs(self):
@common.parametrize("test_module", test_data_tensor)
-def test_ne_tensor_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_ne_tensor_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module, test_module.get_inputs(), NotEqual.aten_op_Tensor, NotEqual.exir_op
)
pipeline.run()
@common.parametrize("test_module", test_data_scalar)
-def test_ne_scalar_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+def test_ne_scalar_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module,
test_module.get_inputs(),
NotEqual.aten_op_Scalar,
@@ -104,16 +105,16 @@ def test_ne_scalar_tosa_MI(test_module):
@common.parametrize("test_module", test_data_tensor)
-def test_ne_tensor_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_ne_tensor_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module, test_module.get_inputs(), NotEqual.decomposed_ops, NotEqual.exir_op
)
pipeline.run()
@common.parametrize("test_module", test_data_scalar)
-def test_ne_scalar_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+def test_ne_scalar_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module, test_module.get_inputs(), NotEqual.decomposed_ops, NotEqual.exir_op
)
pipeline.run()
@@ -121,7 +122,7 @@ def test_ne_scalar_tosa_BI(test_module):
@common.parametrize("test_module", test_data_tensor)
@common.XfailIfNoCorstone300
-def test_ne_tensor_u55_BI(test_module):
+def test_ne_tensor_u55_INT(test_module):
# EQUAL is not supported on U55.
pipeline = OpNotSupportedPipeline[input_t](
test_module,
@@ -138,7 +139,7 @@ def test_ne_tensor_u55_BI(test_module):
@common.parametrize("test_module", test_data_scalar)
@common.XfailIfNoCorstone300
-def test_ne_scalar_u55_BI(test_module):
+def test_ne_scalar_u55_INT(test_module):
# Not equal (ne) is decomposed into the TOSA ops EQUAL and LOGICAL_NOT, both of
# which are unsupported on U55.
pipeline = OpNotSupportedPipeline[input_t](
@@ -164,8 +165,8 @@ def test_ne_scalar_u55_BI(test_module):
strict=False,
)
@common.XfailIfNoCorstone320
-def test_ne_tensor_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_ne_tensor_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module,
test_module.get_inputs(),
NotEqual.decomposed_ops,
@@ -185,8 +186,8 @@ def test_ne_tensor_u85_BI(test_module):
strict=False,
)
@common.XfailIfNoCorstone320
-def test_ne_scalar_u85_BI(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+def test_ne_scalar_u85_INT(test_module):
+ pipeline = EthosU85PipelineINT[input_t](
test_module,
test_module.get_inputs(),
NotEqual.decomposed_ops,
@@ -194,3 +195,55 @@ def test_ne_scalar_u85_BI(test_module):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_ne_tensor_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module,
+ test_module.get_inputs(),
+ NotEqual.aten_op_Tensor,
+ NotEqual.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_tensor)
+@common.SkipIfNoModelConverter
+def test_ne_tensor_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module,
+ test_module.get_inputs(),
+ NotEqual.decomposed_ops,
+ NotEqual.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_ne_scalar_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module,
+ test_module.get_inputs(),
+ NotEqual.aten_op_Scalar,
+ NotEqual.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_data_scalar)
+@common.SkipIfNoModelConverter
+def test_ne_scalar_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module,
+ test_module.get_inputs(),
+ NotEqual.decomposed_ops,
+ NotEqual.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
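As the U55 comments above note, `ne` has no direct TOSA operator: it decomposes into EQUAL followed by LOGICAL_NOT, and since EQUAL is unsupported on U55 the whole op falls back. The underlying identity can be checked directly in eager PyTorch:

```python
import torch

# not-equal == logical_not(equal), the decomposition the Arm backend relies on
x = torch.tensor([1, 2, 3])
y = torch.tensor([1, 0, 3])
assert torch.equal(torch.ne(x, y), torch.logical_not(torch.eq(x, y)))
```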
diff --git a/backends/arm/test/ops/test_neg.py b/backends/arm/test/ops/test_neg.py
index e4d705dfba9..395a4815b62 100644
--- a/backends/arm/test/ops/test_neg.py
+++ b/backends/arm/test/ops/test_neg.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor]
@@ -37,21 +38,21 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", Neg.test_data)
-def test_neg_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](Neg(), test_data, Neg.aten_op, Neg.exir_op)
+def test_neg_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](Neg(), test_data, Neg.aten_op, Neg.exir_op)
pipeline.run()
@common.parametrize("test_data", Neg.test_data)
-def test_neg_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](Neg(), test_data, Neg.aten_op, Neg.exir_op)
+def test_neg_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](Neg(), test_data, Neg.aten_op, Neg.exir_op)
pipeline.run()
@common.parametrize("test_data", Neg.test_data)
@common.XfailIfNoCorstone300
-def test_neg_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_neg_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
Neg(), test_data, Neg.aten_op, Neg.exir_op, run_on_fvp=True
)
pipeline.run()
@@ -59,8 +60,30 @@ def test_neg_u55_BI(test_data: input_t1):
@common.parametrize("test_data", Neg.test_data)
@common.XfailIfNoCorstone320
-def test_neg_u85_BI(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_neg_u85_INT(test_data: input_t1):
+ pipeline = EthosU85PipelineINT[input_t1](
Neg(), test_data, Neg.aten_op, Neg.exir_op, run_on_fvp=True
)
pipeline.run()
+
+
+@common.parametrize("test_data", Neg.test_data)
+@common.SkipIfNoModelConverter
+def test_neg_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Neg(), test_data, Neg.aten_op, Neg.exir_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Neg.test_data)
+@common.SkipIfNoModelConverter
+def test_neg_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Neg(),
+ test_data,
+ Neg.aten_op,
+ Neg.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_ones.py b/backends/arm/test/ops/test_ones.py
index d3b7528c4d0..18204a8eaaa 100644
--- a/backends/arm/test/ops/test_ones.py
+++ b/backends/arm/test/ops/test_ones.py
@@ -7,11 +7,12 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = tuple[torch.Tensor]
@@ -49,9 +50,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", OnesAdd.test_data)
-def test_ones_tosa_MI(test_data: test_data_t):
+def test_ones_tosa_FP(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
OnesAdd(*init_data),
input_data(),
OnesAdd.aten_op,
@@ -60,9 +61,9 @@ def test_ones_tosa_MI(test_data: test_data_t):
@common.parametrize("test_data", OnesAdd.test_data)
-def test_ones_tosa_BI(test_data: test_data_t):
+def test_ones_tosa_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
OnesAdd(*init_data),
input_data(),
OnesAdd.aten_op,
@@ -73,9 +74,9 @@ def test_ones_tosa_BI(test_data: test_data_t):
@common.parametrize("test_data", OnesAdd.test_data)
@common.XfailIfNoCorstone300
-def test_ones_u55_BI(test_data: test_data_t):
+def test_ones_u55_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
OnesAdd(*init_data),
input_data(),
OnesAdd.aten_op,
@@ -87,9 +88,9 @@ def test_ones_u55_BI(test_data: test_data_t):
@common.parametrize("test_data", OnesAdd.test_data)
@common.XfailIfNoCorstone320
-def test_ones_u85_BI(test_data: test_data_t):
+def test_ones_u85_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
OnesAdd(*init_data),
input_data(),
OnesAdd.aten_op,
@@ -108,9 +109,33 @@ def test_ones_u85_BI(test_data: test_data_t):
"int32_int64": "MLETORCG-716: Do not delegate empty networks to vela",
},
)
-def test_ones_tosa_BI_not_delegated(test_data: test_data_t):
+def test_ones_tosa_INT_not_delegated(test_data: test_data_t):
input_data, init_data = test_data
pipeline = OpNotSupportedPipeline[input_t](
OnesAdd(*init_data), input_data(), non_delegated_ops={}, quantize=True
)
pipeline.run()
+
+
+@common.parametrize("test_data", OnesAdd.test_data)
+@common.SkipIfNoModelConverter
+def test_ones_vgf_FP(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ OnesAdd(*init_data), input_data(), OnesAdd.aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", OnesAdd.test_data)
+@common.SkipIfNoModelConverter
+def test_ones_vgf_INT(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ OnesAdd(*init_data),
+ input_data(),
+ OnesAdd.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py
index ef91c794379..57f7f9603a1 100644
--- a/backends/arm/test/ops/test_permute.py
+++ b/backends/arm/test/ops/test_permute.py
@@ -13,10 +13,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from torchvision.ops import Permute
@@ -48,9 +49,9 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_permute_tosa_MI(test_data: torch.Tensor):
+def test_permute_tosa_FP(test_data: torch.Tensor):
test_data, dims = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
SimplePermute(dims=dims),
(test_data,),
aten_op,
@@ -60,9 +61,9 @@ def test_permute_tosa_MI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_permute_tosa_BI(test_data: torch.Tensor):
+def test_permute_tosa_INT(test_data: torch.Tensor):
test_data, dims = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
SimplePermute(dims=dims),
(test_data,),
aten_op,
@@ -79,9 +80,9 @@ def test_permute_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite, x_fails)
@common.XfailIfNoCorstone300
-def test_permute_u55_BI(test_data):
+def test_permute_u55_INT(test_data):
test_data, dims = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
SimplePermute(dims=dims),
(test_data,),
aten_op,
@@ -94,9 +95,9 @@ def test_permute_u55_BI(test_data):
# Fails on FVP since N > 1 is not supported. MLETORCH-517
@common.parametrize("test_data", test_data_suite, x_fails)
@common.XfailIfNoCorstone320
-def test_permute_u85_BI(test_data: torch.Tensor):
+def test_permute_u85_INT(test_data: torch.Tensor):
test_data, dims = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
SimplePermute(dims=dims),
(test_data,),
aten_op,
@@ -104,3 +105,31 @@ def test_permute_u85_BI(test_data: torch.Tensor):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_permute_vgf_FP(test_data):
+ test_data, dims = test_data()
+ pipeline = VgfPipeline[input_t1](
+ SimplePermute(dims=dims),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_permute_vgf_INT(test_data):
+ test_data, dims = test_data()
+ pipeline = VgfPipeline[input_t1](
+ SimplePermute(dims=dims),
+ (test_data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
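The permute suite parametrizes over `(tensor, dims)` pairs; as a refresher on the semantics being exercised, entry `i` of `dims` names which source axis becomes output axis `i`:

```python
import torch

x = torch.rand(2, 3, 5)
y = x.permute(2, 0, 1)  # axis 2 -> 0, axis 0 -> 1, axis 1 -> 2
assert y.shape == (5, 2, 3)
assert torch.equal(y[4, 1, 2], x[1, 2, 4])
```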
diff --git a/backends/arm/test/ops/test_pow.py b/backends/arm/test/ops/test_pow.py
index c1014d4a5d6..016c3e97265 100644
--- a/backends/arm/test/ops/test_pow.py
+++ b/backends/arm/test/ops/test_pow.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -92,8 +93,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", Pow_TensorTensor.test_data, x_fail, strict=False)
-def test_pow_tensor_tensor_tosa_MI(test_data: Pow_TensorTensor.input_t):
- pipeline = TosaPipelineMI[Pow_TensorTensor.input_t](
+def test_pow_tensor_tensor_tosa_FP(test_data: Pow_TensorTensor.input_t):
+ pipeline = TosaPipelineFP[Pow_TensorTensor.input_t](
Pow_TensorTensor(),
test_data(),
Pow_TensorTensor.aten_op,
@@ -102,6 +103,19 @@ def test_pow_tensor_tensor_tosa_MI(test_data: Pow_TensorTensor.input_t):
pipeline.run()
+@common.parametrize("test_data", Pow_TensorTensor.test_data, x_fail, strict=False)
+@common.SkipIfNoModelConverter
+def test_pow_tensor_tensor_vgf_FP(test_data: Pow_TensorTensor.input_t):
+ pipeline = VgfPipeline[Pow_TensorTensor.input_t](
+ Pow_TensorTensor(),
+ test_data(),
+ Pow_TensorTensor.aten_op,
+ Pow_TensorTensor.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
x_fail = {
"exp_minus_three": "TOSA constraints: If x == 0 and y ⇐ 0, the result is undefined.",
"exp_minus_one": "TOSA constraints: If x == 0 and y ⇐ 0, the result is undefined.",
@@ -113,9 +127,9 @@ def test_pow_tensor_tensor_tosa_MI(test_data: Pow_TensorTensor.input_t):
@common.parametrize("test_data", Pow_TensorScalar.test_data, x_fail, strict=False)
-def test_pow_tensor_scalar_tosa_MI(test_data: Pow_TensorScalar.input_t):
+def test_pow_tensor_scalar_tosa_FP(test_data: Pow_TensorScalar.input_t):
base, exp = test_data()
- pipeline = TosaPipelineMI[Pow_TensorScalar.input_t](
+ pipeline = TosaPipelineFP[Pow_TensorScalar.input_t](
Pow_TensorScalar(exp),
(base,),
Pow_TensorScalar.aten_op,
@@ -125,9 +139,9 @@ def test_pow_tensor_scalar_tosa_MI(test_data: Pow_TensorScalar.input_t):
@common.parametrize("test_data", Pow_TensorScalar.test_data, x_fail, strict=False)
-def test_pow_tensor_scalar_tosa_BI(test_data: Pow_TensorScalar.input_t):
+def test_pow_tensor_scalar_tosa_INT(test_data: Pow_TensorScalar.input_t):
base, exp = test_data()
- pipeline = TosaPipelineBI[Pow_TensorScalar.input_t](
+ pipeline = TosaPipelineINT[Pow_TensorScalar.input_t](
Pow_TensorScalar(exp),
(base,),
Pow_TensorScalar.aten_op,
@@ -138,9 +152,9 @@ def test_pow_tensor_scalar_tosa_BI(test_data: Pow_TensorScalar.input_t):
@common.parametrize("test_data", Pow_TensorScalar.test_data)
@common.XfailIfNoCorstone300
-def test_pow_tensor_scalar_u55_BI(test_data: Pow_TensorScalar.input_t):
+def test_pow_tensor_scalar_u55_INT(test_data: Pow_TensorScalar.input_t):
base, exp = test_data()
- pipeline = EthosU55PipelineBI[Pow_TensorScalar.input_t](
+ pipeline = EthosU55PipelineINT[Pow_TensorScalar.input_t](
Pow_TensorScalar(exp),
(base,),
Pow_TensorScalar.aten_op,
@@ -152,9 +166,9 @@ def test_pow_tensor_scalar_u55_BI(test_data: Pow_TensorScalar.input_t):
@common.parametrize("test_data", Pow_TensorScalar.test_data)
@common.XfailIfNoCorstone320
-def test_pow_tensor_scalar_u85_BI(test_data: Pow_TensorScalar.input_t):
+def test_pow_tensor_scalar_u85_INT(test_data: Pow_TensorScalar.input_t):
base, exp = test_data()
- pipeline = EthosU85PipelineBI[Pow_TensorScalar.input_t](
+ pipeline = EthosU85PipelineINT[Pow_TensorScalar.input_t](
Pow_TensorScalar(exp),
(base,),
Pow_TensorScalar.aten_op,
@@ -162,3 +176,31 @@ def test_pow_tensor_scalar_u85_BI(test_data: Pow_TensorScalar.input_t):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", Pow_TensorScalar.test_data, x_fail, strict=False)
+@common.SkipIfNoModelConverter
+def test_pow_tensor_scalar_vgf_FP(test_data: Pow_TensorScalar.input_t):
+ base, exp = test_data()
+ pipeline = VgfPipeline[Pow_TensorScalar.input_t](
+ Pow_TensorScalar(exp),
+ (base,),
+ Pow_TensorScalar.aten_op,
+ Pow_TensorScalar.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Pow_TensorScalar.test_data, x_fail, strict=False)
+@common.SkipIfNoModelConverter
+def test_pow_tensor_scalar_vgf_INT(test_data: Pow_TensorScalar.input_t):
+ base, exp = test_data()
+ pipeline = VgfPipeline[Pow_TensorScalar.input_t](
+ Pow_TensorScalar(exp),
+ (base,),
+ Pow_TensorScalar.aten_op,
+ Pow_TensorScalar.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
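The `x_fail` reasons above cite the TOSA constraint that `pow` is undefined for x == 0 with y <= 0. Eager PyTorch does define those cases (inf for negative exponents, 1 for a zero exponent), so a conforming backend may legitimately disagree with the reference output — hence the expected failures. The eager behavior:

```python
import torch

zero = torch.tensor(0.0)
assert torch.pow(zero, torch.tensor(-1.0)).isinf()  # 0 ** -1 -> inf in eager
assert torch.pow(zero, torch.tensor(0.0)) == 1.0    # 0 ** 0 -> 1 in eager
```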
diff --git a/backends/arm/test/ops/test_reciprocal.py b/backends/arm/test/ops/test_reciprocal.py
index 48d7e516aaa..78edbb980e8 100644
--- a/backends/arm/test/ops/test_reciprocal.py
+++ b/backends/arm/test/ops/test_reciprocal.py
@@ -11,10 +11,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x, Input y
@@ -41,8 +42,8 @@ def forward(self, input_: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_reciprocal_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_reciprocal_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Reciprocal(),
(test_data(),),
aten_op,
@@ -52,8 +53,8 @@ def test_reciprocal_tosa_MI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_reciprocal_tosa_BI(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_reciprocal_tosa_INT(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Reciprocal(),
(test_data(),),
aten_op,
@@ -64,8 +65,8 @@ def test_reciprocal_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
-def test_reciprocal_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_reciprocal_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Reciprocal(),
(test_data(),),
aten_op,
@@ -77,8 +78,8 @@ def test_reciprocal_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_reciprocal_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_reciprocal_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Reciprocal(),
(test_data(),),
aten_op,
@@ -87,3 +88,27 @@ def test_reciprocal_u85_BI(test_data: torch.Tensor):
symmetric_io_quantization=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_reciprocal_vgf_FP(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Reciprocal(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_reciprocal_vgf_INT(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Reciprocal(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_relu.py b/backends/arm/test/ops/test_relu.py
index 00527a6c314..0b29bc24e75 100644
--- a/backends/arm/test/ops/test_relu.py
+++ b/backends/arm/test/ops/test_relu.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -43,8 +44,8 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_relu_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_relu_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Relu(),
(test_data(),),
aten_op,
@@ -54,8 +55,8 @@ def test_relu_tosa_MI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_relu_tosa_BI(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_relu_tosa_INT(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Relu(),
(test_data(),),
aten_op,
@@ -65,8 +66,8 @@ def test_relu_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_relu_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_relu_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Relu(),
(test_data(),),
aten_op,
@@ -77,8 +78,8 @@ def test_relu_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_relu_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_relu_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Relu(),
(test_data(),),
aten_op,
@@ -86,3 +87,29 @@ def test_relu_u85_BI(test_data: torch.Tensor):
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_relu_vgf_FP(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Relu(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_relu_vgf_INT(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Relu(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py
index 556e27be23d..3236515b661 100644
--- a/backends/arm/test/ops/test_repeat.py
+++ b/backends/arm/test/ops/test_repeat.py
@@ -14,10 +14,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor, torch.Tensor] # Input x, Input y
@@ -63,9 +64,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_repeat_tosa_MI(test_data: Tuple):
+def test_repeat_tosa_FP(test_data: Tuple):
module, test_data = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
module,
test_data,
module.aten_op,
@@ -75,9 +76,9 @@ def test_repeat_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_repeat_tosa_BI(test_data: Tuple):
+def test_repeat_tosa_INT(test_data: Tuple):
module, test_data = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
module,
test_data,
module.aten_op,
@@ -87,9 +88,9 @@ def test_repeat_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_repeat_u55_BI(test_data: Tuple):
+def test_repeat_u55_INT(test_data: Tuple):
module, test_data = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
module,
test_data,
module.aten_op,
@@ -100,9 +101,9 @@ def test_repeat_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_repeat_u85_BI(test_data: Tuple):
+def test_repeat_u85_INT(test_data: Tuple):
module, test_data = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
module,
test_data,
module.aten_op,
@@ -110,3 +111,29 @@ def test_repeat_u85_BI(test_data: Tuple):
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_repeat_vgf_FP(test_data: Tuple):
+ module, args = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ args,
+ module.aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_repeat_vgf_INT(test_data: Tuple):
+ module, args = test_data()
+ pipeline = VgfPipeline[input_t1](
+ module,
+ args,
+ module.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_round.py b/backends/arm/test/ops/test_round.py
index 3480076a3e1..a4fea455e4f 100644
--- a/backends/arm/test/ops/test_round.py
+++ b/backends/arm/test/ops/test_round.py
@@ -10,10 +10,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -38,8 +39,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_round_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_round_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Round(),
(test_data(),),
aten_op,
@@ -49,8 +50,8 @@ def test_round_tosa_MI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_round_tosa_BI(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_round_tosa_INT(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Round(),
(test_data(),),
[],
@@ -62,8 +63,8 @@ def test_round_tosa_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone300
@pytest.mark.xfail(reason="where.self not supported on U55")
-def test_round_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_round_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Round(),
(test_data(),),
[],
@@ -74,11 +75,37 @@ def test_round_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_round_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_round_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Round(),
(test_data(),),
[],
exir_op,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_round_vgf_FP(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Round(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_round_vgf_INT(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Round(),
+ (test_data(),),
+ [],
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_rshift.py b/backends/arm/test/ops/test_rshift.py
index 2e11cee5183..e97bfb840ae 100644
--- a/backends/arm/test/ops/test_rshift.py
+++ b/backends/arm/test/ops/test_rshift.py
@@ -10,18 +10,19 @@
XfailIfNoCorstone320,
)
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
scalar_input_t = tuple[torch.Tensor, int]
class RshiftScalar(torch.nn.Module):
- torch_op_MI = "torch.ops.aten.__rshift__.Scalar"
- torch_op_BI = "torch.ops.aten.bitwise_right_shift.Tensor"
+ torch_op_FP = "torch.ops.aten.__rshift__.Scalar"
+ torch_op_INT = "torch.ops.aten.bitwise_right_shift.Tensor"
exir_op = "executorch_exir_dialects_edge__ops_aten_bitwise_right_shift_Tensor"
test_data = {
"randint_neg_100_int8": lambda: (
@@ -67,22 +68,27 @@ def forward(self, x: torch.Tensor, shift: torch.Tensor):
return x.bitwise_right_shift(shift)
+##################
+## RshiftScalar ##
+##################
+
+
@common.parametrize("test_data", RshiftScalar.test_data)
-def test_rshift_scalar_tosa_MI_scalar(test_data):
- TosaPipelineMI[scalar_input_t](
+def test_bitwise_right_shift_scalar_tosa_FP_scalar(test_data):
+ TosaPipelineFP[scalar_input_t](
RshiftScalar(),
test_data(),
- RshiftScalar.torch_op_MI,
+ RshiftScalar.torch_op_FP,
RshiftScalar.exir_op,
).run()
@common.parametrize("test_data", RshiftScalar.test_data)
-def test_bitwise_right_shift_tensor_tosa_BI_scalar(test_data):
- pipeline = TosaPipelineBI[scalar_input_t](
+def test_bitwise_right_shift_tensor_tosa_INT_scalar(test_data):
+ pipeline = TosaPipelineINT[scalar_input_t](
RshiftScalar(),
test_data(),
- RshiftScalar.torch_op_BI,
+ RshiftScalar.torch_op_INT,
RshiftScalar.exir_op,
)
pipeline.pop_stage("check.quant_nodes")
@@ -91,11 +97,11 @@ def test_bitwise_right_shift_tensor_tosa_BI_scalar(test_data):
@common.parametrize("test_data", RshiftScalar.test_data)
@XfailIfNoCorstone300
-def test_bitwise_right_shift_tensor_u55_BI_scalar(test_data):
- pipeline = EthosU55PipelineBI[scalar_input_t](
+def test_bitwise_right_shift_tensor_u55_INT_scalar(test_data):
+ pipeline = EthosU55PipelineINT[scalar_input_t](
RshiftScalar(),
test_data(),
- RshiftScalar.torch_op_BI,
+ RshiftScalar.torch_op_INT,
RshiftScalar.exir_op,
run_on_fvp=True,
)
@@ -108,11 +114,11 @@ def test_bitwise_right_shift_tensor_u55_BI_scalar(test_data):
@common.parametrize("test_data", RshiftScalar.test_data)
@XfailIfNoCorstone320
-def test_bitwise_right_shift_tensor_u85_BI_scalar(test_data):
- pipeline = EthosU85PipelineBI[scalar_input_t](
+def test_bitwise_right_shift_tensor_u85_INT_scalar(test_data):
+ pipeline = EthosU85PipelineINT[scalar_input_t](
RshiftScalar(),
test_data(),
- RshiftScalar.torch_op_BI,
+ RshiftScalar.torch_op_INT,
RshiftScalar.exir_op,
run_on_fvp=True,
)
@@ -120,9 +126,41 @@ def test_bitwise_right_shift_tensor_u85_BI_scalar(test_data):
pipeline.run()
+@common.parametrize("test_data", RshiftScalar.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_right_shift_scalar_vgf_FP_scalar(test_data):
+ pipeline = VgfPipeline[scalar_input_t](
+ RshiftScalar(),
+ test_data(),
+ RshiftScalar.torch_op_FP,
+ RshiftScalar.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", RshiftScalar.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_right_shift_tensor_vgf_INT_scalar(test_data):
+ pipeline = VgfPipeline[scalar_input_t](
+ RshiftScalar(),
+ test_data(),
+ RshiftScalar.torch_op_INT,
+ RshiftScalar.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
+
+
+##################
+## RshiftTensor ##
+##################
+
+
@common.parametrize("test_data", RshiftTensor.test_data)
-def test_rshift_scalar_tosa_MI(test_data):
- TosaPipelineMI[scalar_input_t](
+def test_bitwise_right_shift_tensor_tosa_FP(test_data):
+ TosaPipelineFP[scalar_input_t](
RshiftTensor(),
test_data(),
RshiftTensor.torch_op,
@@ -131,8 +169,8 @@ def test_rshift_scalar_tosa_MI(test_data):
@common.parametrize("test_data", RshiftTensor.test_data)
-def test_bitwise_right_shift_tensor_tosa_BI(test_data):
- pipeline = TosaPipelineBI[scalar_input_t](
+def test_bitwise_right_shift_tensor_tosa_INT(test_data):
+ pipeline = TosaPipelineINT[scalar_input_t](
RshiftTensor(),
test_data(),
RshiftTensor.torch_op,
@@ -144,8 +182,8 @@ def test_bitwise_right_shift_tensor_tosa_BI(test_data):
@common.parametrize("test_data", RshiftTensor.test_data)
@XfailIfNoCorstone300
-def test_bitwise_right_shift_tensor_u55_BI(test_data):
- pipeline = EthosU55PipelineBI[scalar_input_t](
+def test_bitwise_right_shift_tensor_u55_INT(test_data):
+ pipeline = EthosU55PipelineINT[scalar_input_t](
RshiftTensor(),
test_data(),
RshiftTensor.torch_op,
@@ -161,8 +199,8 @@ def test_bitwise_right_shift_tensor_u55_BI(test_data):
@common.parametrize("test_data", RshiftTensor.test_data)
@XfailIfNoCorstone320
-def test_bitwise_right_shift_tensor_u85_BI(test_data):
- pipeline = EthosU85PipelineBI[scalar_input_t](
+def test_bitwise_right_shift_tensor_u85_INT(test_data):
+ pipeline = EthosU85PipelineINT[scalar_input_t](
RshiftTensor(),
test_data(),
RshiftTensor.torch_op,
@@ -171,3 +209,30 @@ def test_bitwise_right_shift_tensor_u85_BI(test_data):
)
pipeline.pop_stage("check.quant_nodes")
pipeline.run()
+
+
+@common.parametrize("test_data", RshiftTensor.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_right_shift_tensor_vgf_FP(test_data):
+ pipeline = VgfPipeline[tensor_input_t](
+ RshiftTensor(),
+ test_data(),
+ RshiftTensor.torch_op,
+ RshiftTensor.exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", RshiftTensor.test_data)
+@common.SkipIfNoModelConverter
+def test_bitwise_right_shift_tensor_vgf_INT(test_data):
+ pipeline = VgfPipeline[tensor_input_t](
+ RshiftTensor(),
+ test_data(),
+ RshiftTensor.torch_op,
+ RshiftTensor.exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
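`RshiftScalar` tracks two aten op names (`torch_op_FP` vs `torch_op_INT` above) because `x >> k` with a Python int and `x.bitwise_right_shift(k)` trace to different ops even though they compute the same arithmetic shift. The equivalence itself is plain torch:

```python
import torch

x = torch.tensor([-100, -8, 8, 100], dtype=torch.int8)
assert torch.equal(x >> 2, x.bitwise_right_shift(2))
assert torch.equal(x >> 2, torch.tensor([-25, -2, 2, 25], dtype=torch.int8))
```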
diff --git a/backends/arm/test/ops/test_rsqrt.py b/backends/arm/test/ops/test_rsqrt.py
index 0a9e95d890e..d146a83287e 100644
--- a/backends/arm/test/ops/test_rsqrt.py
+++ b/backends/arm/test/ops/test_rsqrt.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -36,8 +37,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_tensor", Rsqrt.test_parameters)
-def test_rsqrt_tosa_MI(test_tensor: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](
+def test_rsqrt_tosa_FP(test_tensor: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](
Rsqrt(),
test_tensor(),
aten_op,
@@ -47,8 +48,8 @@ def test_rsqrt_tosa_MI(test_tensor: torch.Tensor):
@common.parametrize("test_tensor", Rsqrt.test_parameters)
-def test_rsqrt_tosa_BI(test_tensor: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_rsqrt_tosa_INT(test_tensor: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Rsqrt(),
test_tensor(),
aten_op,
@@ -59,8 +60,8 @@ def test_rsqrt_tosa_BI(test_tensor: torch.Tensor):
@common.parametrize("test_tensor", Rsqrt.test_parameters)
@common.XfailIfNoCorstone300
-def test_rsqrt_u55_BI(test_tensor: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_rsqrt_u55_INT(test_tensor: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Rsqrt(),
test_tensor(),
aten_op,
@@ -72,8 +73,8 @@ def test_rsqrt_u55_BI(test_tensor: torch.Tensor):
@common.parametrize("test_tensor", Rsqrt.test_parameters)
@common.XfailIfNoCorstone320
-def test_rsqrt_u85_BI(test_tensor: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_rsqrt_u85_INT(test_tensor: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Rsqrt(),
test_tensor(),
aten_op,
@@ -81,3 +82,27 @@ def test_rsqrt_u85_BI(test_tensor: torch.Tensor):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_tensor", Rsqrt.test_parameters)
+@common.SkipIfNoModelConverter
+def test_rsqrt_vgf_FP(test_tensor: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Rsqrt(),
+ test_tensor(),
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_tensor", Rsqrt.test_parameters)
+@common.SkipIfNoModelConverter
+def test_rsqrt_vgf_INT(test_tensor: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Rsqrt(),
+ test_tensor(),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_scalar_tensor.py b/backends/arm/test/ops/test_scalar_tensor.py
index 6658f06a884..22c1cc0373d 100644
--- a/backends/arm/test/ops/test_scalar_tensor.py
+++ b/backends/arm/test/ops/test_scalar_tensor.py
@@ -7,10 +7,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
float_test_data_suite = {
@@ -53,9 +54,9 @@ def forward(self, x: torch.Tensor):
"test_data",
int_test_data_suite | float_test_data_suite,
)
-def test_scalar_tensor_tosa_MI(test_data): # Note TOSA MI supports all types
+def test_scalar_tensor_tosa_FP(test_data): # Note TOSA FP supports all types
scalar, dtype, data = test_data()
- TosaPipelineMI(
+ TosaPipelineFP(
ScalarTensor(scalar, dtype),
tuple(data),
ScalarTensor.aten_op,
@@ -66,9 +67,9 @@ def test_scalar_tensor_tosa_MI(test_data): # Note TOSA MI supports all types
"test_data",
int_test_data_suite | float_test_data_suite,
)
-def test_scalar_tensor_tosa_BI(test_data):
+def test_scalar_tensor_tosa_INT(test_data):
scalar, dtype, data = test_data()
- pipeline: TosaPipelineBI = TosaPipelineBI(
+ pipeline: TosaPipelineINT = TosaPipelineINT(
ScalarTensor(scalar, dtype),
tuple(data),
ScalarTensor.aten_op,
@@ -79,9 +80,9 @@ def test_scalar_tensor_tosa_BI(test_data):
@common.parametrize("test_data", float_test_data_suite)
@common.XfailIfNoCorstone300
-def test_scalar_tensor_u55_BI(test_data):
+def test_scalar_tensor_u55_INT(test_data):
scalar, dtype, data = test_data()
- EthosU55PipelineBI(
+ EthosU55PipelineINT(
ScalarTensor(scalar, dtype),
tuple(data),
ScalarTensor.aten_op,
@@ -91,11 +92,38 @@ def test_scalar_tensor_u55_BI(test_data):
@common.parametrize("test_data", float_test_data_suite)
@common.XfailIfNoCorstone320
-def test_scalar_tensor_u85_BI(test_data):
+def test_scalar_tensor_u85_INT(test_data):
scalar, dtype, data = test_data()
- EthosU85PipelineBI(
+ EthosU85PipelineINT(
ScalarTensor(scalar, dtype),
tuple(data),
ScalarTensor.aten_op,
run_on_fvp=True,
).run()
+
+
+@common.parametrize("test_data", float_test_data_suite)
+@common.SkipIfNoModelConverter
+def test_scalar_tensor_vgf_FP(test_data):
+ scalar, dtype, data = test_data()
+ pipeline = VgfPipeline(
+ ScalarTensor(scalar, dtype),
+ tuple(data),
+ ScalarTensor.aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", int_test_data_suite)
+@common.SkipIfNoModelConverter
+def test_scalar_tensor_vgf_INT(test_data):
+ scalar, dtype, data = test_data()
+ pipeline = VgfPipeline(
+ ScalarTensor(scalar, dtype),
+ tuple(data),
+ ScalarTensor.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
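The scalar-tensor suites carry `(scalar, dtype, data)` triples because `torch.scalar_tensor` wraps a Python number as a zero-dimensional tensor of a chosen dtype, and the INT flow must pop the quant-node check for integer dtypes that never get quantized:

```python
import torch

t = torch.scalar_tensor(3, dtype=torch.int32)
assert t.dim() == 0 and t.dtype == torch.int32 and int(t) == 3
f = torch.scalar_tensor(0.5)  # falls back to the default dtype
assert f.dtype == torch.float32  # assuming the global default is unchanged
```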
diff --git a/backends/arm/test/ops/test_scalars.py b/backends/arm/test/ops/test_scalars.py
index 3ede947b218..1243a522526 100644
--- a/backends/arm/test/ops/test_scalars.py
+++ b/backends/arm/test/ops/test_scalars.py
@@ -12,13 +12,13 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
)
"""
Summary of non-working cases.
-MI:
+FP:
Op(scalar, tensor):
One issue is that lift_constant_tensor_pass looks for a fake_tensor in the meta of the first
node, which does not work when the first node is a scalar.
@@ -170,253 +170,255 @@ def forward(self, x):
}
-# ADD MI ------------------------------------------------------
+# ADD FP ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_add_tensor_tosa_MI_scalar(test_data):
+def test_add_tensor_tosa_FP_scalar(test_data):
"""Tests regular add with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](Add(), test_data, aten_op=Add.aten_op)
+ pipeline = TosaPipelineFP[input_t1](Add(), test_data, aten_op=Add.aten_op)
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_add_tensor_tosa_MI_inplace(test_data):
+def test_add_tensor_tosa_FP_inplace(test_data):
"""Tests inplace add with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](AddInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineFP[input_t1](AddInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_const_tests, xfails=xfails)
-def test_add_tensor_tosa_MI_const(test_data):
+def test_add_tensor_tosa_FP_const(test_data):
"""Tests regular add with one scalar input, with one of inputs constant."""
- pipeline = TosaPipelineMI[input_t1](AddConst(), test_data, aten_op=AddConst.aten_op)
+ pipeline = TosaPipelineFP[input_t1](AddConst(), test_data, aten_op=AddConst.aten_op)
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_add_scalar_tosa_MI(test_data):
+def test_add_scalar_tosa_FP(test_data):
"""Tests a scalar add with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
AddScalar(), test_data, aten_op=AddScalar.aten_op
)
pipeline.run()
-# ADD BI ------------------------------------------------------
+# ADD INT ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests)
-def test_add_tensor_tosa_BI_scalar(test_data):
+def test_add_tensor_tosa_INT_scalar(test_data):
"""Tests regular add with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](Add(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](Add(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests)
-def test_add_tensor_tosa_BI_inplace(test_data):
+def test_add_tensor_tosa_INT_inplace(test_data):
"""Tests inplace add with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](AddInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](AddInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_const_tests)
-def test_add_tensor_tosa_BI_const(test_data):
+def test_add_tensor_tosa_INT_const(test_data):
"""Tests regular add with one scalar input, with one of inputs constant."""
- pipeline = TosaPipelineBI[input_t1](AddConst(), test_data, aten_op=AddConst.aten_op)
+ pipeline = TosaPipelineINT[input_t1](
+ AddConst(), test_data, aten_op=AddConst.aten_op
+ )
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_add_scalar_tosa_BI(test_data):
+def test_add_scalar_tosa_INT(test_data):
"""Tests a scalar add with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](AddScalar(), test_data, aten_op=Add.aten_op)
+ pipeline = TosaPipelineINT[input_t1](AddScalar(), test_data, aten_op=Add.aten_op)
pipeline.run()
# ADD ETHOS-U ------------------------------------------------------
-@pytest.mark.skip(reason="This is tested in test_add_scalar_tosa_BI")
-def test_add_scalar_u55_BI():
+@pytest.mark.skip(reason="This is tested in test_add_scalar_tosa_INT")
+def test_add_scalar_u55_INT():
pass
-@pytest.mark.skip(reason="This is tested in test_add_scalar_tosa_BI")
-def test_add_scalar_u85_BI():
+@pytest.mark.skip(reason="This is tested in test_add_scalar_tosa_INT")
+def test_add_scalar_u85_INT():
pass
-# SUB MI ------------------------------------------------------
+# SUB FP ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_sub_tensor_tosa_MI_scalar(test_data):
+def test_sub_tensor_tosa_FP_scalar(test_data):
"""Tests regular sub with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](Sub(), test_data, aten_op=Sub.aten_op)
+ pipeline = TosaPipelineFP[input_t1](Sub(), test_data, aten_op=Sub.aten_op)
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_sub_tensor_tosa_MI_inplace(test_data):
+def test_sub_tensor_tosa_FP_inplace(test_data):
"""Tests inplace sub with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](SubInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineFP[input_t1](SubInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_sub_scalar_tosa_MI(test_data):
+def test_sub_scalar_tosa_FP(test_data):
"""Tests a scalar sub with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
SubScalar(), test_data, aten_op=SubScalar.aten_op
)
pipeline.run()
-# SUB BI ------------------------------------------------------
+# SUB INT ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests)
-def test_sub_tensor_tosa_BI_scalar(test_data):
+def test_sub_tensor_tosa_INT_scalar(test_data):
"""Tests regular sub with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](Sub(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](Sub(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests)
-def test_sub_tensor_tosa_BI_inplace(test_data):
+def test_sub_tensor_tosa_INT_inplace(test_data):
"""Tests inplace sub with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](SubInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](SubInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_sub_scalar_tosa_BI(test_data):
+def test_sub_scalar_tosa_INT(test_data):
"""Tests a scalar sub with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](SubScalar(), test_data, aten_op=Sub.aten_op)
+ pipeline = TosaPipelineINT[input_t1](SubScalar(), test_data, aten_op=Sub.aten_op)
pipeline.run()
# SUB ETHOS-U ------------------------------------------------------
-@pytest.mark.skip(reason="This is tested in test_sub_scalar_tosa_BI")
-def test_sub_scalar_u55_BI():
+@pytest.mark.skip(reason="This is tested in test_sub_scalar_tosa_INT")
+def test_sub_scalar_u55_INT():
pass
-@pytest.mark.skip(reason="This is tested in test_sub_scalar_tosa_BI")
-def test_sub_scalar_u85_BI():
+@pytest.mark.skip(reason="This is tested in test_sub_scalar_tosa_INT")
+def test_sub_scalar_u85_INT():
pass
-# MUL MI ------------------------------------------------------
+# MUL FP ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_mul_tensor_tosa_MI_scalar(test_data):
+def test_mul_tensor_tosa_FP_scalar(test_data):
"""Tests regular mul with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](Mul(), test_data, aten_op=Mul.aten_op)
+ pipeline = TosaPipelineFP[input_t1](Mul(), test_data, aten_op=Mul.aten_op)
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_mul_tensor_tosa_MI_inplace(test_data):
+def test_mul_tensor_tosa_FP_inplace(test_data):
"""Tests inplace mul with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](MulInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineFP[input_t1](MulInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_mul_scalar_tosa_MI(test_data):
+def test_mul_scalar_tosa_FP(test_data):
"""Tests a scalar mul with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
MulScalar(), test_data, aten_op=MulScalar.aten_op
)
pipeline.run()
-# MUL BI ------------------------------------------------------
+# MUL INT ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests)
-def test_mul_tensor_tosa_BI_scalar(test_data):
+def test_mul_tensor_tosa_INT_scalar(test_data):
"""Tests regular mul with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](Mul(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](Mul(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests)
-def test_mul_tensor_tosa_BI_inplace(test_data):
+def test_mul_tensor_tosa_INT_inplace(test_data):
"""Tests inplace mul with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](MulInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](MulInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_mul_scalar_tosa_BI(test_data):
+def test_mul_scalar_tosa_INT(test_data):
"""Tests a scalar mul with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](MulScalar(), test_data, aten_op=Mul.aten_op)
+ pipeline = TosaPipelineINT[input_t1](MulScalar(), test_data, aten_op=Mul.aten_op)
pipeline.run()
# MUL ETHOS-U ------------------------------------------------------
-@pytest.mark.skip(reason="This is tested in test_mul_scalar_tosa_BI")
-def test_mul_scalar_u55_BI():
+@pytest.mark.skip(reason="This is tested in test_mul_scalar_tosa_INT")
+def test_mul_scalar_u55_INT():
pass
-@pytest.mark.skip(reason="This is tested in test_mul_scalar_tosa_BI")
-def test_mul_scalar_u85_BI():
+@pytest.mark.skip(reason="This is tested in test_mul_scalar_tosa_INT")
+def test_mul_scalar_u85_INT():
pass
-# DIV MI ------------------------------------------------------
+# DIV FP ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_div_tensor_tosa_MI_scalar(test_data):
+def test_div_tensor_tosa_FP_scalar(test_data):
"""Tests regular div with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](Div(), test_data, aten_op=Div.aten_op)
+ pipeline = TosaPipelineFP[input_t1](Div(), test_data, aten_op=Div.aten_op)
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_div_tensor_tosa_MI_inplace(test_data):
+def test_div_tensor_tosa_FP_inplace(test_data):
"""Tests inplace div with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](DivInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineFP[input_t1](DivInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_div_scalar_tosa_MI(test_data):
+def test_div_scalar_tosa_FP(test_data):
"""Tests a scalar div with one scalar input."""
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
DivScalar(), test_data, aten_op=DivScalar.aten_op
)
pipeline.run()
-# DIV BI ------------------------------------------------------
+# DIV INT ------------------------------------------------------
@common.parametrize("test_data", tensor_scalar_tests)
-def test_div_tensor_tosa_BI_scalar(test_data):
+def test_div_tensor_tosa_INT_scalar(test_data):
"""Tests regular div with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](Div(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](Div(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests)
-def test_div_tensor_tosa_BI_inplace(test_data):
+def test_div_tensor_tosa_INT_inplace(test_data):
"""Tests inplace div with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](DivInplace(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](DivInplace(), test_data, aten_op=[])
pipeline.run()
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
-def test_div_scalar_tosa_BI(test_data):
+def test_div_scalar_tosa_INT(test_data):
"""Tests a scalar div with one scalar input."""
- pipeline = TosaPipelineBI[input_t1](DivScalar(), test_data, aten_op=[])
+ pipeline = TosaPipelineINT[input_t1](DivScalar(), test_data, aten_op=[])
pipeline.run()
# DIV ETHOS-U ------------------------------------------------------
-@pytest.mark.skip(reason="This is tested in test_div_scalar_tosa_BI")
-def test_div_scalar_u55_BI():
+@pytest.mark.skip(reason="This is tested in test_div_scalar_tosa_INT")
+def test_div_scalar_u55_INT():
pass
-@pytest.mark.skip(reason="This is tested in test_div_scalar_tosa_BI")
-def test_div_scalar_u85_BI():
+@pytest.mark.skip(reason="This is tested in test_div_scalar_tosa_INT")
+def test_div_scalar_u85_INT():
pass
-# SHIFT ETHOS-U ------------------------------------------------------
+# SHIFT TOSA ------------------------------------------------------
-def test_bitwise_right_shift_tensor_tosa_MI_inplace():
- pipeline = TosaPipelineMI[input_t1](
+def test_bitwise_right_shift_tensor_tosa_FP_inplace():
+ pipeline = TosaPipelineFP[input_t1](
ShiftInplaceSub(),
(torch.IntTensor(5),),
aten_op="torch.ops.aten.__rshift__.Scalar",
@@ -424,8 +426,8 @@ def test_bitwise_right_shift_tensor_tosa_MI_inplace():
pipeline.run()
-def test_bitwise_right_shift_tensor_tosa_BI_inplace():
- pipeline = TosaPipelineBI[input_t1](
+def test_bitwise_right_shift_tensor_tosa_INT_inplace():
+ pipeline = TosaPipelineINT[input_t1](
ShiftInplaceSub(),
(torch.IntTensor(5),),
aten_op="torch.ops.aten.bitwise_right_shift.Tensor",
diff --git a/backends/arm/test/ops/test_sdpa.py b/backends/arm/test/ops/test_sdpa.py
index 470030f67fd..009e4b2ad70 100644
--- a/backends/arm/test/ops/test_sdpa.py
+++ b/backends/arm/test/ops/test_sdpa.py
@@ -8,9 +8,11 @@
import torch
+from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -27,19 +29,41 @@ def forward(self, query, key, value):
input_t = Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
-def test_sdpa_MI():
+def test_sdpa_tosa_FP():
test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
- pipeline = TosaPipelineMI[input_t](SDPA(), test_input, [], [])
+ pipeline = TosaPipelineFP[input_t](SDPA(), test_input, [], [])
pipeline.pop_stage("check_count.exir")
pipeline.run()
-def test_sdpa_BI():
+def test_sdpa_tosa_INT():
test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
- pipeline = TosaPipelineBI[input_t](SDPA(), test_input, [], [])
+ pipeline = TosaPipelineINT[input_t](SDPA(), test_input, [], [])
pipeline.pop_stage("check.quant_nodes")
pipeline.pop_stage("check_count.exir")
pipeline.pop_stage(
"run_method_and_compare_outputs"
) # TODO: reference is not quantized
pipeline.run()
+
+
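+# The VGF pipelines below lower through TOSA 1.0 (FP or INT profile) and then
+# convert the result with the external model-converter tool, so they are
+# skipped when that tool is not available.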
+@common.SkipIfNoModelConverter
+def test_sdpa_vgf_FP():
+ test_input = tuple(torch.randn(1, 3, 197, 64) for _ in range(3))
+ pipeline = VgfPipeline[input_t](
+ SDPA(), test_input, [], [], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_sdpa_vgf_INT():
+ test_input = tuple(torch.randn(1, 3, 197, 64) for _ in range(3))
+ pipeline = VgfPipeline[input_t](
+ SDPA(),
+ test_input,
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_select.py b/backends/arm/test/ops/test_select.py
index 72ab637ddfb..dcf5a4a181b 100644
--- a/backends/arm/test/ops/test_select.py
+++ b/backends/arm/test/ops/test_select.py
@@ -11,11 +11,12 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor, int, int]
@@ -58,8 +59,8 @@ def forward(self, x, dim: int, index: int):
@common.parametrize("test_data", test_data_suite)
-def test_select_int_tosa_MI_copy(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_select_int_tosa_FP_copy(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
SelectCopy(),
test_data(),
aten_op=aten_op_copy,
@@ -69,8 +70,8 @@ def test_select_int_tosa_MI_copy(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_select_int_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_select_int_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
SelectInt(),
test_data(),
aten_op=aten_op_int,
@@ -80,8 +81,8 @@ def test_select_int_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_select_int_tosa_BI_copy(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_select_int_tosa_INT_copy(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
SelectCopy(),
test_data(),
aten_op=aten_op_copy,
@@ -91,8 +92,8 @@ def test_select_int_tosa_BI_copy(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_select_int_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_select_int_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
SelectInt(),
test_data(),
aten_op=aten_op_int,
@@ -108,8 +109,8 @@ def test_select_int_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite, x_fails)
@common.XfailIfNoCorstone300
-def test_select_int_u55_BI_copy(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_select_int_u55_INT_copy(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
SelectCopy(),
test_data(),
aten_op_copy,
@@ -122,8 +123,8 @@ def test_select_int_u55_BI_copy(test_data: Tuple):
@common.parametrize("test_data", test_data_suite, x_fails)
@common.XfailIfNoCorstone300
-def test_select_int_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_select_int_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
SelectInt(),
test_data(),
aten_op_int,
@@ -135,7 +136,7 @@ def test_select_int_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_not_delegated)
-def test_select_int_u55_BI_not_delegated(test_data: Tuple):
+def test_select_int_u55_INT_not_delegated(test_data: Tuple):
pipeline = OpNotSupportedPipeline[input_t1](
SelectInt(),
test_data(),
@@ -149,8 +150,8 @@ def test_select_int_u55_BI_not_delegated(test_data: Tuple):
@common.parametrize("test_data", test_data_suite, x_fails)
@common.XfailIfNoCorstone320
-def test_select_int_u85_BI_copy(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_select_int_u85_INT_copy(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
SelectCopy(),
test_data(),
aten_op_copy,
@@ -163,8 +164,8 @@ def test_select_int_u85_BI_copy(test_data: Tuple):
@common.parametrize("test_data", test_data_suite, x_fails)
@common.XfailIfNoCorstone320
-def test_select_int_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_select_int_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
SelectInt(),
test_data(),
aten_op_int,
@@ -173,3 +174,47 @@ def test_select_int_u85_BI(test_data: Tuple):
use_to_edge_transform_and_lower=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_select_int_vgf_FP_copy(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SelectCopy(), test_data(), aten_op_copy, [], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_select_int_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SelectInt(), test_data(), aten_op_int, [], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_select_int_vgf_INT_copy(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SelectCopy(),
+ test_data(),
+ aten_op_copy,
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_select_int_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SelectInt(),
+ test_data(),
+ aten_op_int,
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_sigmoid.py b/backends/arm/test/ops/test_sigmoid.py
index b5ee68b987b..a29bbc84782 100644
--- a/backends/arm/test/ops/test_sigmoid.py
+++ b/backends/arm/test/ops/test_sigmoid.py
@@ -9,12 +9,13 @@
from typing import Tuple
import torch
-from executorch.backends.arm.test import common, conftest
+from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
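+# The FP/INT pipelines pick up the TOSA version from the test configuration,
+# so no explicit tosa_version argument is passed in this file (assumption
+# based on the removed conftest usage).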
-aten_op = "torch.ops.aten.sigmoid.default" # Used for checking that we do not have softmax in the graph after decompose
+aten_op = "torch.ops.aten.sigmoid.default" # Checked for in the graph by the pipelines below
@@ -69,78 +70,72 @@ def forward(self, x, y):
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_tosa_MI(test_data: torch.Tensor):
- TosaPipelineMI[input_t1](Sigmoid(), (test_data(),), aten_op, exir_op).run()
+def test_sigmoid_tosa_FP(test_data: torch.Tensor):
+ TosaPipelineFP[input_t1](Sigmoid(), (test_data(),), aten_op, exir_op).run()
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_tosa_BI(test_data: torch.Tensor):
- TosaPipelineBI[input_t1](Sigmoid(), (test_data(),), aten_op, exir_op).run()
+def test_sigmoid_tosa_INT(test_data: torch.Tensor):
+ TosaPipelineINT[input_t1](Sigmoid(), (test_data(),), aten_op, exir_op).run()
-def test_sigmoid_tosa_MI_add():
- TosaPipelineMI[input_t1](
+def test_sigmoid_tosa_FP_add():
+ TosaPipelineFP[input_t1](
AddSigmoid(),
(test_data_suite["zeros"](),),
aten_op,
exir_op,
- tosa_version=conftest.get_option("tosa_version"),
).run()
-def test_sigmoid_tosa_BI_add():
- TosaPipelineBI[input_t1](
+def test_sigmoid_tosa_INT_add():
+ TosaPipelineINT[input_t1](
AddSigmoid(),
(test_data_suite["ramp"](),),
aten_op,
exir_op,
- tosa_version=conftest.get_option("tosa_version"),
).run()
-def test_sigmoid_tosa_MI_add_2():
- TosaPipelineMI[input_t1](
+def test_sigmoid_tosa_FP_add_2():
+ TosaPipelineFP[input_t1](
SigmoidAdd(),
(test_data_suite["zeros"](),),
aten_op,
exir_op,
- tosa_version=conftest.get_option("tosa_version"),
).run()
-def test_sigmoid_tosa_BI_add_2():
- TosaPipelineBI[input_t1](
+def test_sigmoid_tosa_INT_add_2():
+ TosaPipelineINT[input_t1](
SigmoidAdd(),
(test_data_suite["zeros"](),),
aten_op,
exir_op,
- tosa_version=conftest.get_option("tosa_version"),
).run()
-def test_sigmoid_tosa_MI_add_3():
- TosaPipelineMI[input_t1](
+def test_sigmoid_tosa_FP_add_3():
+ TosaPipelineFP[input_t1](
SigmoidAddSigmoid(),
(test_data_suite["randn_neg"](), test_data_suite["randn_pos"]()),
aten_op,
exir_op,
- tosa_version=conftest.get_option("tosa_version"),
).run()
-def test_sigmoid_tosa_BI_3():
- TosaPipelineBI[input_t1](
+def test_sigmoid_tosa_INT_add_3():
+ TosaPipelineINT[input_t1](
SigmoidAddSigmoid(),
(test_data_suite["randn_neg"](), test_data_suite["randn_pos"]()),
aten_op,
exir_op,
- tosa_version=conftest.get_option("tosa_version"),
).run()
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_sigmoid_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Sigmoid(),
(test_data(),),
aten_op,
@@ -151,8 +146,8 @@ def test_sigmoid_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_sigmoid_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Sigmoid(),
(test_data(),),
aten_op,
@@ -160,3 +155,101 @@ def test_sigmoid_u85_BI(test_data: Tuple):
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sigmoid(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sigmoid(),
+ (test_data(),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_FP_add():
+ pipeline = VgfPipeline[input_t1](
+ AddSigmoid(),
+ (test_data_suite["zeros"](),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_INT_add():
+ pipeline = VgfPipeline[input_t1](
+ AddSigmoid(),
+ (test_data_suite["ramp"](),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_FP_add_2():
+ pipeline = VgfPipeline[input_t1](
+ SigmoidAdd(),
+ (test_data_suite["zeros"](),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_INT_add_2():
+ pipeline = VgfPipeline[input_t1](
+ SigmoidAdd(),
+ (test_data_suite["zeros"](),),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_FP_add_3():
+ pipeline = VgfPipeline[input_t1](
+ SigmoidAddSigmoid(),
+ (test_data_suite["randn_neg"](), test_data_suite["randn_pos"]()),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_sigmoid_vgf_INT_add_3():
+ pipeline = VgfPipeline[input_t1](
+ SigmoidAddSigmoid(),
+ (test_data_suite["randn_neg"](), test_data_suite["randn_pos"]()),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_sigmoid_16bit.py b/backends/arm/test/ops/test_sigmoid_16bit.py
index 56b5822f8f4..3d70881a3f0 100644
--- a/backends/arm/test/ops/test_sigmoid_16bit.py
+++ b/backends/arm/test/ops/test_sigmoid_16bit.py
@@ -12,9 +12,9 @@
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
+ TosaPipelineINT,
)
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.backends.xnnpack.test.tester import Quantize
@@ -40,11 +40,8 @@ def _get_16_bit_quant_config():
def get_16bit_sigmoid_quantizer(u55_config=False):
tosa_version = conftest.get_option("tosa_version")
tosa_profiles = {
- "0.80": TosaSpecification.create_from_string(
- "TOSA-0.80+BI" + ("+u55" if u55_config else "")
- ),
"1.0": TosaSpecification.create_from_string(
- "TOSA-1.0+INT" + ("+u55" if u55_config else "")
+ "TOSA-1.0+INT+int16" + ("+u55" if u55_config else "")
),
}
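+    # The int16 extension flags 16-bit integer support in the TOSA spec,
+    # matching the 16-bit quantization config built above (assumed rationale).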
@@ -90,13 +87,14 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_tosa_BI(test_data):
- pipeline = TosaPipelineBI(
+def test_sigmoid_tosa_INT(test_data):
+ pipeline = TosaPipelineINT(
Sigmoid(),
(test_data(),),
Sigmoid.aten_op,
Sigmoid.exir_op,
qtol=1,
+ tosa_extensions=["int16"],
)
pipeline.change_args("quantize", get_16bit_sigmoid_quantizer())
pipeline.run()
@@ -110,14 +108,16 @@ def test_sigmoid_tosa_BI(test_data):
},
strict=False,
)
-def test_sigmoid_tosa_BI_add_sigmoid(test_data):
- pipeline = TosaPipelineBI(
+def test_sigmoid_tosa_INT_add_sigmoid(test_data):
+ pipeline = TosaPipelineINT(
SigmoidAddSigmoid(),
(test_data(),),
Sigmoid.aten_op,
Sigmoid.exir_op,
qtol=1,
+ tosa_extensions=["int16"],
)
+ pipeline.change_args("quantize", get_16bit_sigmoid_quantizer())
pipeline.run()
@@ -133,7 +133,7 @@ def test_sigmoid_tosa_BI_add_sigmoid(test_data):
"test_data",
test_data_suite,
)
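+# 16-bit sigmoid is expected to remain undelegated on U55, hence the
+# OpNotSupportedPipeline below.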
-def test_sigmoid_u55_BI(test_data):
+def test_sigmoid_u55_INT(test_data):
pipeline = OpNotSupportedPipeline(
Sigmoid(),
(test_data(),),
@@ -149,7 +149,7 @@ def test_sigmoid_u55_BI(test_data):
"test_data",
test_data_suite,
)
-def test_sigmoid_u55_BI_add_sigmoid(test_data):
+def test_sigmoid_u55_INT_add_sigmoid(test_data):
pipeline = OpNotSupportedPipeline(
SigmoidAddSigmoid(),
(test_data(),),
@@ -157,6 +157,7 @@ def test_sigmoid_u55_BI_add_sigmoid(test_data):
n_expected_delegates=1,
quantize=True,
u55_subset=True,
+ tosa_extensions=["int16"],
)
pipeline.change_args("quantize", get_16bit_sigmoid_quantizer(True))
pipeline.run()
@@ -164,8 +165,8 @@ def test_sigmoid_u55_BI_add_sigmoid(test_data):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_sigmoid_u85_BI(test_data):
- pipeline = EthosU85PipelineBI(
+def test_sigmoid_u85_INT(test_data):
+ pipeline = EthosU85PipelineINT(
Sigmoid(),
(test_data(),),
Sigmoid.aten_op,
@@ -185,8 +186,8 @@ def test_sigmoid_u85_BI(test_data):
)
@pytest.mark.flaky(reruns=5) # MLETORCH-787: Investigate int16-int8 rescaling precision
@common.XfailIfNoCorstone320
-def test_sigmoid_u85_BI_add_sigmoid(test_data):
- pipeline = EthosU85PipelineBI(
+def test_sigmoid_u85_INT_add_sigmoid(test_data):
+ pipeline = EthosU85PipelineINT(
SigmoidAddSigmoid(),
(test_data(),),
Sigmoid.aten_op,
diff --git a/backends/arm/test/ops/test_sigmoid_32bit.py b/backends/arm/test/ops/test_sigmoid_32bit.py
index 9cbfe89a31a..553a852b245 100644
--- a/backends/arm/test/ops/test_sigmoid_32bit.py
+++ b/backends/arm/test/ops/test_sigmoid_32bit.py
@@ -8,9 +8,9 @@
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
+ TosaPipelineINT,
)
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.backends.xnnpack.test.tester import Quantize
@@ -56,11 +56,8 @@ def _get_32_bit_quant_config():
def get_32bit_sigmoid_quantizer(u55_config=False):
tosa_version = conftest.get_option("tosa_version")
tosa_profiles = {
- "0.80": TosaSpecification.create_from_string(
- "TOSA-0.80+BI" + ("+u55" if u55_config else "")
- ),
"1.0": TosaSpecification.create_from_string(
- "TOSA-1.0+INT" + ("+u55" if u55_config else "")
+ "TOSA-1.0+INT+int16" + ("+u55" if u55_config else "")
),
}
@@ -106,46 +103,49 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_tosa_BI(test_data):
- pipeline = TosaPipelineBI(
+def test_sigmoid_tosa_INT(test_data):
+ pipeline = TosaPipelineINT(
Sigmoid(),
(test_data(),),
Sigmoid.aten_op,
Sigmoid.exir_op,
qtol=1,
+ tosa_extensions=["int16"],
)
pipeline.change_args("quantize", get_32bit_sigmoid_quantizer())
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_tosa_BI_add_sigmoid(test_data):
- pipeline = TosaPipelineBI(
+def test_sigmoid_tosa_INT_add_sigmoid(test_data):
+ pipeline = TosaPipelineINT(
SigmoidAddSigmoid(),
(test_data(),),
Sigmoid.aten_op,
Sigmoid.exir_op,
qtol=1,
+ tosa_extensions=["int16"],
)
pipeline.change_args("quantize", get_32bit_sigmoid_quantizer())
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_u55_BI(test_data):
+def test_sigmoid_u55_INT(test_data):
pipeline = OpNotSupportedPipeline(
Sigmoid(),
(test_data(),),
{Sigmoid.exir_op: 1},
quantize=True,
u55_subset=True,
+ tosa_extensions=["int16"],
)
pipeline.change_args("quantize", get_32bit_sigmoid_quantizer(True))
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_sigmoid_u55_BI_add_sigmoid(test_data):
+def test_sigmoid_u55_INT_add_sigmoid(test_data):
pipeline = OpNotSupportedPipeline(
SigmoidAddSigmoid(),
(test_data(),),
@@ -153,6 +153,7 @@ def test_sigmoid_u55_BI_add_sigmoid(test_data):
n_expected_delegates=1,
quantize=True,
u55_subset=True,
+ tosa_extensions=["int16"],
)
pipeline.change_args("quantize", get_32bit_sigmoid_quantizer(True))
pipeline.run()
@@ -160,8 +161,8 @@ def test_sigmoid_u55_BI_add_sigmoid(test_data):
@common.parametrize("test_data", test_data_suite)
@common.XfailIfNoCorstone320
-def test_sigmoid_u85_BI(test_data):
- pipeline = EthosU85PipelineBI(
+def test_sigmoid_u85_INT(test_data):
+ pipeline = EthosU85PipelineINT(
Sigmoid(),
(test_data(),),
Sigmoid.aten_op,
@@ -177,8 +178,8 @@ def test_sigmoid_u85_BI(test_data):
test_data_suite,
)
@common.XfailIfNoCorstone320
-def test_sigmoid_u85_BI_add_sigmoid(test_data):
- pipeline = EthosU85PipelineBI(
+def test_sigmoid_u85_INT_add_sigmoid(test_data):
+ pipeline = EthosU85PipelineINT(
SigmoidAddSigmoid(),
(test_data(),),
Sigmoid.aten_op,
diff --git a/backends/arm/test/ops/test_sign.py b/backends/arm/test/ops/test_sign.py
index 1747570e35f..35ea9fc3e45 100644
--- a/backends/arm/test/ops/test_sign.py
+++ b/backends/arm/test/ops/test_sign.py
@@ -9,10 +9,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.sign.default"
@@ -40,8 +41,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_sign_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_sign_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Sign(),
(test_data,),
aten_op=aten_op,
@@ -51,8 +52,8 @@ def test_sign_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_sign_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_sign_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Sign(),
(test_data,),
aten_op=[],
@@ -64,8 +65,8 @@ def test_sign_tosa_BI(test_data: Tuple):
@common.XfailIfNoCorstone300
@common.parametrize("test_data", test_data_suite)
@pytest.mark.xfail(reason="where.self not supported on U55")
-def test_sign_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_sign_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Sign(),
(test_data,),
aten_ops=[],
@@ -76,11 +77,37 @@ def test_sign_u55_BI(test_data: Tuple):
@common.XfailIfNoCorstone320
@common.parametrize("test_data", test_data_suite)
-def test_sign_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_sign_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Sign(),
(test_data,),
aten_ops=[],
exir_ops=exir_op,
)
pipeline.run()
+
+
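+# Sign appears to lower through a where-based decomposition (see the U55
+# xfail reason above), which is why the INT tests pass aten_op=[].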
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sign_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sign(),
+ (test_data,),
+ aten_op=aten_op,
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sign_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sign(),
+ (test_data,),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_silu.py b/backends/arm/test/ops/test_silu.py
index e1736bf10e6..edc7d769be1 100644
--- a/backends/arm/test/ops/test_silu.py
+++ b/backends/arm/test/ops/test_silu.py
@@ -11,10 +11,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
@@ -40,74 +41,120 @@ def forward(
"op_silu_rank4_large_randn": lambda: 200 * torch.randn(1, 10, 25, 20) + 1,
}
- aten_op_MI = "torch.ops.aten.silu.default"
- aten_op_inplace_MI = "torch.ops.aten.silu_.default"
- aten_op_BI = ["torch.ops.aten.sigmoid.default", "torch.ops.aten.mul.Tensor"]
+ aten_op_FP = "torch.ops.aten.silu.default"
+ aten_op_inplace_FP = "torch.ops.aten.silu_.default"
+ aten_op_INT = ["torch.ops.aten.sigmoid.default", "torch.ops.aten.mul.Tensor"]
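+    # silu(x) = x * sigmoid(x); under quantization it is decomposed, so the
+    # INT tests check for the sigmoid and mul ops rather than aten.silu.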
@common.parametrize("test_data", Silu.test_data)
-def test_silu_tosa_MI(test_data: input_t):
+def test_silu_tosa_FP(test_data: input_t):
silu_data = (test_data(), False)
- pipeline = TosaPipelineMI[input_t](Silu(), silu_data, Silu.aten_op_MI)
+ pipeline = TosaPipelineFP[input_t](Silu(), silu_data, Silu.aten_op_FP)
pipeline.run()
@common.parametrize("test_data", Silu.test_data)
-def test_silu_tosa_MI_inplace(test_data: input_t):
+def test_silu_tosa_FP_inplace(test_data: input_t):
silu_data = (test_data(), True)
- pipeline = TosaPipelineMI[input_t](Silu(), silu_data, Silu.aten_op_inplace_MI)
+ pipeline = TosaPipelineFP[input_t](Silu(), silu_data, Silu.aten_op_inplace_FP)
pipeline.run()
@common.parametrize("test_data", Silu.test_data)
-def test_silu_tosa_BI(test_data: input_t):
+def test_silu_tosa_INT(test_data: input_t):
silu_data = (test_data(), False)
- pipeline = TosaPipelineBI[input_t](Silu(), silu_data, Silu.aten_op_BI)
+ pipeline = TosaPipelineINT[input_t](Silu(), silu_data, Silu.aten_op_INT)
pipeline.run()
@common.parametrize("test_data", Silu.test_data)
-def test_silu_tosa_BI_inplace(test_data: input_t):
+def test_silu_tosa_INT_inplace(test_data: input_t):
silu_data = (test_data(), True)
- pipeline = TosaPipelineBI[input_t](Silu(), silu_data, Silu.aten_op_BI)
+ pipeline = TosaPipelineINT[input_t](Silu(), silu_data, Silu.aten_op_INT)
pipeline.run()
@common.parametrize("test_data", Silu.test_data)
@common.XfailIfNoCorstone300
-def test_silu_u55_BI(test_data: input_t):
+def test_silu_u55_INT(test_data: input_t):
silu_data = (test_data(), False)
- pipeline = EthosU55PipelineBI[input_t](
- Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True
+ pipeline = EthosU55PipelineINT[input_t](
+ Silu(), silu_data, Silu.aten_op_INT, run_on_fvp=True
)
pipeline.run()
@common.parametrize("test_data", Silu.test_data)
@common.XfailIfNoCorstone300
-def test_silu_u55_BI_inplace(test_data: input_t):
+def test_silu_u55_INT_inplace(test_data: input_t):
silu_data = (test_data(), True)
- pipeline = EthosU55PipelineBI[input_t](
- Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True
+ pipeline = EthosU55PipelineINT[input_t](
+ Silu(), silu_data, Silu.aten_op_INT, run_on_fvp=True
)
pipeline.run()
@common.parametrize("test_data", Silu.test_data)
@common.XfailIfNoCorstone320
-def test_silu_u85_BI(test_data: input_t):
+def test_silu_u85_INT(test_data: input_t):
silu_data = (test_data(), False)
- pipeline = EthosU85PipelineBI[input_t](
- Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True
+ pipeline = EthosU85PipelineINT[input_t](
+ Silu(), silu_data, Silu.aten_op_INT, run_on_fvp=True
)
pipeline.run()
@common.parametrize("test_data", Silu.test_data)
@common.XfailIfNoCorstone320
-def test_silu_u85_BI_inplace(test_data: input_t):
+def test_silu_u85_INT_inplace(test_data: input_t):
silu_data = (test_data(), True)
- pipeline = EthosU85PipelineBI[input_t](
- Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True
+ pipeline = EthosU85PipelineINT[input_t](
+ Silu(), silu_data, Silu.aten_op_INT, run_on_fvp=True
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Silu.test_data)
+@common.SkipIfNoModelConverter
+def test_silu_vgf_FP(test_data: input_t):
+ silu_data = (test_data(), False)
+ pipeline = VgfPipeline[input_t](
+ Silu(), silu_data, Silu.aten_op_FP, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Silu.test_data)
+@common.SkipIfNoModelConverter
+def test_silu_vgf_FP_inplace(test_data: input_t):
+ silu_data = (test_data(), True)
+ pipeline = VgfPipeline[input_t](
+ Silu(), silu_data, Silu.aten_op_inplace_FP, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Silu.test_data)
+@common.SkipIfNoModelConverter
+def test_silu_vgf_INT(test_data: input_t):
+ silu_data = (test_data(), False)
+ pipeline = VgfPipeline[input_t](
+ Silu(),
+ silu_data,
+ Silu.aten_op_INT,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Silu.test_data)
+@common.SkipIfNoModelConverter
+def test_silu_vgf_INT_inplace(test_data: input_t):
+ silu_data = (test_data(), True)
+ pipeline = VgfPipeline[input_t](
+ Silu(),
+ silu_data,
+ Silu.aten_op_INT,
+ tosa_version="TOSA-1.0+INT",
)
pipeline.run()
diff --git a/backends/arm/test/ops/test_sin.py b/backends/arm/test/ops/test_sin.py
index 7f1f9f569af..3ca593ad608 100644
--- a/backends/arm/test/ops/test_sin.py
+++ b/backends/arm/test/ops/test_sin.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.sin.default"
@@ -37,8 +38,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_sin_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_sin_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Sin(),
(test_data,),
aten_op,
@@ -49,8 +50,8 @@ def test_sin_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_sin_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_sin_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Sin(),
(test_data,),
aten_op,
@@ -60,8 +61,8 @@ def test_sin_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_sin_tosa_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_sin_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Sin(),
(test_data,),
aten_op,
@@ -72,8 +73,8 @@ def test_sin_tosa_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_sin_tosa_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_sin_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Sin(),
(test_data,),
aten_op,
@@ -81,3 +82,24 @@ def test_sin_tosa_u85_BI(test_data: Tuple):
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sin_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sin(), (test_data,), aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sin_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sin(),
+ (test_data,),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_sinh.py b/backends/arm/test/ops/test_sinh.py
index fd6cbf2b65b..a059ce0ad26 100644
--- a/backends/arm/test/ops/test_sinh.py
+++ b/backends/arm/test/ops/test_sinh.py
@@ -8,10 +8,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.sinh.default"
@@ -42,8 +43,8 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_sinh_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_sinh_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Sinh(),
(test_data,),
aten_op,
@@ -53,8 +54,8 @@ def test_sinh_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_sinh_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_sinh_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Sinh(), (test_data,), aten_op=aten_op, exir_op=exir_op
)
pipeline.run()
@@ -62,8 +63,8 @@ def test_sinh_tosa_BI(test_data: Tuple):
@common.XfailIfNoCorstone300
@common.parametrize("test_data", test_data_suite)
-def test_sinh_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_sinh_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Sinh(), (test_data,), aten_ops=aten_op, exir_ops=exir_op
)
pipeline.run()
@@ -71,8 +72,29 @@ def test_sinh_u55_BI(test_data: Tuple):
@common.XfailIfNoCorstone320
@common.parametrize("test_data", test_data_suite)
-def test_sinh_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_sinh_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Sinh(), (test_data,), aten_ops=aten_op, exir_ops=exir_op
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sinh_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sinh(), (test_data,), aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_sinh_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Sinh(),
+ (test_data,),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_slice.py b/backends/arm/test/ops/test_slice.py
index 6ae12c41657..915aec2e522 100644
--- a/backends/arm/test/ops/test_slice.py
+++ b/backends/arm/test/ops/test_slice.py
@@ -12,10 +12,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.slice.Tensor"
@@ -43,14 +44,14 @@ def forward(self, x: torch.Tensor, s: list[tuple[int, int]]):
@common.parametrize("test_data", test_data_suite)
-def test_slice_tensor_tosa_MI(test_data: torch.Tensor):
- pipeline = TosaPipelineMI[input_t1](Slice(), test_data(), aten_op, exir_op)
+def test_slice_tensor_tosa_FP(test_data: torch.Tensor):
+ pipeline = TosaPipelineFP[input_t1](Slice(), test_data(), aten_op, exir_op)
pipeline.run()
@common.parametrize("test_data", test_data_suite)
-def test_slice_tensor_tosa_BI_nchw(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_slice_tensor_tosa_INT_nchw(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Slice(),
test_data(),
aten_op,
@@ -60,8 +61,8 @@ def test_slice_tensor_tosa_BI_nchw(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_slice_tensor_tosa_BI_nhwc(test_data: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_slice_tensor_tosa_INT_nhwc(test_data: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Slice(),
test_data(),
aten_op,
@@ -71,8 +72,8 @@ def test_slice_tensor_tosa_BI_nhwc(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_slice_tensor_u55_BI(test_data: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_slice_tensor_u55_INT(test_data: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Slice(),
test_data(),
aten_ops=[],
@@ -83,8 +84,8 @@ def test_slice_tensor_u55_BI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_slice_tensor_u85_BI(test_data: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_slice_tensor_u85_INT(test_data: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Slice(),
test_data(),
aten_ops=[],
@@ -92,3 +93,29 @@ def test_slice_tensor_u85_BI(test_data: torch.Tensor):
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_slice_tensor_vgf_FP(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Slice(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_slice_tensor_vgf_INT(test_data: torch.Tensor):
+ pipeline = VgfPipeline[input_t1](
+ Slice(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_softmax.py b/backends/arm/test/ops/test_softmax.py
index 5ab616c0eea..4bbd4d83285 100644
--- a/backends/arm/test/ops/test_softmax.py
+++ b/backends/arm/test/ops/test_softmax.py
@@ -10,10 +10,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.softmax.default" # Used for checking that we do not have softmax in the graph after decompose
@@ -42,9 +43,9 @@ def forward(self, x):
@common.parametrize("test_data", Softmax.test_data)
-def test_softmax_tosa_MI(test_data):
+def test_softmax_tosa_FP(test_data):
data, dim = test_data()
- pipeline = TosaPipelineMI[input_t1](Softmax(dim), data, [])
+ pipeline = TosaPipelineFP[input_t1](Softmax(dim), data, [])
pipeline.add_stage_after(
"to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op]
)
@@ -52,9 +53,9 @@ def test_softmax_tosa_MI(test_data):
@common.parametrize("test_data", Softmax.test_data)
-def test_softmax_tosa_BI(test_data):
+def test_softmax_tosa_INT(test_data):
data, dim = test_data()
- pipeline = TosaPipelineBI[input_t1](Softmax(dim), data, [])
+ pipeline = TosaPipelineINT[input_t1](Softmax(dim), data, [])
pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
pipeline.run()
@@ -68,9 +69,9 @@ def test_softmax_tosa_BI(test_data):
},
)
@common.XfailIfNoCorstone300
-def test_softmax_u55_BI(test_data):
+def test_softmax_u55_INT(test_data):
data, dim = test_data()
- pipeline = EthosU55PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=True)
+ pipeline = EthosU55PipelineINT[input_t1](Softmax(dim), data, [], run_on_fvp=True)
pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
pipeline.run()
@@ -84,9 +85,41 @@ def test_softmax_u55_BI(test_data):
},
)
@common.XfailIfNoCorstone320
-def test_softmax_u85_BI(test_data):
+def test_softmax_u85_INT(test_data):
data, dim = test_data()
- pipeline = EthosU85PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=True)
+ pipeline = EthosU85PipelineINT[input_t1](Softmax(dim), data, [], run_on_fvp=True)
pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
pipeline.run()
+
+
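+# Softmax is decomposed before lowering; the FP test below checks that no
+# edge-dialect softmax op remains after to_edge_transform_and_lower.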
+@common.parametrize("test_data", Softmax.test_data)
+@common.SkipIfNoModelConverter
+def test_softmax_vgf_FP(test_data):
+ data, dim = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Softmax(dim),
+ data,
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.add_stage_after(
+ "to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op]
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Softmax.test_data)
+@common.SkipIfNoModelConverter
+def test_softmax_vgf_INT(test_data):
+ data, dim = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Softmax(dim),
+ data,
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    # TODO: MLETORCH-1136 Change the run_method_and_compare_outputs args for the vgf tests
+ # pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_split.py b/backends/arm/test/ops/test_split.py
index 90458584995..388e85762af 100644
--- a/backends/arm/test/ops/test_split.py
+++ b/backends/arm/test/ops/test_split.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
exir_op = "executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default"
@@ -63,9 +64,9 @@ def forward(
"test_data",
(Split.test_data | Split.test_data_list),
)
-def test_split_with_sizes_tosa_MI(test_data: input_t1):
+def test_split_with_sizes_tosa_FP(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Split(),
test_data(),
aten_op=[],
@@ -75,9 +76,9 @@ def test_split_with_sizes_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", Split.test_data_list)
-def test_split_with_sizes_tosa_MI_2(test_data: input_t1):
+def test_split_with_sizes_tosa_FP_2(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
SplitWithSizes(),
test_data(),
aten_op=[],
@@ -90,9 +91,9 @@ def test_split_with_sizes_tosa_MI_2(test_data: input_t1):
"test_data",
(Split.test_data | Split.test_data_list),
)
-def test_split_with_sizes_tosa_MI_one_out(test_data: input_t1):
+def test_split_with_sizes_tosa_FP_one_out(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
SplitSingleOut(),
test_data(),
aten_op=[],
@@ -105,9 +106,24 @@ def test_split_with_sizes_tosa_MI_one_out(test_data: input_t1):
"test_data",
(Split.test_data | Split.test_data_list),
)
-def test_split_with_sizes_tosa_BI(test_data: input_t1):
+def test_split_with_sizes_tosa_FP_two_out(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
+ SplitTwoOut(),
+ test_data(),
+ aten_op=[],
+ exir_op=exir_op,
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ (Split.test_data | Split.test_data_list),
+)
+def test_split_with_sizes_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
Split(),
test_data(),
aten_op=[],
@@ -120,8 +136,8 @@ def test_split_with_sizes_tosa_BI(test_data: input_t1):
"test_data",
(Split.test_data | Split.test_data_list),
)
-def test_split_with_sizes_u55_BI(test_data: input_t1):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_split_with_sizes_u55_INT(test_data: input_t1):
+ pipeline = EthosU55PipelineINT[input_t1](
Split(),
test_data(),
aten_ops=[],
@@ -135,9 +151,9 @@ def test_split_with_sizes_u55_BI(test_data: input_t1):
"test_data",
(Split.test_data | Split.test_data_list),
)
-def test_split_with_sizes_u85_BI(test_data: input_t1):
+def test_split_with_sizes_u85_INT(test_data: input_t1):
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
Split(),
test_data(),
aten_ops=[],
@@ -145,3 +161,84 @@ def test_split_with_sizes_u85_BI(test_data: input_t1):
run_on_fvp=False,
)
pipeline.run()
+
+
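+# The VGF variants below mirror the TOSA tests above; `|` merges the two
+# test-data dictionaries into a single parametrize suite.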
+@common.parametrize(
+ "test_data",
+ (Split.test_data | Split.test_data_list),
+)
+@common.SkipIfNoModelConverter
+def test_split_with_sizes_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Split(),
+ test_data(),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Split.test_data_list)
+@common.SkipIfNoModelConverter
+def test_split_with_sizes_vgf_FP_2(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ SplitWithSizes(),
+ test_data(),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ (Split.test_data | Split.test_data_list),
+)
+@common.SkipIfNoModelConverter
+def test_split_with_sizes_vgf_FP_one_out(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ SplitSingleOut(),
+ test_data(),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ (Split.test_data | Split.test_data_list),
+)
+@common.SkipIfNoModelConverter
+def test_split_with_sizes_vgf_FP_two_out(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ SplitTwoOut(),
+ test_data(),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ (Split.test_data | Split.test_data_list),
+)
+@common.SkipIfNoModelConverter
+def test_split_with_sizes_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Split(),
+ test_data(),
+ aten_op=[],
+ exir_op=exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_sqrt.py b/backends/arm/test/ops/test_sqrt.py
index 0c79f534656..00ec1f48af8 100644
--- a/backends/arm/test/ops/test_sqrt.py
+++ b/backends/arm/test/ops/test_sqrt.py
@@ -9,20 +9,21 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
class Sqrt(torch.nn.Module):
input_t = Tuple[torch.Tensor]
- aten_op_MI = "torch.ops.aten.sqrt.default"
- exir_op_MI = "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Tensor"
+ aten_op_FP = "torch.ops.aten.sqrt.default"
+ exir_op_FP = "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Tensor"
- aten_op_BI = "torch.ops.aten.pow.Tensor_Scalar"
- exir_op_BI = "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar"
+ aten_op_INT = "torch.ops.aten.pow.Tensor_Scalar"
+ exir_op_INT = "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar"
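+    # sqrt decomposes to pow: the FP path keeps a tensor exponent while the
+    # INT path sees the scalar-exponent variant, hence the two op pairs.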
def __init__(self):
super().__init__()
@@ -45,35 +46,35 @@ def forward(self, x):
@common.parametrize("test_data", Sqrt.test_data)
-def test_sqrt_tosa_MI(test_data: Sqrt.input_t):
- pipeline = TosaPipelineMI[Sqrt.input_t](
+def test_sqrt_tosa_FP(test_data: Sqrt.input_t):
+ pipeline = TosaPipelineFP[Sqrt.input_t](
Sqrt(),
test_data(),
- Sqrt.aten_op_MI,
- Sqrt.exir_op_MI,
+ Sqrt.aten_op_FP,
+ Sqrt.exir_op_FP,
)
pipeline.run()
@common.parametrize("test_data", Sqrt.test_data)
-def test_sqrt_tosa_BI(test_data: Sqrt.input_t):
- pipeline = TosaPipelineBI[Sqrt.input_t](
+def test_sqrt_tosa_INT(test_data: Sqrt.input_t):
+ pipeline = TosaPipelineINT[Sqrt.input_t](
Sqrt(),
test_data(),
- Sqrt.aten_op_BI,
- Sqrt.exir_op_BI,
+ Sqrt.aten_op_INT,
+ Sqrt.exir_op_INT,
)
pipeline.run()
@common.parametrize("test_data", Sqrt.test_data, fvp_xfails)
@common.XfailIfNoCorstone300
-def test_sqrt_u55_BI(test_data: Sqrt.input_t):
- pipeline = EthosU55PipelineBI[Sqrt.input_t](
+def test_sqrt_u55_INT(test_data: Sqrt.input_t):
+ pipeline = EthosU55PipelineINT[Sqrt.input_t](
Sqrt(),
test_data(),
- Sqrt.aten_op_BI,
- Sqrt.exir_op_BI,
+ Sqrt.aten_op_INT,
+ Sqrt.exir_op_INT,
run_on_fvp=True,
)
pipeline.run()
@@ -81,12 +82,38 @@ def test_sqrt_u55_BI(test_data: Sqrt.input_t):
@common.parametrize("test_data", Sqrt.test_data, fvp_xfails)
@common.XfailIfNoCorstone320
-def test_sqrt_u85_BI(test_data: Sqrt.input_t):
- pipeline = EthosU85PipelineBI[Sqrt.input_t](
+def test_sqrt_u85_INT(test_data: Sqrt.input_t):
+ pipeline = EthosU85PipelineINT[Sqrt.input_t](
Sqrt(),
test_data(),
- Sqrt.aten_op_BI,
- Sqrt.exir_op_BI,
+ Sqrt.aten_op_INT,
+ Sqrt.exir_op_INT,
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", Sqrt.test_data)
+@common.SkipIfNoModelConverter
+def test_sqrt_vgf_FP(test_data: Sqrt.input_t):
+ pipeline = VgfPipeline[Sqrt.input_t](
+ Sqrt(),
+ test_data(),
+ Sqrt.aten_op_FP,
+ Sqrt.exir_op_FP,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Sqrt.test_data)
+@common.SkipIfNoModelConverter
+def test_sqrt_vgf_INT(test_data: Sqrt.input_t):
+ pipeline = VgfPipeline[Sqrt.input_t](
+ Sqrt(),
+ test_data(),
+ Sqrt.aten_op_INT,
+ Sqrt.exir_op_INT,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_squeeze.py b/backends/arm/test/ops/test_squeeze.py
index e5f606c887e..5c9f031deec 100644
--- a/backends/arm/test/ops/test_squeeze.py
+++ b/backends/arm/test/ops/test_squeeze.py
@@ -14,10 +14,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -56,9 +57,14 @@ def forward(self, x: torch.Tensor):
return x.squeeze()
+##############
+## Squeeze ###
+##############
+
+
@common.parametrize("test_data", Squeeze.test_parameters)
-def test_squeeze_dim_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_squeeze_dim_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Squeeze(),
test_data(),
aten_op="torch.ops.aten.squeeze.default",
@@ -68,8 +74,8 @@ def test_squeeze_dim_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", Squeeze.test_parameters)
-def test_squeeze_dim_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_squeeze_dim_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Squeeze(),
test_data(),
aten_op="torch.ops.aten.squeeze.default",
@@ -80,8 +86,8 @@ def test_squeeze_dim_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", Squeeze.test_parameters)
@common.XfailIfNoCorstone300
-def test_squeeze_dim_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_squeeze_dim_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Squeeze(),
test_data(),
aten_ops="torch.ops.aten.squeeze.default",
@@ -93,8 +99,8 @@ def test_squeeze_dim_u55_BI(test_data: Tuple):
@common.parametrize("test_data", Squeeze.test_parameters)
@common.XfailIfNoCorstone320
-def test_squeeze_dim_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_squeeze_dim_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Squeeze(),
test_data(),
aten_ops="torch.ops.aten.squeeze.default",
@@ -104,9 +110,40 @@ def test_squeeze_dim_u85_BI(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", Squeeze.test_parameters)
+@common.SkipIfNoModelConverter
+def test_squeeze_dim_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Squeeze(),
+ test_data(),
+ "torch.ops.aten.squeeze.default",
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Squeeze.test_parameters)
+@common.SkipIfNoModelConverter
+def test_squeeze_dim_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Squeeze(),
+ test_data(),
+ "torch.ops.aten.squeeze.default",
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+#################
+## SqueezeDim ###
+#################
+
+
@common.parametrize("test_data", SqueezeDim.test_parameters)
-def test_squeeze_dim_tosa_MI_2(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_squeeze_dim_tosa_FP_2(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
SqueezeDim(),
test_data(),
aten_op="torch.ops.aten.squeeze.dim",
@@ -116,8 +153,8 @@ def test_squeeze_dim_tosa_MI_2(test_data: Tuple):
@common.parametrize("test_data", SqueezeDim.test_parameters)
-def test_squeeze_dim_tosa_BI_2(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_squeeze_dim_tosa_INT_2(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
SqueezeDim(),
test_data(),
aten_op="torch.ops.aten.squeeze.dim",
@@ -128,8 +165,8 @@ def test_squeeze_dim_tosa_BI_2(test_data: Tuple):
@common.parametrize("test_data", SqueezeDim.test_parameters)
@common.XfailIfNoCorstone300
-def test_squeeze_dim_u55_BI_2(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_squeeze_dim_u55_INT_2(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
SqueezeDim(),
test_data(),
aten_ops="torch.ops.aten.squeeze.dim",
@@ -141,8 +178,8 @@ def test_squeeze_dim_u55_BI_2(test_data: Tuple):
@common.parametrize("test_data", SqueezeDim.test_parameters)
@common.XfailIfNoCorstone320
-def test_squeeze_dim_u85_BI_2(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_squeeze_dim_u85_INT_2(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
SqueezeDim(),
test_data(),
aten_ops="torch.ops.aten.squeeze.dim",
@@ -152,9 +189,40 @@ def test_squeeze_dim_u85_BI_2(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", SqueezeDim.test_parameters)
+@common.SkipIfNoModelConverter
+def test_squeeze_dim_vgf_FP_2(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SqueezeDim(),
+ test_data(),
+ "torch.ops.aten.squeeze.dim",
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", SqueezeDim.test_parameters)
+@common.SkipIfNoModelConverter
+def test_squeeze_dim_vgf_INT_2(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SqueezeDim(),
+ test_data(),
+ "torch.ops.aten.squeeze.dim",
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+##################
+## SqueezeDims ###
+##################
+
+
@common.parametrize("test_data", SqueezeDims.test_parameters)
-def test_squeeze_dims_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_squeeze_dims_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
SqueezeDims(),
test_data(),
aten_op="torch.ops.aten.squeeze.dims",
@@ -164,8 +232,8 @@ def test_squeeze_dims_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", SqueezeDims.test_parameters)
-def test_squeeze_dims_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_squeeze_dims_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
SqueezeDims(),
test_data(),
aten_op="torch.ops.aten.squeeze.dims",
@@ -176,8 +244,8 @@ def test_squeeze_dims_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", SqueezeDims.test_parameters)
@common.XfailIfNoCorstone300
-def test_squeeze_dims_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_squeeze_dims_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
SqueezeDims(),
test_data(),
aten_ops="torch.ops.aten.squeeze.dims",
@@ -189,8 +257,8 @@ def test_squeeze_dims_u55_BI(test_data: Tuple):
@common.parametrize("test_data", SqueezeDims.test_parameters)
@common.XfailIfNoCorstone320
-def test_squeeze_dims_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_squeeze_dims_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
SqueezeDims(),
test_data(),
aten_ops="torch.ops.aten.squeeze.dims",
@@ -198,3 +266,29 @@ def test_squeeze_dims_u85_BI(test_data: Tuple):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", SqueezeDims.test_parameters)
+@common.SkipIfNoModelConverter
+def test_squeeze_dims_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SqueezeDims(),
+ test_data(),
+ "torch.ops.aten.squeeze.dims",
+ [],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", SqueezeDims.test_parameters)
+@common.SkipIfNoModelConverter
+def test_squeeze_dims_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ SqueezeDims(),
+ test_data(),
+ "torch.ops.aten.squeeze.dims",
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_sub.py b/backends/arm/test/ops/test_sub.py
index 5957e27d5a9..e89fee04b62 100644
--- a/backends/arm/test/ops/test_sub.py
+++ b/backends/arm/test/ops/test_sub.py
@@ -10,10 +10,11 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.sub.Tensor"
@@ -63,9 +64,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
@common.parametrize("test_data", sub_test_data)
-def test_sub_tensor_tosa_MI(test_data):
- """Test Subtraction (TOSA MI)"""
- pipeline = TosaPipelineMI[input_t1](
+def test_sub_tensor_tosa_FP(test_data):
+ """Test Subtraction (TOSA FP)"""
+ pipeline = TosaPipelineFP[input_t1](
Sub(),
test_data(),
aten_op,
@@ -75,9 +76,9 @@ def test_sub_tensor_tosa_MI(test_data):
@common.parametrize("test_data", sub2_test_data)
-def test_sub_tensor_tosa_MI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
- """Test Two-Operand Subtraction (TOSA MI)"""
- pipeline = TosaPipelineMI[input_t2](
+def test_sub_tensor_tosa_FP_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
+ """Test Two-Operand Subtraction (TOSA FP)"""
+ pipeline = TosaPipelineFP[input_t2](
Sub2(),
test_data(),
aten_op,
@@ -87,9 +88,9 @@ def test_sub_tensor_tosa_MI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
@common.parametrize("test_data", sub_test_data)
-def test_sub_tensor_tosa_BI(test_data):
- """Test Subtraction (TOSA BI)"""
- pipeline = TosaPipelineBI[input_t1](
+def test_sub_tensor_tosa_INT(test_data):
+ """Test Subtraction (TOSA INT)"""
+ pipeline = TosaPipelineINT[input_t1](
Sub(),
test_data(),
aten_op,
@@ -99,9 +100,9 @@ def test_sub_tensor_tosa_BI(test_data):
@common.parametrize("test_data", sub2_test_data)
-def test_sub_tensor_tosa_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
- """Test Two-Operand Subtraction (TOSA BI)"""
- pipeline = TosaPipelineBI[input_t2](
+def test_sub_tensor_tosa_INT_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
+ """Test Two-Operand Subtraction (TOSA INT)"""
+ pipeline = TosaPipelineINT[input_t2](
Sub2(),
test_data(),
aten_op,
@@ -112,9 +113,9 @@ def test_sub_tensor_tosa_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
@common.parametrize("test_data", sub_test_data, fvp_sub_xfails)
@common.XfailIfNoCorstone300
-def test_sub_tensor_u55_BI(test_data):
+def test_sub_tensor_u55_INT(test_data):
"""Test Subtraction on Ethos-U55 (FVP Mode)"""
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
Sub(),
test_data(),
aten_op,
@@ -126,9 +127,9 @@ def test_sub_tensor_u55_BI(test_data):
@common.parametrize("test_data", sub2_test_data, fvp_sub2_xfails)
@common.XfailIfNoCorstone300
-def test_sub_tensor_u55_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
+def test_sub_tensor_u55_INT_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
"""Test Two-Operand Subtraction on Ethos-U55 (FVP Mode)"""
- pipeline = EthosU55PipelineBI[input_t2](
+ pipeline = EthosU55PipelineINT[input_t2](
Sub2(),
test_data(),
aten_op,
@@ -140,9 +141,9 @@ def test_sub_tensor_u55_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
@common.parametrize("test_data", sub_test_data, fvp_sub_xfails)
@common.XfailIfNoCorstone320
-def test_sub_tensor_u85_BI_2(test_data):
+def test_sub_tensor_u85_INT_2(test_data):
"""Test Subtraction on Ethos-U85 (FVP Mode)"""
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
Sub(),
test_data(),
aten_op,
@@ -154,9 +155,9 @@ def test_sub_tensor_u85_BI_2(test_data):
@common.parametrize("test_data", sub2_test_data, fvp_sub2_xfails)
@common.XfailIfNoCorstone320
-def test_sub_tensor_u85_BI(test_data: Tuple[torch.Tensor, torch.Tensor]):
+def test_sub_tensor_u85_INT(test_data: Tuple[torch.Tensor, torch.Tensor]):
"""Test Two-Operand Subtraction on Ethos-U85 (FVP Mode)"""
- pipeline = EthosU85PipelineBI[input_t2](
+ pipeline = EthosU85PipelineINT[input_t2](
Sub2(),
test_data(),
aten_op,
@@ -164,3 +165,59 @@ def test_sub_tensor_u85_BI(test_data: Tuple[torch.Tensor, torch.Tensor]):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", sub_test_data)
+@common.SkipIfNoModelConverter
+def test_sub_tensor_vgf_FP(test_data: Tuple[torch.Tensor]):
+ """Test Subtraction (VGF FP)"""
+ pipeline = VgfPipeline[input_t1](
+ Sub(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", sub2_test_data)
+@common.SkipIfNoModelConverter
+def test_sub_tensor_vgf_FP_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
+ """Test Two-Operand Subtraction (VGF FP)"""
+ pipeline = VgfPipeline[input_t2](
+ Sub2(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", sub_test_data)
+@common.SkipIfNoModelConverter
+def test_sub_tensor_vgf_INT(test_data: Tuple[torch.Tensor]):
+ """Test Subtraction (VGF INT)"""
+ pipeline = VgfPipeline[input_t1](
+ Sub(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", sub2_test_data)
+@common.SkipIfNoModelConverter
+def test_sub_tensor_vgf_INT_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
+ """Test Two-Operand Subtraction (VGF INT)"""
+ pipeline = VgfPipeline[input_t2](
+ Sub2(),
+ test_data(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py
index c1e958174cf..250ee938a7d 100644
--- a/backends/arm/test/ops/test_sum.py
+++ b/backends/arm/test/ops/test_sum.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.sum.dim_IntList"
@@ -41,8 +42,8 @@ def forward(self, x: torch.Tensor, dim: int, keepdim: bool):
@common.parametrize("test_data", Sum.test_parameters)
-def test_sum_dim_intlist_tosa_MI(test_data: input_t1):
- pipeline = TosaPipelineMI[input_t1](
+def test_sum_dim_intlist_tosa_FP(test_data: input_t1):
+ pipeline = TosaPipelineFP[input_t1](
Sum(),
test_data(),
aten_op,
@@ -52,8 +53,8 @@ def test_sum_dim_intlist_tosa_MI(test_data: input_t1):
@common.parametrize("test_data", Sum.test_parameters)
-def test_sum_dim_intlist_tosa_BI(test_data: input_t1):
- pipeline = TosaPipelineBI[input_t1](
+def test_sum_dim_intlist_tosa_INT(test_data: input_t1):
+ pipeline = TosaPipelineINT[input_t1](
Sum(),
test_data(),
aten_op,
@@ -64,8 +65,8 @@ def test_sum_dim_intlist_tosa_BI(test_data: input_t1):
@common.parametrize("test_data", Sum.test_parameters)
@common.XfailIfNoCorstone300
-def test_view_u55_BI_1_0(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_view_u55_INT_1_0(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Sum(),
test_data(),
aten_op,
@@ -77,8 +78,8 @@ def test_view_u55_BI_1_0(test_data: Tuple):
@common.parametrize("test_data", Sum.test_parameters)
@common.XfailIfNoCorstone320
-def test_view_u85_BI_1_0(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_view_u85_INT_1_0(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Sum(),
test_data(),
aten_op,
@@ -88,6 +89,27 @@ def test_view_u85_BI_1_0(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", Sum.test_parameters)
+@common.SkipIfNoModelConverter
+def test_sum_dim_intlist_vgf_FP(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Sum(), test_data(), aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Sum.test_parameters)
+@common.SkipIfNoModelConverter
+def test_sum_dim_intlist_vgf_INT(test_data: input_t1):
+ pipeline = VgfPipeline[input_t1](
+ Sum(),
+ test_data(),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
reject_inputs = {
"reject_large_0_dim": lambda: (torch.rand((65537, 1, 1)), 0, False),
"reject_large_2_dim": lambda: (torch.rand((800, 90, 1)), 2, False),
@@ -96,8 +118,8 @@ def test_view_u85_BI_1_0(test_data: Tuple):
@common.parametrize("test_data", reject_inputs)
-def test_view_u55_BI_not_delegated(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_view_u55_INT_not_delegated(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Sum(),
test_data(),
aten_op,
diff --git a/backends/arm/test/ops/test_tanh.py b/backends/arm/test/ops/test_tanh.py
index 73d51cb8c3e..098d878addc 100644
--- a/backends/arm/test/ops/test_tanh.py
+++ b/backends/arm/test/ops/test_tanh.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.tanh.default"
@@ -40,8 +41,8 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_tanh_tosa_MI(test_data: Tuple):
- pipeline = TosaPipelineMI[input_t1](
+def test_tanh_tosa_FP(test_data: Tuple):
+ pipeline = TosaPipelineFP[input_t1](
Tanh(),
(test_data(),),
aten_op,
@@ -51,8 +52,8 @@ def test_tanh_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_tanh_tosa_BI(test_data: Tuple):
- pipeline = TosaPipelineBI[input_t1](
+def test_tanh_tosa_INT(test_data: Tuple):
+ pipeline = TosaPipelineINT[input_t1](
Tanh(),
(test_data(),),
aten_op,
@@ -62,8 +63,8 @@ def test_tanh_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_tanh_u55_BI(test_data: Tuple):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_tanh_u55_INT(test_data: Tuple):
+ pipeline = EthosU55PipelineINT[input_t1](
Tanh(),
(test_data(),),
aten_op,
@@ -74,8 +75,8 @@ def test_tanh_u55_BI(test_data: Tuple):
@common.parametrize("test_data", test_data_suite)
-def test_tanh_u85_BI(test_data: Tuple):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_tanh_u85_INT(test_data: Tuple):
+ pipeline = EthosU85PipelineINT[input_t1](
Tanh(),
(test_data(),),
aten_op,
@@ -83,3 +84,24 @@ def test_tanh_u85_BI(test_data: Tuple):
run_on_fvp=False,
)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_tanh_vgf_FP(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Tanh(), (test_data(),), aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_tanh_vgf_INT(test_data: Tuple):
+ pipeline = VgfPipeline[input_t1](
+ Tanh(),
+ (test_data(),),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py
index 9fcd65dc957..db04b9425c2 100644
--- a/backends/arm/test/ops/test_to_copy.py
+++ b/backends/arm/test/ops/test_to_copy.py
@@ -14,7 +14,8 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
OpNotSupportedPipeline,
- TosaPipelineMI,
+ TosaPipelineFP,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -36,12 +37,12 @@ def forward(self, x: torch.Tensor):
quantization.
However, the model being exported may have some explicit casting to floating
point dtypes. The casting or their decomposition should be rejected during
-partition. This test will be coveraged by class TestToCopy_BI.
+partition. This test will be covered by class TestToCopy_INT.
Note: This is also covered by test_scalars.py.
"""
-_TO_COPY_TEST_DATA_MI = {
+_TO_COPY_TEST_DATA_FP = {
"rand_fp16": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float16), torch.float32),
"rand_fp32": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float32), torch.float16),
"rand_int8": lambda: (
@@ -59,11 +60,11 @@ def forward(self, x: torch.Tensor):
}
-@common.parametrize("test_data", _TO_COPY_TEST_DATA_MI)
-def test_copy_tosa_MI(test_data: Tuple):
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_FP)
+def test_copy_tosa_FP(test_data: Tuple):
test_tensor, new_dtype = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Cast(new_dtype),
(test_tensor,),
aten_op=[],
@@ -72,14 +73,28 @@ def test_copy_tosa_MI(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_FP)
+@common.SkipIfNoModelConverter
+def test_copy_vgf_FP(test_data: Tuple):
+ test_tensor, new_dtype = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Cast(new_dtype),
+ (test_tensor,),
+ aten_op=[],
+ exir_op=[],
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
"""
-Casting operations that output floating-point dtypes should be rejected under BI profile,
+Casting operations that output floating-point dtypes should be rejected under the INT profile,
rather than introducing an invalid dtype into the tosa graph.
For example, x.to(dtype=torch.float32) will be eventually lowered to
exir_ops.edge.dim_order_ops._to_dim_order_copy.default. We should reject this operation
in ToCopySupported::is_node_tosa_supported() before it goes into the delegated graph.
"""
-_TO_COPY_TEST_DATA_BI = {
+_TO_COPY_TEST_DATA_INT = {
"rand_int8_fp32": lambda: (
torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8),
torch.float32,
@@ -103,8 +118,8 @@ def test_copy_tosa_MI(test_data: Tuple):
}
-@common.parametrize("test_data", _TO_COPY_TEST_DATA_BI)
-def test_copy_tosa_BI(test_data: Tuple):
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_INT)
+def test_copy_tosa_INT(test_data: Tuple):
test_tensor, new_dtype = test_data()
pipeline = OpNotSupportedPipeline[input_t1](
@@ -116,3 +131,10 @@ def test_copy_tosa_BI(test_data: Tuple):
quantize=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_INT)
+@common.SkipIfNoModelConverter
+def test_copy_vgf_INT(test_data: Tuple):
+    # Not runnable: casting that outputs a floating-point dtype is rejected
+    # during partitioning under the INT profile, so there is nothing to lower.
+ pass
diff --git a/backends/arm/test/ops/test_unary_combos.py b/backends/arm/test/ops/test_unary_combos.py
new file mode 100644
index 00000000000..db442d2d8d0
--- /dev/null
+++ b/backends/arm/test/ops/test_unary_combos.py
@@ -0,0 +1,134 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Tuple
+
+import pytest
+
+import torch
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
+)
+
+Tensor1 = Tuple[torch.Tensor]
+
+
+class NegAdd(torch.nn.Module):
+ # neg(x) + 1
+ edge_op_list = [
+ "executorch_exir_dialects_edge__ops_aten_neg_default",
+ "executorch_exir_dialects_edge__ops_aten_add_Tensor",
+ ]
+
+ def get_inputs(self) -> Tensor1:
+ return (torch.rand(10, 10, 10),)
+
+ def forward(self, x):
+ return torch.neg(x) + 1.0
+
+
+class MinAddZero(torch.nn.Module):
+ # min(x, 0) + 1
+ edge_op_list = [
+ "executorch_exir_dialects_edge__ops_aten_full_like_default",
+ "executorch_exir_dialects_edge__ops_aten_minimum_default",
+ "executorch_exir_dialects_edge__ops_aten_add_Tensor",
+ ]
+
+ # range [-1, 1]
+ def get_inputs(self) -> Tensor1:
+ return (torch.rand(10, 10, 10) * 2 - 1,)
+
+ def forward(self, x):
+ # We want Tensor-Tensor minimum
+ z = torch.full_like(x, 0.0)
+ return torch.minimum(x, z) + 1.0
+
+
+class MaxAddZero(torch.nn.Module):
+ # max(x, 0) + 1.0
+ edge_op_list = [
+ "executorch_exir_dialects_edge__ops_aten_full_like_default",
+ "executorch_exir_dialects_edge__ops_aten_maximum_default",
+ "executorch_exir_dialects_edge__ops_aten_add_Tensor",
+ ]
+
+ # range [-1, 1]
+ def get_inputs(self) -> Tensor1:
+ return (torch.rand(10, 10, 10) * 2 - 1,)
+
+ def forward(self, x):
+ z = torch.full_like(x, 0.0)
+ return torch.maximum(x, z) + 1.0
+
+
+class AbsAdd(torch.nn.Module):
+ # abs(x) + 1.0
+ edge_op_list = [
+ "executorch_exir_dialects_edge__ops_aten_abs_default",
+ "executorch_exir_dialects_edge__ops_aten_add_Tensor",
+ ]
+
+ def get_inputs(self) -> Tensor1:
+ return (torch.rand(10, 10, 10),)
+
+ def forward(self, x):
+ return torch.abs(x) + 1.0
+
+
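+# All models combine one unary/elementwise op (neg, abs, maximum, minimum) with an add.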
+MODELS = [NegAdd, AbsAdd, MaxAddZero, MinAddZero]
+
+
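+# Test helper: instantiate the model and return it together with its inputs and
+# the edge dialect ops expected after lowering.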
+def _build(model_cls):
+ m = model_cls()
+ return m, m.get_inputs(), model_cls.edge_op_list
+
+
+@pytest.mark.parametrize("model_cls", MODELS, ids=lambda c: c.__name__)
+def test_unary_combos_tosa_FP(model_cls):
+ m, inputs, exir = _build(model_cls)
+ p = TosaPipelineFP[Tensor1](m, inputs, aten_op=[], exir_op=exir)
+ p.run()
+
+
+@pytest.mark.parametrize("model_cls", MODELS, ids=lambda c: c.__name__)
+def test_unary_combos_tosa_INT(model_cls):
+ m, inputs, exir = _build(model_cls)
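+    # qtol=1 tolerates a single quantization step of error in the INT output.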
+ p = TosaPipelineINT[Tensor1](m, inputs, aten_op=[], exir_op=exir, qtol=1)
+ p.run()
+
+
+@common.XfailIfNoCorstone300
+@pytest.mark.parametrize("model_cls", MODELS, ids=lambda c: c.__name__)
+def test_unary_combos_u55_INT(model_cls):
+ m, inputs, exir = _build(model_cls)
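+    # run_on_fvp=True executes the lowered model on the Corstone-300 FVP.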
+ p = EthosU55PipelineINT[Tensor1](
+ m, inputs, aten_ops=[], exir_ops=exir, run_on_fvp=True
+ )
+ p.run()
+
+
+@common.XfailIfNoCorstone320
+@pytest.mark.parametrize("model_cls", MODELS, ids=lambda c: c.__name__)
+def test_unary_combos_u85_INT(model_cls):
+ m, inputs, exir = _build(model_cls)
+ p = EthosU85PipelineINT[Tensor1](
+ m, inputs, aten_ops=[], exir_ops=exir, run_on_fvp=True
+ )
+ p.run()
+
+
+@common.SkipIfNoModelConverter
+@pytest.mark.parametrize("model_cls", MODELS, ids=lambda c: c.__name__)
+def test_unary_combos_vgf_INT(model_cls):
+ m, inputs, exir = _build(model_cls)
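+    # tosa_version pins the VGF flow to the TOSA 1.0 INT profile.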
+ p = VgfPipeline[Tensor1](
+ m, inputs, aten_op=[], exir_op=exir, tosa_version="TOSA-1.0+INT"
+ )
+ p.run()
diff --git a/backends/arm/test/ops/test_unbind.py b/backends/arm/test/ops/test_unbind.py
index 5de9db9a5ab..cd33f8217df 100644
--- a/backends/arm/test/ops/test_unbind.py
+++ b/backends/arm/test/ops/test_unbind.py
@@ -9,8 +9,9 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = tuple[torch.Tensor]
@@ -34,9 +35,9 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor]:
@common.parametrize("test_data", Unbind.test_data)
-def test_unbind_int_tosa_MI(test_data: test_data_t):
+def test_unbind_int_tosa_FP(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
Unbind(*init_data),
input_data(),
Unbind.aten_op,
@@ -45,11 +46,37 @@ def test_unbind_int_tosa_MI(test_data: test_data_t):
@common.parametrize("test_data", Unbind.test_data)
-def test_unbind_int_tosa_BI(test_data: test_data_t):
+def test_unbind_int_tosa_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
Unbind(*init_data),
input_data(),
Unbind.aten_op,
)
pipeline.run()
+
+
+@common.parametrize("test_data", Unbind.test_data)
+@common.SkipIfNoModelConverter
+def test_unbind_int_vgf_FP(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ Unbind(*init_data),
+ input_data(),
+ Unbind.aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Unbind.test_data)
+@common.SkipIfNoModelConverter
+def test_unbind_int_vgf_INT(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ Unbind(*init_data),
+ input_data(),
+ Unbind.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_unflatten.py b/backends/arm/test/ops/test_unflatten.py
index 8a540a8040e..95c68b2940d 100644
--- a/backends/arm/test/ops/test_unflatten.py
+++ b/backends/arm/test/ops/test_unflatten.py
@@ -9,8 +9,9 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = tuple[torch.Tensor]
@@ -35,9 +36,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", Unflatten.test_data)
-def test_unflatten_int_tosa_MI(test_data: test_data_t):
+def test_unflatten_int_tosa_FP(test_data: test_data_t):
module, inputs = test_data()
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
module,
inputs,
Unflatten.aten_op,
@@ -46,11 +47,37 @@ def test_unflatten_int_tosa_MI(test_data: test_data_t):
@common.parametrize("test_data", Unflatten.test_data)
-def test_unflatten_int_tosa_BI(test_data: test_data_t):
+def test_unflatten_int_tosa_INT(test_data: test_data_t):
module, inputs = test_data()
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
module,
inputs,
Unflatten.aten_op,
)
pipeline.run()
+
+
+@common.parametrize("test_data", Unflatten.test_data)
+@common.SkipIfNoModelConverter
+def test_unflatten_int_vgf_FP(test_data: test_data_t):
+ module, inputs = test_data()
+ pipeline = VgfPipeline[input_t](
+ module,
+ inputs,
+ Unflatten.aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Unflatten.test_data)
+@common.SkipIfNoModelConverter
+def test_unflatten_int_vgf_INT(test_data: test_data_t):
+ module, inputs = test_data()
+ pipeline = VgfPipeline[input_t](
+ module,
+ inputs,
+ Unflatten.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_unsqueeze.py b/backends/arm/test/ops/test_unsqueeze.py
index 4ad238a099a..54e1b0dd0ce 100644
--- a/backends/arm/test/ops/test_unsqueeze.py
+++ b/backends/arm/test/ops/test_unsqueeze.py
@@ -13,10 +13,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.unsqueeze.default"
@@ -34,9 +35,9 @@ def forward(self, x: torch.Tensor, dim):
@common.parametrize("test_tensor", Unsqueeze.test_parameters)
-def test_unsqueeze_tosa_MI(test_tensor: torch.Tensor):
+def test_unsqueeze_tosa_FP(test_tensor: torch.Tensor):
for i in range(-test_tensor[0].dim() - 1, test_tensor[0].dim() + 1):
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Unsqueeze(),
(*test_tensor, i),
aten_op,
@@ -46,8 +47,8 @@ def test_unsqueeze_tosa_MI(test_tensor: torch.Tensor):
@common.parametrize("test_tensor", Unsqueeze.test_parameters)
-def test_unsqueeze_tosa_BI(test_tensor: torch.Tensor):
- pipeline = TosaPipelineBI[input_t1](
+def test_unsqueeze_tosa_INT(test_tensor: torch.Tensor):
+ pipeline = TosaPipelineINT[input_t1](
Unsqueeze(),
(*test_tensor, 0),
aten_op,
@@ -58,8 +59,8 @@ def test_unsqueeze_tosa_BI(test_tensor: torch.Tensor):
@common.parametrize("test_tensor", Unsqueeze.test_parameters)
@common.XfailIfNoCorstone300
-def test_unsqueeze_u55_BI(test_tensor: torch.Tensor):
- pipeline = EthosU55PipelineBI[input_t1](
+def test_unsqueeze_u55_INT(test_tensor: torch.Tensor):
+ pipeline = EthosU55PipelineINT[input_t1](
Unsqueeze(),
(*test_tensor, 0),
aten_op,
@@ -71,8 +72,8 @@ def test_unsqueeze_u55_BI(test_tensor: torch.Tensor):
@common.parametrize("test_tensor", Unsqueeze.test_parameters)
@common.XfailIfNoCorstone320
-def test_unsqueeze_u85_BI(test_tensor: torch.Tensor):
- pipeline = EthosU85PipelineBI[input_t1](
+def test_unsqueeze_u85_INT(test_tensor: torch.Tensor):
+ pipeline = EthosU85PipelineINT[input_t1](
Unsqueeze(),
(*test_tensor, 0),
aten_op,
@@ -80,3 +81,26 @@ def test_unsqueeze_u85_BI(test_tensor: torch.Tensor):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_tensor", Unsqueeze.test_parameters)
+@common.SkipIfNoModelConverter
+def test_unsqueeze_vgf_FP(test_tensor: torch.Tensor):
+ for i in range(-test_tensor[0].dim() - 1, test_tensor[0].dim() + 1):
+ pipeline = VgfPipeline[input_t1](
+ Unsqueeze(), (*test_tensor, i), aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_tensor", Unsqueeze.test_parameters)
+@common.SkipIfNoModelConverter
+def test_unsqueeze_vgf_INT(test_tensor: torch.Tensor):
+ for i in range(-test_tensor[0].dim() - 1, test_tensor[0].dim() + 1):
+ pipeline = VgfPipeline[input_t1](
+ Unsqueeze(),
+ (*test_tensor, i),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_upsample_bilinear2d.py b/backends/arm/test/ops/test_upsample_bilinear2d.py
index d1c07c3ab0f..95e69bc5204 100644
--- a/backends/arm/test/ops/test_upsample_bilinear2d.py
+++ b/backends/arm/test/ops/test_upsample_bilinear2d.py
@@ -9,10 +9,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.upsample_bilinear2d.vec"
@@ -110,12 +111,12 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite_tosa)
-def test_upsample_bilinear2d_vec_tosa_MI_UpsamplingBilinear2d(
+def test_upsample_bilinear2d_vec_tosa_FP_UpsamplingBilinear2d(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
UpsamplingBilinear2d(size, scale_factor),
(test_data,),
aten_op,
@@ -127,12 +128,12 @@ def test_upsample_bilinear2d_vec_tosa_MI_UpsamplingBilinear2d(
@common.parametrize("test_data", test_data_suite_tosa)
-def test_upsample_bilinear2d_vec_tosa_MI_Upsample(
+def test_upsample_bilinear2d_vec_tosa_FP_Upsample(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Upsample(size, scale_factor),
(test_data,),
aten_op,
@@ -145,12 +146,12 @@ def test_upsample_bilinear2d_vec_tosa_MI_Upsample(
@common.parametrize("test_data", test_data_suite_tosa)
-def test_upsample_bilinear2d_vec_tosa_MI_Interpolate(
+def test_upsample_bilinear2d_vec_tosa_FP_Interpolate(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Interpolate(size, scale_factor),
(test_data,),
aten_op,
@@ -162,12 +163,12 @@ def test_upsample_bilinear2d_vec_tosa_MI_Interpolate(
@common.parametrize("test_data", test_data_suite_tosa)
-def test_upsample_bilinear2d_vec_tosa_BI_intropolate(
+def test_upsample_bilinear2d_vec_tosa_INT_interpolate(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
UpsamplingBilinear2d(size, scale_factor),
(test_data,),
aten_op,
@@ -179,12 +180,12 @@ def test_upsample_bilinear2d_vec_tosa_BI_intropolate(
@common.parametrize("test_data", test_data_suite_tosa)
-def test_upsample_bilinear2d_vec_tosa_BI_Upsample(
+def test_upsample_bilinear2d_vec_tosa_INT_Upsample(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
Upsample(size, scale_factor),
(test_data,),
aten_op,
@@ -197,7 +198,7 @@ def test_upsample_bilinear2d_vec_tosa_BI_Upsample(
@common.parametrize("test_data", test_data_u55)
@common.XfailIfNoCorstone300
-def test_upsample_bilinear2d_vec_U55_BI_Upsample_not_delegated(
+def test_upsample_bilinear2d_vec_U55_INT_Upsample_not_delegated(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
@@ -215,7 +216,7 @@ def test_upsample_bilinear2d_vec_U55_BI_Upsample_not_delegated(
@common.parametrize("test_data", test_data_u55)
@common.XfailIfNoCorstone300
-def test_upsample_bilinear2d_vec_U55_BI_Interpolate_not_delegated(
+def test_upsample_bilinear2d_vec_U55_INT_Interpolate_not_delegated(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
@@ -233,7 +234,7 @@ def test_upsample_bilinear2d_vec_U55_BI_Interpolate_not_delegated(
@common.parametrize("test_data", test_data_u55)
@common.XfailIfNoCorstone300
-def test_upsample_bilinear2d_vec_U55_BI_UpsamplingBilinear2d_not_delegated(
+def test_upsample_bilinear2d_vec_U55_INT_UpsamplingBilinear2d_not_delegated(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
@@ -251,10 +252,10 @@ def test_upsample_bilinear2d_vec_U55_BI_UpsamplingBilinear2d_not_delegated(
@common.parametrize("test_data", test_data_suite_Uxx)
@common.XfailIfNoCorstone320
-def test_upsample_bilinear2d_vec_U85_BI_Upsample(test_data: input_t1):
+def test_upsample_bilinear2d_vec_U85_INT_Upsample(test_data: input_t1):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
Upsample(size, scale_factor),
(test_data,),
aten_op,
@@ -269,12 +270,12 @@ def test_upsample_bilinear2d_vec_U85_BI_Upsample(test_data: input_t1):
@common.parametrize("test_data", test_data_suite_Uxx)
@common.XfailIfNoCorstone320
-def test_upsample_bilinear2d_vec_U85_BI_Interpolate(
+def test_upsample_bilinear2d_vec_U85_INT_Interpolate(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
Interpolate(size, scale_factor),
(test_data,),
aten_op,
@@ -289,12 +290,12 @@ def test_upsample_bilinear2d_vec_U85_BI_Interpolate(
@common.parametrize("test_data", test_data_suite_Uxx)
@common.XfailIfNoCorstone320
-def test_upsample_bilinear2d_vec_U85_BI_UpsamplingBilinear2d(
+def test_upsample_bilinear2d_vec_U85_INT_UpsamplingBilinear2d(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
UpsamplingBilinear2d(size, scale_factor),
(test_data,),
aten_op,
@@ -305,3 +306,99 @@ def test_upsample_bilinear2d_vec_U85_BI_UpsamplingBilinear2d(
if not compare_outputs:
pipeline.pop_stage(-1)
pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_tosa)
+@common.SkipIfNoModelConverter
+def test_upsample_bilinear2d_vgf_FP_UpsamplingBilinear2d(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data
+ pipeline = VgfPipeline[input_t1](
+ UpsamplingBilinear2d(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
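+    # pop_stage(-1) drops the final output-comparison stage for no-compare cases.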
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_tosa)
+@common.SkipIfNoModelConverter
+def test_upsample_bilinear2d_vgf_FP_Upsample(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data
+ pipeline = VgfPipeline[input_t1](
+ Upsample(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_tosa)
+@common.SkipIfNoModelConverter
+def test_upsample_bilinear2d_vgf_FP_Interpolate(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data
+ pipeline = VgfPipeline[input_t1](
+ Interpolate(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_tosa)
+@common.SkipIfNoModelConverter
+def test_upsample_bilinear2d_vgf_INT_UpsamplingBilinear2d(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data
+ pipeline = VgfPipeline[input_t1](
+ UpsamplingBilinear2d(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_tosa)
+@common.SkipIfNoModelConverter
+def test_upsample_bilinear2d_vgf_INT_Upsample(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data
+ pipeline = VgfPipeline[input_t1](
+ Upsample(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite_tosa)
+@common.SkipIfNoModelConverter
+def test_upsample_bilinear2d_vgf_INT_Interpolate(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data
+ pipeline = VgfPipeline[input_t1](
+ Interpolate(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_upsample_nearest2d.py b/backends/arm/test/ops/test_upsample_nearest2d.py
index dee32249a39..a39adefc168 100644
--- a/backends/arm/test/ops/test_upsample_nearest2d.py
+++ b/backends/arm/test/ops/test_upsample_nearest2d.py
@@ -10,8 +10,9 @@
from executorch.backends.arm.test.tester.test_pipeline import (
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.upsample_nearest2d.vec"
@@ -104,10 +105,10 @@ def forward(self, x):
@common.parametrize("test_data", test_data_suite)
-def test_upsample_nearest2d_vec_tosa_MI(test_data: torch.Tensor):
+def test_upsample_nearest2d_vec_tosa_FP(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
UpsamplingNearest2d(size, scale_factor),
(test_data,),
aten_op,
@@ -119,10 +120,10 @@ def test_upsample_nearest2d_vec_tosa_MI(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_upsample_nearest2d_vec_tosa_MI_nearest(test_data: torch.Tensor):
+def test_upsample_nearest2d_vec_tosa_FP_nearest(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Upsample(size, scale_factor),
(test_data,),
aten_op,
@@ -135,10 +136,10 @@ def test_upsample_nearest2d_vec_tosa_MI_nearest(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_upsample_nearest2d_vec_tosa_MI_interpolate(test_data: torch.Tensor):
+def test_upsample_nearest2d_vec_tosa_FP_interpolate(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Interpolate(size, scale_factor),
(test_data,),
aten_op,
@@ -150,10 +151,10 @@ def test_upsample_nearest2d_vec_tosa_MI_interpolate(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_upsample_nearest2d_vec_tosa_BI_interpolate(test_data: torch.Tensor):
+def test_upsample_nearest2d_vec_tosa_INT(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
UpsamplingNearest2d(size, scale_factor),
(test_data,),
aten_op,
@@ -165,10 +166,10 @@ def test_upsample_nearest2d_vec_tosa_BI_interpolate(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite)
-def test_upsample_nearest2d_vec_tosa_BI_nearest(test_data: torch.Tensor):
+def test_upsample_nearest2d_vec_tosa_INT_nearest(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
Upsample(size, scale_factor),
(test_data,),
aten_op,
@@ -179,9 +180,120 @@ def test_upsample_nearest2d_vec_tosa_BI_nearest(test_data: torch.Tensor):
pipeline.run()
+@common.parametrize("test_data", test_data_suite)
+def test_upsample_nearest2d_vec_tosa_INT_interpolate(test_data: torch.Tensor):
+ test_data, size, scale_factor, compare_outputs = test_data()
+
+ pipeline = TosaPipelineINT[input_t1](
+ Interpolate(size, scale_factor),
+ (test_data,),
+ aten_op,
+ exir_op=[],
+ )
+ if not compare_outputs:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_upsample_nearest2d_vgf_FP(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data()
+ pipeline = VgfPipeline[input_t1](
+ UpsamplingNearest2d(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_upsample_nearest2d_vgf_FP_nearest(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Upsample(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_upsample_nearest2d_vgf_FP_interpolate(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Interpolate(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_upsample_nearest2d_vgf_INT(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data()
+ pipeline = VgfPipeline[input_t1](
+ UpsamplingNearest2d(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_upsample_nearest2d_vgf_INT_nearest(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Upsample(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.SkipIfNoModelConverter
+def test_upsample_nearest2d_vgf_INT_interpolate(test_data: torch.Tensor):
+ data, size, scale_factor, compare = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Interpolate(size, scale_factor),
+ (data,),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ if not compare:
+ pipeline.pop_stage(-1)
+ pipeline.run()
+
+
@common.parametrize("test_data", test_data_u55)
@common.XfailIfNoCorstone300
-def test_upsample_nearest2d_vec_U55_BI_Upsample_not_delegated(
+def test_upsample_nearest2d_vec_U55_INT_Upsample_not_delegated(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data()
@@ -199,7 +311,7 @@ def test_upsample_nearest2d_vec_U55_BI_Upsample_not_delegated(
@common.parametrize("test_data", test_data_u55)
@common.XfailIfNoCorstone300
-def test_upsample_nearest2d_vec_U55_BI_Interpolate_not_delegated(
+def test_upsample_nearest2d_vec_U55_INT_Interpolate_not_delegated(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data()
@@ -217,7 +329,7 @@ def test_upsample_nearest2d_vec_U55_BI_Interpolate_not_delegated(
@common.parametrize("test_data", test_data_u55)
@common.XfailIfNoCorstone300
-def test_upsample_nearest2d_vec_U55_BI_UpsamplingBilinear2d_not_delegated(
+def test_upsample_nearest2d_vec_U55_INT_UpsamplingBilinear2d_not_delegated(
test_data: torch.Tensor,
):
test_data, size, scale_factor, compare_outputs = test_data()
@@ -234,7 +346,7 @@ def test_upsample_nearest2d_vec_U55_BI_UpsamplingBilinear2d_not_delegated(
@common.parametrize("test_data", test_data_suite_dynamic)
-def test_upsample_nearest2d_dynamic_MI_nearest(test_data: torch.Tensor):
+def test_upsample_nearest2d_dynamic_FP_nearest(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
batch_size = torch.export.Dim("batch", min=0, max=1000)
@@ -243,7 +355,7 @@ def test_upsample_nearest2d_dynamic_MI_nearest(test_data: torch.Tensor):
dynamic_shapes = {"x": {0: batch_size, 2: input_height, 3: input_width}}
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
UpsamplingNearest2d(size, scale_factor),
(test_data,),
aten_op,
@@ -256,7 +368,7 @@ def test_upsample_nearest2d_dynamic_MI_nearest(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_dynamic)
-def test_upsample_nearest2d_dynamic_BI_nearest(test_data: torch.Tensor):
+def test_upsample_nearest2d_dynamic_INT_nearest(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
batch_size = torch.export.Dim("batch", min=0, max=2)
@@ -265,7 +377,7 @@ def test_upsample_nearest2d_dynamic_BI_nearest(test_data: torch.Tensor):
dynamic_shapes = {"x": {0: batch_size, 2: input_height, 3: input_width}}
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
UpsamplingNearest2d(size, scale_factor),
(test_data,),
aten_op,
@@ -278,7 +390,7 @@ def test_upsample_nearest2d_dynamic_BI_nearest(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_dynamic)
-def test_upsample_nearest2d_dynamic_MI_interpolate(test_data: torch.Tensor):
+def test_upsample_nearest2d_dynamic_FP_interpolate(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
batch_size = torch.export.Dim("batch", min=0, max=2)
@@ -293,7 +405,7 @@ def test_upsample_nearest2d_dynamic_MI_interpolate(test_data: torch.Tensor):
}
}
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Interpolate(size, scale_factor),
(test_data,),
aten_op,
@@ -306,7 +418,7 @@ def test_upsample_nearest2d_dynamic_MI_interpolate(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_dynamic)
-def test_upsample_nearest2d_dynamic_BI_interpolate(test_data: torch.Tensor):
+def test_upsample_nearest2d_dynamic_INT_interpolate(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
batch_size = torch.export.Dim("batch", min=0, max=2)
@@ -321,7 +433,7 @@ def test_upsample_nearest2d_dynamic_BI_interpolate(test_data: torch.Tensor):
}
}
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
Interpolate(size, scale_factor),
(test_data,),
aten_op,
@@ -334,7 +446,7 @@ def test_upsample_nearest2d_dynamic_BI_interpolate(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_dynamic)
-def test_upsample_nearest2d_dynamic_MI_upsample(test_data: torch.Tensor):
+def test_upsample_nearest2d_dynamic_FP_upsample(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
batch_size = torch.export.Dim("batch", min=0, max=1000)
@@ -349,7 +461,7 @@ def test_upsample_nearest2d_dynamic_MI_upsample(test_data: torch.Tensor):
}
}
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Upsample(size, scale_factor),
(test_data,),
aten_op,
@@ -362,7 +474,7 @@ def test_upsample_nearest2d_dynamic_MI_upsample(test_data: torch.Tensor):
@common.parametrize("test_data", test_data_suite_dynamic)
-def test_upsample_nearest2d_dynamic_BI_upsample(test_data: torch.Tensor):
+def test_upsample_nearest2d_dynamic_INT_upsample(test_data: torch.Tensor):
test_data, size, scale_factor, compare_outputs = test_data()
batch_size = torch.export.Dim("batch", min=0, max=2)
@@ -377,7 +489,7 @@ def test_upsample_nearest2d_dynamic_BI_upsample(test_data: torch.Tensor):
}
}
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
Upsample(size, scale_factor),
(test_data,),
aten_op,
diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py
index ef073a6387f..9567f90c480 100644
--- a/backends/arm/test/ops/test_var.py
+++ b/backends/arm/test/ops/test_var.py
@@ -10,10 +10,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
- TosaPipelineMI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -155,10 +156,15 @@ def forward(
return x.var(dim=self.dim, keepdim=self.keepdim, correction=self.correction)
+##########
+## Var ###
+##########
+
+
@common.parametrize("test_data", Var.test_parameters)
-def test_var_dim_tosa_MI_no_dim(test_data: Tuple):
+def test_var_dim_tosa_FP_no_dim(test_data: Tuple):
test_data, keepdim, correction = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
Var(keepdim, correction),
(test_data,),
aten_op=[],
@@ -168,9 +174,9 @@ def test_var_dim_tosa_MI_no_dim(test_data: Tuple):
@common.parametrize("test_data", Var.test_parameters)
-def test_var_dim_tosa_BI_no_dim(test_data: Tuple):
+def test_var_dim_tosa_INT_no_dim(test_data: Tuple):
test_data, keepdim, correction = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
Var(keepdim, correction),
(test_data,),
aten_op=[],
@@ -181,9 +187,9 @@ def test_var_dim_tosa_BI_no_dim(test_data: Tuple):
@common.parametrize("test_data", Var.test_parameters)
@common.XfailIfNoCorstone300
-def test_var_dim_u55_BI_no_dim(test_data: Tuple):
+def test_var_dim_u55_INT_no_dim(test_data: Tuple):
test_data, keepdim, correction = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
Var(keepdim, correction),
(test_data,),
aten_ops=[],
@@ -195,9 +201,9 @@ def test_var_dim_u55_BI_no_dim(test_data: Tuple):
@common.parametrize("test_data", Var.test_parameters)
@common.XfailIfNoCorstone320
-def test_var_dim_u85_BI_no_dim(test_data: Tuple):
+def test_var_dim_u85_INT_no_dim(test_data: Tuple):
test_data, keepdim, correction = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
Var(keepdim, correction),
(test_data,),
aten_ops=[],
@@ -207,10 +213,39 @@ def test_var_dim_u85_BI_no_dim(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", Var.test_parameters)
+@common.SkipIfNoModelConverter
+def test_var_dim_vgf_FP_no_dim(test_data: Tuple):
+ data, keepdim, correction = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Var(keepdim, correction), (data,), [], [], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", Var.test_parameters)
+@common.SkipIfNoModelConverter
+def test_var_dim_vgf_INT_no_dim(test_data: Tuple):
+ data, keepdim, correction = test_data()
+ pipeline = VgfPipeline[input_t1](
+ Var(keepdim, correction),
+ (data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+#############
+## VarDim ###
+#############
+
+
@common.parametrize("test_data", VarDim.test_parameters)
-def test_var_dim_tosa_MI(test_data: Tuple):
+def test_var_dim_tosa_FP(test_data: Tuple):
test_data, dim, keepdim, unbiased = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
VarDim(dim, keepdim, unbiased),
(test_data,),
aten_op=[],
@@ -220,10 +255,10 @@ def test_var_dim_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", VarDim.test_parameters)
-def test_var_dim_tosa_BI(test_data: Tuple):
+def test_var_dim_tosa_INT(test_data: Tuple):
test_data, dim, keepdim, unbiased = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
VarDim(dim, keepdim, unbiased),
(test_data,),
aten_op=[],
@@ -234,9 +269,9 @@ def test_var_dim_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", VarDim.test_parameters_u55)
@common.XfailIfNoCorstone300
-def test_var_dim_u55_BI(test_data: Tuple):
+def test_var_dim_u55_INT(test_data: Tuple):
test_data, dim, keepdim, unbiased = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
VarDim(dim, keepdim, unbiased),
(test_data,),
aten_ops=[],
@@ -248,9 +283,9 @@ def test_var_dim_u55_BI(test_data: Tuple):
@common.parametrize("test_data", VarDim.test_parameters)
@common.XfailIfNoCorstone320
-def test_var_dim_u85_BI(test_data: Tuple):
+def test_var_dim_u85_INT(test_data: Tuple):
test_data, dim, keepdim, unbiased = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
VarDim(dim, keepdim, unbiased),
(test_data,),
aten_ops=[],
@@ -260,10 +295,39 @@ def test_var_dim_u85_BI(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", VarDim.test_parameters)
+@common.SkipIfNoModelConverter
+def test_var_dim_vgf_FP(test_data: Tuple):
+ data, dim, keepdim, unbiased = test_data()
+ pipeline = VgfPipeline[input_t1](
+ VarDim(dim, keepdim, unbiased), (data,), [], [], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", VarDim.test_parameters)
+@common.SkipIfNoModelConverter
+def test_var_dim_vgf_INT(test_data: Tuple):
+ data, dim, keepdim, unbiased = test_data()
+ pipeline = VgfPipeline[input_t1](
+ VarDim(dim, keepdim, unbiased),
+ (data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
+####################
+## VarCorrection ###
+####################
+
+
@common.parametrize("test_data", VarCorrection.test_parameters)
-def test_var_dim_tosa_MI_correction(test_data: Tuple):
+def test_var_dim_tosa_FP_correction(test_data: Tuple):
test_data, dim, keepdim, correction = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
VarCorrection(dim, keepdim, correction),
(test_data,),
aten_op=[],
@@ -273,9 +337,9 @@ def test_var_dim_tosa_MI_correction(test_data: Tuple):
@common.parametrize("test_data", VarCorrection.test_parameters)
-def test_var_dim_tosa_BI_correction(test_data: Tuple):
+def test_var_dim_tosa_INT_correction(test_data: Tuple):
test_data, dim, keepdim, correction = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
VarCorrection(dim, keepdim, correction),
(test_data,),
aten_op=[],
@@ -286,9 +350,9 @@ def test_var_dim_tosa_BI_correction(test_data: Tuple):
@common.parametrize("test_data", VarCorrection.test_parameters)
@common.XfailIfNoCorstone300
-def test_var_dim_u55_BI_correction(test_data: Tuple):
+def test_var_dim_u55_INT_correction(test_data: Tuple):
test_data, dim, keepdim, correction = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
VarCorrection(dim, keepdim, correction),
(test_data,),
aten_ops=[],
@@ -300,9 +364,9 @@ def test_var_dim_u55_BI_correction(test_data: Tuple):
@common.parametrize("test_data", VarCorrection.test_parameters)
@common.XfailIfNoCorstone320
-def test_var_dim_u85_BI_correction(test_data: Tuple):
+def test_var_dim_u85_INT_correction(test_data: Tuple):
test_data, dim, keepdim, correction = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
VarCorrection(dim, keepdim, correction),
(test_data,),
aten_ops=[],
@@ -310,3 +374,27 @@ def test_var_dim_u85_BI_correction(test_data: Tuple):
run_on_fvp=True,
)
pipeline.run()
+
+
+@common.parametrize("test_data", VarCorrection.test_parameters)
+@common.SkipIfNoModelConverter
+def test_var_dim_vgf_FP_correction(test_data: Tuple):
+ data, dim, keepdim, corr = test_data()
+ pipeline = VgfPipeline[input_t1](
+ VarCorrection(dim, keepdim, corr), (data,), [], [], tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", VarCorrection.test_parameters)
+@common.SkipIfNoModelConverter
+def test_var_dim_vgf_INT_correction(test_data: Tuple):
+ data, dim, keepdim, corr = test_data()
+ pipeline = VgfPipeline[input_t1](
+ VarCorrection(dim, keepdim, corr),
+ (data,),
+ [],
+ [],
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_view.py b/backends/arm/test/ops/test_view.py
index fc780b1d32c..71cb2ed73bb 100644
--- a/backends/arm/test/ops/test_view.py
+++ b/backends/arm/test/ops/test_view.py
@@ -13,11 +13,12 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
aten_op = "torch.ops.aten.view.default"
@@ -58,9 +59,9 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", View.needs_transpose_tests)
-def test_view_tosa_MI(test_data: Tuple):
+def test_view_tosa_FP(test_data: Tuple):
test_tensor, new_shape = test_data()
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
View(new_shape),
(test_tensor,),
aten_op,
@@ -70,9 +71,9 @@ def test_view_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", View.needs_transpose_tests)
-def test_view_tosa_BI(test_data: Tuple):
+def test_view_tosa_INT(test_data: Tuple):
test_tensor, new_shape = test_data()
- pipeline = TosaPipelineBI[input_t1](
+ pipeline = TosaPipelineINT[input_t1](
View(new_shape),
(test_tensor,),
aten_op,
@@ -98,9 +99,9 @@ def test_view_tosa_BI(test_data: Tuple):
@common.parametrize("test_data", View.needs_transpose_tests, xfails=xfails)
@common.XfailIfNoCorstone300
-def test_view_u55_BI(test_data: Tuple):
+def test_view_u55_INT(test_data: Tuple):
test_tensor, new_shape = test_data()
- pipeline = EthosU55PipelineBI[input_t1](
+ pipeline = EthosU55PipelineINT[input_t1](
View(new_shape),
(test_tensor,),
aten_op,
@@ -109,9 +110,35 @@ def test_view_u55_BI(test_data: Tuple):
pipeline.run()
+@common.parametrize("test_data", View.needs_transpose_tests)
+@common.SkipIfNoModelConverter
+def test_view_vgf_FP(test_data: Tuple):
+ test_tensor, new_shape = test_data()
+ pipeline = VgfPipeline[input_t1](
+ View(new_shape),
+ (test_tensor,),
+ aten_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_data", View.needs_transpose_tests)
+@common.SkipIfNoModelConverter
+def test_view_vgf_INT(test_data: Tuple):
+ test_tensor, new_shape = test_data()
+ pipeline = VgfPipeline[input_t1](
+ View(new_shape),
+ (test_tensor,),
+ aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.run()
+
+
@common.parametrize("test_data", View.rank_product_too_large, xfails=xfails)
@common.XfailIfNoCorstone300
-def test_view_u55_BI_not_delegated(test_data: Tuple):
+def test_view_u55_INT_not_delegated(test_data: Tuple):
test_tensor, new_shape = test_data()
pipeline = OpNotSupportedPipeline[input_t1](
View(new_shape),
@@ -126,9 +153,9 @@ def test_view_u55_BI_not_delegated(test_data: Tuple):
@common.parametrize("test_data", View.needs_transpose_tests, xfails=xfails)
@common.XfailIfNoCorstone320
-def test_view_u85_BI(test_data: Tuple):
+def test_view_u85_INT(test_data: Tuple):
test_tensor, new_shape = test_data()
- pipeline = EthosU85PipelineBI[input_t1](
+ pipeline = EthosU85PipelineINT[input_t1](
View(new_shape),
(test_tensor,),
aten_op,
diff --git a/backends/arm/test/ops/test_where.py b/backends/arm/test/ops/test_where.py
index a60cf587a3e..ea036d26361 100644
--- a/backends/arm/test/ops/test_where.py
+++ b/backends/arm/test/ops/test_where.py
@@ -14,10 +14,11 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU85PipelineBI,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
from executorch.backends.xnnpack.test.tester.tester import Quantize
@@ -136,23 +137,23 @@ def scalar_condition(input: torch.Tensor):
"float32_scalar_cond": lambda: float32_scalar_cond,
}
-test_modules_MI = {
+test_modules_FP = {
**test_modules_common,
"float32_tensor_cond_tuple_dtype": lambda: float32_tensor_cond_tuple_dtype,
"float32_tensor_cond_tuple_dtype_bool": lambda: float32_tensor_cond_tuple_dtype_bool,
"int32_scalar_cond": lambda: int32_scalar_cond,
}
-test_modules_BI = {
+test_modules_INT = {
**test_modules_common,
}
input_t = Tuple[torch.Tensor]
-@common.parametrize("test_module", test_modules_MI)
-def test_where_self_tosa_MI(test_module):
- pipeline = TosaPipelineMI[input_t](
+@common.parametrize("test_module", test_modules_FP)
+def test_where_self_tosa_FP(test_module):
+ pipeline = TosaPipelineFP[input_t](
test_module(),
test_module().get_inputs(),
aten_op,
@@ -161,9 +162,9 @@ def test_where_self_tosa_MI(test_module):
pipeline.run()
-@common.parametrize("test_module", test_modules_BI)
-def test_where_self_tosa_BI(test_module):
- pipeline = TosaPipelineBI[input_t](
+@common.parametrize("test_module", test_modules_INT)
+def test_where_self_tosa_INT(test_module):
+ pipeline = TosaPipelineINT[input_t](
test_module(),
test_module().get_inputs(),
aten_op,
@@ -173,9 +174,9 @@ def test_where_self_tosa_BI(test_module):
pipeline.run()
-@common.parametrize("test_module", test_modules_BI)
+@common.parametrize("test_module", test_modules_INT)
@common.XfailIfNoCorstone300
-def test_where_self_u55_BI_not_delegated(test_module):
+def test_where_self_u55_INT_not_delegated(test_module):
# There will be one full_like op which will be delegated.
num_delegates = 1
num_exir = 0
@@ -202,11 +203,11 @@ def test_where_self_u55_BI_not_delegated(test_module):
pipeline.run()
-@common.parametrize("test_module", test_modules_BI)
+@common.parametrize("test_module", test_modules_INT)
@common.XfailIfNoCorstone320
-def test_where_self_u85_BI(test_module):
+def test_where_self_u85_INT(test_module):
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
test_module(),
test_module().get_inputs(),
aten_op,
@@ -215,3 +216,30 @@ def test_where_self_u85_BI(test_module):
symmetric_io_quantization=True,
)
pipeline.run()
+
+
+@common.parametrize("test_module", test_modules_FP)
+@common.SkipIfNoModelConverter
+def test_where_self_vgf_FP(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+FP",
+ )
+ pipeline.run()
+
+
+@common.parametrize("test_module", test_modules_INT)
+@common.SkipIfNoModelConverter
+def test_where_self_vgf_INT(test_module):
+ pipeline = VgfPipeline[input_t](
+ test_module(),
+ test_module().get_inputs(),
+ aten_op,
+ exir_op,
+ tosa_version="TOSA-1.0+INT",
+ symmetric_io_quantization=True,
+ )
+ pipeline.run()
diff --git a/backends/arm/test/ops/test_zeros.py b/backends/arm/test/ops/test_zeros.py
index d8f9dcbee29..a1cf39c906f 100644
--- a/backends/arm/test/ops/test_zeros.py
+++ b/backends/arm/test/ops/test_zeros.py
@@ -7,11 +7,12 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
OpNotSupportedPipeline,
- TosaPipelineBI,
- TosaPipelineMI,
+ TosaPipelineFP,
+ TosaPipelineINT,
+ VgfPipeline,
)
input_t = tuple[torch.Tensor]
@@ -49,9 +50,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
@common.parametrize("test_data", ZerosAdd.test_data)
-def test_zeros_tosa_MI(test_data: test_data_t):
+def test_zeros_tosa_FP(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
ZerosAdd(*init_data),
input_data(),
ZerosAdd.aten_op,
@@ -60,9 +61,9 @@ def test_zeros_tosa_MI(test_data: test_data_t):
@common.parametrize("test_data", ZerosAdd.test_data)
-def test_zeros_tosa_BI(test_data: test_data_t):
+def test_zeros_tosa_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
ZerosAdd(*init_data),
input_data(),
ZerosAdd.aten_op,
@@ -73,9 +74,9 @@ def test_zeros_tosa_BI(test_data: test_data_t):
@common.parametrize("test_data", ZerosAdd.test_data)
@common.XfailIfNoCorstone300
-def test_zeros_u55_BI(test_data: test_data_t):
+def test_zeros_u55_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
ZerosAdd(*init_data),
input_data(),
ZerosAdd.aten_op,
@@ -87,9 +88,9 @@ def test_zeros_u55_BI(test_data: test_data_t):
@common.parametrize("test_data", ZerosAdd.test_data)
@common.XfailIfNoCorstone320
-def test_zeros_u85_BI(test_data: test_data_t):
+def test_zeros_u85_INT(test_data: test_data_t):
input_data, init_data = test_data
- pipeline = EthosU85PipelineBI[input_t](
+ pipeline = EthosU85PipelineINT[input_t](
ZerosAdd(*init_data),
input_data(),
ZerosAdd.aten_op,
@@ -108,9 +109,39 @@ def test_zeros_u85_BI(test_data: test_data_t):
"int32_int64": "MLETORCG-716: Do not delegate empty networks to vela",
},
)
-def test_zeros_tosa_BI_not_delegated(test_data: test_data_t):
+def test_zeros_tosa_INT_not_delegated(test_data: test_data_t):
input_data, init_data = test_data
pipeline = OpNotSupportedPipeline[input_t](
ZerosAdd(*init_data), input_data(), non_delegated_ops={}, quantize=True
)
pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ ZerosAdd.test_data,
+)
+@common.SkipIfNoModelConverter
+def test_zeros_vgf_FP(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ ZerosAdd(*init_data), input_data(), ZerosAdd.aten_op, tosa_version="TOSA-1.0+FP"
+ )
+ pipeline.run()
+
+
+@common.parametrize(
+ "test_data",
+ ZerosAdd.test_data,
+)
+@common.SkipIfNoModelConverter
+def test_zeros_vgf_INT(test_data: test_data_t):
+ input_data, init_data = test_data
+ pipeline = VgfPipeline[input_t](
+ ZerosAdd(*init_data),
+ input_data(),
+ ZerosAdd.aten_op,
+ tosa_version="TOSA-1.0+INT",
+ )
+ pipeline.pop_stage("check.quant_nodes")
+ pipeline.run()
diff --git a/backends/arm/test/passes/test_convert_expand_copy_to_repeat.py b/backends/arm/test/passes/test_convert_expand_copy_to_repeat.py
index 38c1cf3296e..aa877c355bd 100644
--- a/backends/arm/test/passes/test_convert_expand_copy_to_repeat.py
+++ b/backends/arm/test/passes/test_convert_expand_copy_to_repeat.py
@@ -30,7 +30,7 @@ def get_inputs(self) -> input_t:
return (torch.rand(3, 1),)
-def test_expand_to_repeat_tosa_BI():
+def test_expand_to_repeat_tosa_INT():
module = Expand()
pipeline = PassPipeline[input_t](
module,
diff --git a/backends/arm/test/passes/test_convert_split_to_slice.py b/backends/arm/test/passes/test_convert_split_to_slice.py
index 7ca6b71236f..fba52308ff0 100644
--- a/backends/arm/test/passes/test_convert_split_to_slice.py
+++ b/backends/arm/test/passes/test_convert_split_to_slice.py
@@ -45,7 +45,7 @@ def forward(self, x):
@common.parametrize("module", modules)
-def test_split_to_slice_tosa_BI(module):
+def test_split_to_slice_tosa_INT(module):
pipeline = PassPipeline[input_t](
module,
module.get_inputs(),
diff --git a/backends/arm/test/passes/test_convert_to_clamp.py b/backends/arm/test/passes/test_convert_to_clamp.py
index c35dd1c72a5..cc854eeacd7 100644
--- a/backends/arm/test/passes/test_convert_to_clamp.py
+++ b/backends/arm/test/passes/test_convert_to_clamp.py
@@ -45,7 +45,7 @@ def forward(self, x):
@common.parametrize("test_data", HardTanh.test_data)
-def test_tosa_MI_hardtahn(test_data: input_t):
+def test_tosa_FP_hardtanh(test_data: input_t):
module = HardTanh()
op_checks_before_pass = {
"executorch_exir_dialects_edge__ops_aten_hardtanh_default": 1,
@@ -69,7 +69,7 @@ def test_tosa_MI_hardtahn(test_data: input_t):
@common.parametrize("test_data", ReLU.test_data)
-def test_tosa_MI_relu(test_data: input_t):
+def test_tosa_FP_relu(test_data: input_t):
module = ReLU()
op_checks_before_pass = {
"executorch_exir_dialects_edge__ops_aten_relu_default": 1,
diff --git a/backends/arm/test/passes/test_decompose_avg_pool2d_pass.py b/backends/arm/test/passes/test_decompose_avg_pool2d_pass.py
new file mode 100644
index 00000000000..4d686039456
--- /dev/null
+++ b/backends/arm/test/passes/test_decompose_avg_pool2d_pass.py
@@ -0,0 +1,75 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm._passes.decompose_avg_pool2d import DecomposeAvgPool2d
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
+
+input_t = Tuple[torch.Tensor] # Input x
+
+
+class AvgPool2dWithStride(torch.nn.Module):
+ """
+ avg_pool2d model with explicit stride parameter
+ """
+
+ def get_inputs(self) -> input_t:
+ return (torch.rand(1, 3, 8, 8),)
+
+ def forward(self, x):
+ return torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
+
+
+class AvgPool2dWithoutStride(torch.nn.Module):
+ """
+ avg_pool2d model without stride parameter (should default to kernel_size)
+ """
+
+ def get_inputs(self) -> input_t:
+ return (torch.rand(1, 3, 8, 8),)
+
+ def forward(self, x):
+ return torch.nn.functional.avg_pool2d(x, kernel_size=3)
+
+
+class AvgPool2dListKernel(torch.nn.Module):
+ """
+ avg_pool2d model with list kernel_size and no stride
+ """
+
+ def get_inputs(self) -> input_t:
+ return (torch.rand(1, 3, 8, 8),)
+
+ def forward(self, x):
+ return torch.nn.functional.avg_pool2d(x, kernel_size=[2, 3])
+
+
+modules = {
+ "avg_pool2d_with_stride": AvgPool2dWithStride(),
+ "avg_pool2d_without_stride": AvgPool2dWithoutStride(),
+ "avg_pool2d_list_kernel": AvgPool2dListKernel(),
+}
+
+
+@common.parametrize("module", modules)
+def test_decompose_avg_pool2d_tosa_FP(module):
+ """Test that DecomposeAvgPool2d pass works correctly with and without stride parameters."""
+ pipeline = PassPipeline[input_t](
+ module,
+ module.get_inputs(),
+ quantize=False,
+ ops_before_pass={
+ "executorch_exir_dialects_edge__ops_aten_avg_pool2d_default": 1,
+ },
+ ops_after_pass={
+ # After decomposition, we should still see avg_pool2d (transformed)
+ "executorch_exir_dialects_edge__ops_aten_avg_pool2d_default": 1,
+ },
+ pass_list=[DecomposeAvgPool2d],
+ )
+ pipeline.run()
diff --git a/backends/arm/test/passes/test_decompose_cosine_similarity_pass.py b/backends/arm/test/passes/test_decompose_cosine_similarity_pass.py
index 4ae413ce456..80a328f39c6 100644
--- a/backends/arm/test/passes/test_decompose_cosine_similarity_pass.py
+++ b/backends/arm/test/passes/test_decompose_cosine_similarity_pass.py
@@ -28,7 +28,7 @@ def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
@common.parametrize("module", modules)
-def test_decompose_cosine_similarity_tosa_BI(module):
+def test_decompose_cosine_similarity_tosa_INT(module):
ops_after_pass = {
"executorch_exir_dialects_edge__ops_aten_mul_Tensor": 5,
diff --git a/backends/arm/test/passes/test_decompose_div_pass.py b/backends/arm/test/passes/test_decompose_div_pass.py
index 24e18b4f523..b52e264bf11 100644
--- a/backends/arm/test/passes/test_decompose_div_pass.py
+++ b/backends/arm/test/passes/test_decompose_div_pass.py
@@ -43,7 +43,7 @@ def forward(self, x):
@common.parametrize("module", modules)
-def test_decompose_div_tosa_MI(module):
+def test_decompose_div_tosa_FP(module):
pipeline = PassPipeline[input_t](
module,
module.get_inputs(),
diff --git a/backends/arm/test/passes/test_decompose_layernorm_pass.py b/backends/arm/test/passes/test_decompose_layernorm_pass.py
index 9c375ceaf8f..d3c2cd6efd7 100644
--- a/backends/arm/test/passes/test_decompose_layernorm_pass.py
+++ b/backends/arm/test/passes/test_decompose_layernorm_pass.py
@@ -32,7 +32,7 @@ def get_inputs(self) -> input_t:
return (torch.rand(10),)
-def test_decompose_layernorm_tosa_MI():
+def test_decompose_layernorm_tosa_FP():
module = LayerNorm()
pipeline = PassPipeline[input_t](
module,
diff --git a/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py b/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py
index de605f666ac..5b4c84edbfd 100644
--- a/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py
+++ b/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py
@@ -55,7 +55,7 @@ def get_inputs(self) -> input_t:
@common.parametrize("module", modules)
-def test_decompose_vector_norm_tosa_BI(module):
+def test_decompose_vector_norm_tosa_INT(module):
"""
This test creates a PassPipeline that applies the DecomposeLinearVectorNormPass.
The expected primitive ops vary depending on the norm order:
diff --git a/backends/arm/test/passes/test_decompose_meandim_pass.py b/backends/arm/test/passes/test_decompose_meandim_pass.py
index 84aa954118d..22dda5d9244 100644
--- a/backends/arm/test/passes/test_decompose_meandim_pass.py
+++ b/backends/arm/test/passes/test_decompose_meandim_pass.py
@@ -10,8 +10,8 @@
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- TosaPipelineBI,
+ EthosU55PipelineINT,
+ TosaPipelineINT,
)
input_t = Tuple[torch.Tensor] # Input x
@@ -84,10 +84,10 @@ def get_inputs(self) -> input_t:
@common.parametrize("module", modules)
-def test_decompose_meandim_tosa_BI(module):
+def test_decompose_meandim_tosa_INT(module):
# Decompose meandim_pass requires initializing the pass with args, which is not
# supported by RunPasses in the arm_tester, so PassPipeline cannot be used.
- pipeline = TosaPipelineBI[input_t](
+ pipeline = TosaPipelineINT[input_t](
module,
module.get_inputs(),
[],
@@ -106,10 +106,10 @@ def test_decompose_meandim_tosa_BI(module):
@common.parametrize("module", modules)
-def test_decompose_meandim_u55_BI(module):
+def test_decompose_meandim_u55_INT(module):
# Decompose meandim_pass requires initializing the pass with args, which is not
# supported by RunPasses in the arm_tester, so PassPipeline cannot be used.
- pipeline = EthosU55PipelineBI[input_t](
+ pipeline = EthosU55PipelineINT[input_t](
module, module.get_inputs(), [], run_on_fvp=False
)
pipeline.pop_stage("check_not.exir")
diff --git a/backends/arm/test/passes/test_decompose_softmax_pass.py b/backends/arm/test/passes/test_decompose_softmax_pass.py
index 6c7ed7cfb60..3af1976e3f3 100644
--- a/backends/arm/test/passes/test_decompose_softmax_pass.py
+++ b/backends/arm/test/passes/test_decompose_softmax_pass.py
@@ -47,7 +47,7 @@ def get_inputs(self) -> input_t:
return (torch.rand(2, 3),)
-def test_softmax_basic_tosa_MI():
+def test_softmax_basic_tosa_FP():
module = Softmax()
pipeline = PassPipeline[input_t](
module,
@@ -74,7 +74,7 @@ def test_softmax_basic_tosa_MI():
pipeline.run()
-def test_softmax_log_tosa_MI():
+def test_softmax_log_tosa_FP():
module = SoftmaxLog()
pipeline = PassPipeline[input_t](
module,
diff --git a/backends/arm/test/passes/test_decompose_var_pass.py b/backends/arm/test/passes/test_decompose_var_pass.py
index 65357fc2212..c347a2f667c 100644
--- a/backends/arm/test/passes/test_decompose_var_pass.py
+++ b/backends/arm/test/passes/test_decompose_var_pass.py
@@ -56,7 +56,7 @@ def get_inputs(self) -> input_t:
@common.parametrize("module", modules)
-def test_decompose_var_tosa_MI(module):
+def test_decompose_var_tosa_FP(module):
pipeline = PassPipeline[input_t](
module,
module.get_inputs(),
diff --git a/backends/arm/test/passes/test_decorate_fp32_to_int32_casting_pass.py b/backends/arm/test/passes/test_decorate_fp32_to_int32_casting_pass.py
index bc4b66e5f72..84573878aef 100644
--- a/backends/arm/test/passes/test_decorate_fp32_to_int32_casting_pass.py
+++ b/backends/arm/test/passes/test_decorate_fp32_to_int32_casting_pass.py
@@ -10,7 +10,7 @@
from executorch.backends.arm.test.tester.test_pipeline import (
OpNotSupportedPipeline,
- TosaPipelineMI,
+ TosaPipelineFP,
)
input_t1 = Tuple[torch.Tensor] # Input x
@@ -46,11 +46,11 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", test_data_fp32_input)
-def test_decorate_fp32_to_int32_casting_tosa_MI(test_data: Tuple):
+def test_decorate_fp32_to_int32_casting_tosa_FP(test_data: Tuple):
test_tensor, target_dtype = test_data()
module = FP32ToINT32Casting(target_dtype)
- pipeline = TosaPipelineMI[input_t1](
+ pipeline = TosaPipelineFP[input_t1](
module,
(test_tensor,),
aten_op=[],
@@ -61,11 +61,11 @@ def test_decorate_fp32_to_int32_casting_tosa_MI(test_data: Tuple):
@common.parametrize("test_data", test_data_fp32_input)
-def test_decorate_fp32_to_int32_casting_tosa_BI(test_data: Tuple):
+def test_decorate_fp32_to_int32_casting_tosa_INT(test_data: Tuple):
"""
- Casting operation involving floating-point dtypes will be rejected in BI/INT profile.
+ Casting operations involving floating-point dtypes will be rejected in the INT profile.
Therefore, the DecorateFp32toInt32CastingPass is not required in this profile.
- Add a BI test to ensure that such casting is rejected as expected.
+ Add an INT test to ensure that such casting is rejected as expected.
"""
test_tensor, target_dtype = test_data()
module = FP32ToINT32Casting(target_dtype)
diff --git a/backends/arm/test/passes/test_fold_qdq_pass.py b/backends/arm/test/passes/test_fold_qdq_pass.py
index 86324d523c6..994676ff442 100644
--- a/backends/arm/test/passes/test_fold_qdq_pass.py
+++ b/backends/arm/test/passes/test_fold_qdq_pass.py
@@ -24,7 +24,7 @@ def forward(self, x, y):
@common.parametrize("test_data", SimpleQuantizeModel.test_data)
-def test_fold_qdq_pass_tosa_BI(test_data: input_t):
+def test_fold_qdq_pass_tosa_INT(test_data: input_t):
"""
Tests the FoldAndAnnotateQParamsPass which folds dq/q nodes into
the node and stores the quantization parameters in meta.
diff --git a/backends/arm/test/passes/test_fuse_batchnorm_pass.py b/backends/arm/test/passes/test_fuse_batchnorm_pass.py
index f91c8245270..59fae7cafbd 100644
--- a/backends/arm/test/passes/test_fuse_batchnorm_pass.py
+++ b/backends/arm/test/passes/test_fuse_batchnorm_pass.py
@@ -138,7 +138,7 @@ def forward(self, x):
@common.parametrize("module", modules)
-def test_fuse_batchnorm_tosa_MI(module: torch.nn.Module):
+def test_fuse_batchnorm_tosa_FP(module: torch.nn.Module):
"""Test various cases where the batchnorm should either be fused with a previous
conv, or converted to a new conv."""
pipeline = PassPipeline[input_t](
diff --git a/backends/arm/test/passes/test_fuse_constant_ops_pass.py b/backends/arm/test/passes/test_fuse_constant_ops_pass.py
index 4ec6942430f..1a318c5cd42 100644
--- a/backends/arm/test/passes/test_fuse_constant_ops_pass.py
+++ b/backends/arm/test/passes/test_fuse_constant_ops_pass.py
@@ -15,6 +15,7 @@
from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
input_t = Tuple[torch.Tensor] # Input x
+input_t2 = Tuple[torch.Tensor, torch.Tensor]
class FuseParameter(torch.nn.Module):
@@ -86,15 +87,35 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
return operator.add(sliced, x)
+class CatConst(torch.nn.Module):
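+    """Concatenation of two runtime inputs: neither operand is constant, so
+    the constant-fusing passes should leave the cat op untouched
+    (ops_before_pass == ops_after_pass)."""
+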
+ ops_before_pass = {
+ "executorch_exir_dialects_edge__ops_aten_cat_default": 1,
+ }
+ ops_after_pass = {
+ "executorch_exir_dialects_edge__ops_aten_cat_default": 1,
+ }
+ ops_not_after_pass = []
+
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, a, b):
+ return torch.cat((a, b), dim=0)
+
+
modules = {
"fuse_parameter": FuseParameter(),
"fuse_buffer": FuseBuffer(),
"fuse_const_tensor": FuseLiftedTensor(),
}
+cat_module = {
+ "fuse_cat": CatConst(),
+}
+
@common.parametrize("module", modules)
-def test_fuse_const_ops_tosa_MI(module: torch.nn.Module):
+def test_fuse_const_ops_tosa_FP(module: torch.nn.Module):
pipeline = PassPipeline[input_t](
module=module,
test_data=(torch.rand(1),),
@@ -108,7 +129,7 @@ def test_fuse_const_ops_tosa_MI(module: torch.nn.Module):
@common.parametrize("module", modules)
-def test_fuse_const_ops_tosa_BI(module: torch.nn.Module):
+def test_fuse_const_ops_tosa_INT(module: torch.nn.Module):
pipeline = PassPipeline[input_t](
module,
(torch.rand(10, 10),),
@@ -118,3 +139,16 @@ def test_fuse_const_ops_tosa_BI(module: torch.nn.Module):
passes_with_exported_program=[ComputeConstantOpsAOT, FuseConstantArgsPass],
)
pipeline.run()
+
+
+@common.parametrize("module", cat_module)
+def test_fuse_const_ops_tosa_INT_cat(module: torch.nn.Module):
+ pipeline = PassPipeline[input_t2](
+ module,
+ (torch.rand(3), torch.rand(2)),
+ quantize=True,
+ ops_before_pass=module.ops_before_pass,
+ ops_after_pass=module.ops_after_pass,
+ passes_with_exported_program=[ComputeConstantOpsAOT, FuseConstantArgsPass],
+ )
+ pipeline.run()
diff --git a/backends/arm/test/passes/test_fuse_equal_placeholders_ops_pass.py b/backends/arm/test/passes/test_fuse_equal_placeholders_ops_pass.py
index 9a26157ed7e..f6e437ba034 100644
--- a/backends/arm/test/passes/test_fuse_equal_placeholders_ops_pass.py
+++ b/backends/arm/test/passes/test_fuse_equal_placeholders_ops_pass.py
@@ -12,7 +12,7 @@
)
from executorch.backends.arm.test.tester.test_pipeline import (
PassPipeline,
- TosaPipelineMI,
+ TosaPipelineFP,
)
input_t = Tuple[torch.Tensor] # Input x
@@ -76,7 +76,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
return m, n
-def test_fuse_equal_placeholders_constants_tosa_MI():
+def test_fuse_equal_placeholders_constants_tosa_FP():
module = FuseWeightsConstants()
data = (torch.rand(1, 2, 8),)
pipeline = PassPipeline[input_t](
@@ -97,7 +97,7 @@ def test_fuse_equal_placeholders_constants_tosa_MI():
assert "_common" in constant_keys[1], "FuseEqualPlaceholders constants failed"
-def test_fuse_equal_placeholders_state_dict_tosa_MI():
+def test_fuse_equal_placeholders_state_dict_tosa_FP():
module = FuseWeightsStateDict()
data = (torch.rand(1, 2, 8),)
pipeline = PassPipeline[input_t](
@@ -118,7 +118,7 @@ def test_fuse_equal_placeholders_state_dict_tosa_MI():
assert "_common" in state_dict_keys[1], "FuseEqualPlaceholders state_dict failed"
-def test_not_fuse_tensor_with_different_type_MI():
+def test_not_fuse_tensor_with_different_type_FP():
module = NotFuseTensorWithDifferentType()
data = (
torch.rand(
@@ -131,7 +131,7 @@ def test_not_fuse_tensor_with_different_type_MI():
dtype=torch.int,
),
)
- pipeline = TosaPipelineMI[input_t](
+ pipeline = TosaPipelineFP[input_t](
module,
data,
aten_op=[],
diff --git a/backends/arm/test/passes/test_insert_int64_to_int32_cast_pass.py b/backends/arm/test/passes/test_insert_int64_to_int32_cast_pass.py
index d3b8fcc4640..da6eeb59459 100644
--- a/backends/arm/test/passes/test_insert_int64_to_int32_cast_pass.py
+++ b/backends/arm/test/passes/test_insert_int64_to_int32_cast_pass.py
@@ -25,7 +25,7 @@ def get_inputs(self) -> input_t:
)
-def test_int64_model_tosa_MI():
+def test_int64_model_tosa_FP():
module = Int64InputModel()
op_checks_before = {
"executorch_exir_dialects_edge__ops_aten_embedding_default": 1,
diff --git a/backends/arm/test/passes/test_insert_table_ops_pass.py b/backends/arm/test/passes/test_insert_table_ops_pass.py
index 88ef96d71ab..5e695c237a0 100644
--- a/backends/arm/test/passes/test_insert_table_ops_pass.py
+++ b/backends/arm/test/passes/test_insert_table_ops_pass.py
@@ -27,19 +27,19 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", Sigmoid.test_data)
-def test_insert_table_tosa_BI(test_data: input_t):
+def test_insert_table_tosa_INT(test_data: input_t):
module = Sigmoid()
pipeline = PassPipeline[input_t](
module,
test_data,
quantize=True,
- ops_before_pass={},
+ ops_before_pass={"executorch_exir_dialects_edge__ops_aten_sigmoid_default": 1},
ops_after_pass={
"executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 1,
"executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 1,
- "tosa._table": 1,
+ "backend__ops_tosa_TABLE_default": 1,
},
- ops_not_after_pass=["aten_sigmoid_default"],
+ ops_not_after_pass=["executorch_exir_dialects_edge__ops_aten_sigmoid_default"],
pass_list=[FoldAndAnnotateQParamsPass],
passes_with_exported_program=[InsertTableOpsPass],
)
diff --git a/backends/arm/test/passes/test_int32_cast_embedding_pass.py b/backends/arm/test/passes/test_int32_cast_embedding_pass.py
index c822b361428..7adca527d75 100644
--- a/backends/arm/test/passes/test_int32_cast_embedding_pass.py
+++ b/backends/arm/test/passes/test_int32_cast_embedding_pass.py
@@ -25,7 +25,7 @@ def get_inputs(self) -> input_t:
)
-def test_int64_model_tosa_MI():
+def test_int64_model_tosa_FP():
module = Int32Embedding()
op_checks_before = {
"executorch_exir_dialects_edge__ops_aten_embedding_default": 1,
diff --git a/backends/arm/test/passes/test_ioquantization_pass.py b/backends/arm/test/passes/test_ioquantization_pass.py
index b9599aeffcc..da3b81aa096 100644
--- a/backends/arm/test/passes/test_ioquantization_pass.py
+++ b/backends/arm/test/passes/test_ioquantization_pass.py
@@ -10,7 +10,7 @@
from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.test_pipeline import EthosU55PipelineBI
+from executorch.backends.arm.test.tester.test_pipeline import EthosU55PipelineINT
from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
@@ -27,12 +27,12 @@ def forward(self, x, y):
@common.parametrize("test_data", SimpleModel.test_data)
-def test_ioquantisation_pass_u55_BI(test_data: input_t):
+def test_ioquantisation_pass_u55_INT(test_data: input_t):
"""
Test that the executorch/exir/passes/quantize_io_pass pass works (meaning we don't get Q/DQ nodes) on a simple model
"""
model = SimpleModel()
- pipeline = EthosU55PipelineBI(
+ pipeline = EthosU55PipelineINT(
model,
test_data,
aten_ops=[],
diff --git a/backends/arm/test/passes/test_remove_clone_pass.py b/backends/arm/test/passes/test_remove_clone_pass.py
index 9f317b44043..dea0bb06f5e 100755
--- a/backends/arm/test/passes/test_remove_clone_pass.py
+++ b/backends/arm/test/passes/test_remove_clone_pass.py
@@ -28,7 +28,7 @@ def get_inputs(self) -> input_t:
return (torch.rand(3, 1),)
-def test_remove_clone_tosa_BI():
+def test_remove_clone_tosa_INT():
module = Clone()
pipeline = PassPipeline[input_t](
module,
diff --git a/backends/arm/test/passes/test_rescale_pass.py b/backends/arm/test/passes/test_rescale_pass.py
index 420fdab5f45..7ede72d9c4d 100644
--- a/backends/arm/test/passes/test_rescale_pass.py
+++ b/backends/arm/test/passes/test_rescale_pass.py
@@ -9,13 +9,18 @@
import pytest
import torch
-import torch.library
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
- EthosU55PipelineBI,
- EthosU85PipelineBI,
- TosaPipelineBI,
+ EthosU55PipelineINT,
+ EthosU85PipelineINT,
+ TosaPipelineINT,
)
+from executorch.backends.arm.tosa_specification import (
+ TosaLoweringContext,
+ TosaSpecification,
+)
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch._subclasses.fake_tensor import FakeTensorMode
input_t = Tuple[torch.Tensor, torch.Tensor] # Input x
@@ -45,8 +50,19 @@ def test_rescale_op():
127,
),
]
- for sample_input in sample_inputs[1:2]:
- torch.library.opcheck(torch.ops.tosa._rescale, sample_input)
+
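+    # The rescale op now lives in the TOSA backend dialect, so it is exercised
+    # directly on fake tensors inside a TosaLoweringContext rather than through
+    # torch.library.opcheck.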
+ with TosaLoweringContext(
+ TosaSpecification.create_from_string("TOSA-1.0+INT")
+ ), FakeTensorMode() as mode:
+ for sample_input in sample_inputs:
+ exir_ops.backend.tosa.RESCALE.default(
+                *(
+                    mode.from_tensor(i) if isinstance(i, torch.Tensor) else i
+                    for i in sample_input
+                )
+ )
def test_nonzero_zp_for_int32():
@@ -67,9 +83,22 @@ def test_nonzero_zp_for_int32():
1, # Should be 0, expect error
),
]
- for sample_input in sample_inputs:
- with pytest.raises(Exception, match="opcheck"):
- torch.library.opcheck(torch.ops.tosa._rescale, sample_input)
+
+ with TosaLoweringContext(
+ TosaSpecification.create_from_string("TOSA-1.0+INT")
+ ), FakeTensorMode() as mode:
+ for sample_input in sample_inputs:
+ with pytest.raises(
+ ValueError, match="TOSA requires (output|input)_zp to be zero"
+ ):
+ exir_ops.backend.tosa.RESCALE.default(
+                    *(
+                        mode.from_tensor(i) if isinstance(i, torch.Tensor) else i
+                        for i in sample_input
+                    )
+ )
def test_zp_outside_range():
@@ -90,9 +119,21 @@ def test_zp_outside_range():
-129, # Should be > -129, expect error
),
]
- for sample_input in sample_inputs:
- with pytest.raises(Exception, match="opcheck"):
- torch.library.opcheck(torch.ops.tosa._rescale, sample_input)
+ with TosaLoweringContext(
+ TosaSpecification.create_from_string("TOSA-1.0+INT")
+ ), FakeTensorMode() as mode:
+ for sample_input in sample_inputs:
+ with pytest.raises(
+ Exception, match="(in_zp|out_zp)=-?[0-9]* outside valid range"
+ ):
+ exir_ops.backend.tosa.RESCALE.default(
+                    *(
+                        mode.from_tensor(i) if isinstance(i, torch.Tensor) else i
+                        for i in sample_input
+                    )
+ )
class RescaleNetwork(torch.nn.Module):
@@ -120,7 +161,7 @@ def test_quantized_rescale_tosa_bi(test_data: tuple[torch.Tensor, torch.Tensor])
"""Tests a model with many ops that requires rescales. As more ops are quantized to int32 and
need the InsertRescalesPass, make sure that they play nicely together."""
module = RescaleNetwork()
- pipeline = TosaPipelineBI(
+ pipeline = TosaPipelineINT(
module=module,
test_data=test_data,
aten_op=[],
@@ -137,7 +178,7 @@ def test_quantized_rescale_u55(test_data: tuple[torch.Tensor, torch.Tensor]):
"""Tests a model with many ops that requires rescales. As more ops are quantized to int32 and
need the InsertRescalesPass, make sure that they play nicely together."""
module = RescaleNetwork()
- pipeline = EthosU55PipelineBI(
+ pipeline = EthosU55PipelineINT(
module=module,
test_data=test_data,
aten_ops=[],
@@ -153,7 +194,7 @@ def test_quantized_rescale_u85(test_data: tuple[torch.Tensor, torch.Tensor]):
"""Tests a model with many ops that requires rescales. As more ops are quantized to int32 and
need the InsertRescalesPass, make sure that they play nicely together."""
module = RescaleNetwork()
- pipeline = EthosU85PipelineBI(
+ pipeline = EthosU85PipelineINT(
module=module,
test_data=test_data,
aten_ops=[],
diff --git a/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py b/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py
index a12ac38b866..fc405e21f2a 100644
--- a/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py
+++ b/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py
@@ -38,7 +38,7 @@ def forward(self, x: torch.Tensor):
@common.parametrize("test_data", Repeat.test_data)
-def test_unsqueeze_before_repeat_tosa_MI(test_data: input_t):
+def test_unsqueeze_before_repeat_tosa_FP(test_data: input_t):
"""
When rank(input) != number of repeated dimensions (=4 in Repeat module),
insert view.
diff --git a/backends/arm/test/quantizer/test_generic_annotater.py b/backends/arm/test/quantizer/test_generic_annotater.py
index 4a4a333084c..4eaf1c205cc 100644
--- a/backends/arm/test/quantizer/test_generic_annotater.py
+++ b/backends/arm/test/quantizer/test_generic_annotater.py
@@ -8,7 +8,7 @@
import torch
from executorch.backends.arm.quantizer import is_annotated
-from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineBI
+from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineINT
from executorch.backends.test.harness.stages import StageType
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
@@ -32,7 +32,7 @@ def example_inputs(self):
def check_annotation(model):
- pipeline = TosaPipelineBI[input_t1](model, model.example_inputs(), [], [])
+ pipeline = TosaPipelineINT[input_t1](model, model.example_inputs(), [], [])
pipeline.pop_stage("check_count.exir")
pipeline.pop_stage("run_method_and_compare_outputs")
pipeline.run()
diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py
index 34959e1ed6d..4335e96c730 100644
--- a/backends/arm/test/runner_utils.py
+++ b/backends/arm/test/runner_utils.py
@@ -18,10 +18,10 @@
import numpy as np
import torch
-from executorch.backends.arm.arm_backend import get_tosa_spec, is_tosa
+from executorch.backends.arm.arm_backend import is_tosa, is_vgf
from executorch.backends.arm.test.conftest import is_option_enabled
from executorch.backends.arm.tosa_specification import (
- Tosa_0_80,
+ get_tosa_spec,
Tosa_1_00,
TosaSpecification,
)
@@ -57,6 +57,8 @@
torch.complex128: np.complex128,
}
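+# Targets accepted by run_target() and get_elf_path().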
+VALID_TARGET = {"corstone-300", "corstone-320", "vkml_emulation_layer"}
+
class QuantizationParams:
__slots__ = ["node_name", "zp", "scale", "qmin", "qmax", "dtype"]
@@ -128,28 +130,8 @@ def get_input_quantization_params(
return quant_params
-def get_output_nodes(program: ExportedProgram) -> list[Node]:
- """
- Get output node to this model.
-
- Args:
- program (ExportedProgram): The program to get the output nodes from.
- Returns:
- The nodes that are the outputs of the 'program'.
- """
- output_nodes = []
- for node in program.graph.nodes:
- if node.op == "output":
- for output in node.args[0]:
- output_nodes.append(output)
- if len(output_nodes) == 0:
- raise RuntimeError("No output nodes found.")
- else:
- return output_nodes
-
-
def get_output_quantization_params(
- output_nodes: list[Node],
+ output_node: Node,
) -> dict[Node, QuantizationParams | None]:
"""
Get output QuantizationParams from a program.
@@ -162,7 +144,7 @@ def get_output_quantization_params(
RuntimeError if no output quantization parameters are found.
"""
quant_params = {}
- for node in output_nodes:
+ for node in output_node.args[0]:
if node.target == torch.ops.quantized_decomposed.dequantize_per_tensor.default:
quant_params[node] = QuantizationParams(
node_name=node.args[0].name,
@@ -218,6 +200,69 @@ def __torch_function__(self, func, types, args=..., kwargs=None):
return func(*args, **kwargs)
+def run_target(
+ executorch_program_manager: ExecutorchProgramManager,
+ inputs: Tuple[torch.Tensor],
+ intermediate_path: str | Path,
+ target_board: Literal["corestone-300", "corestone-320", "vkml_emulation_layer"],
+ elf_path: str | Path,
+ timeout: int = 120, # s
+):
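+    """Dispatches an inference run to the runner matching target_board:
+    run_corstone for the Corstone FVPs, run_vkml_emulation_layer for the
+    Vulkan ML emulation layer."""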
+ if target_board not in VALID_TARGET:
+ raise ValueError(f"Unsupported target: {target_board}")
+
+ if target_board in ("corstone-300", "corstone-320"):
+ return run_corstone(
+ executorch_program_manager,
+ inputs,
+ intermediate_path,
+ target_board,
+ elf_path,
+ timeout,
+ )
+ elif target_board == "vkml_emulation_layer":
+ return run_vkml_emulation_layer(
+ executorch_program_manager,
+ intermediate_path,
+ elf_path,
+ )
+
+
+def run_vkml_emulation_layer(
+ executorch_program_manager: ExecutorchProgramManager,
+ intermediate_path: str | Path,
+ elf_path: str | Path,
+):
+ """Executes an inference of the exported_program on ML Emulation Layer for Vulkan
+ Args:
+ `executorch_program_manager`: The executorch program to run.
+ `intermediate_path`: Directory to save the .pte and capture outputs.
+ `elf_path`: Path to the Vulkan-capable executor_runner binary.
+ """
+
+ intermediate_path = Path(intermediate_path)
+ intermediate_path.mkdir(exist_ok=True)
+ elf_path = Path(elf_path)
+ if not elf_path.exists():
+ raise FileNotFoundError(f"Did not find elf file {elf_path}")
+
+ # Save pte to file
+ pte_path = os.path.join(intermediate_path, "program.pte")
+ with open(pte_path, "wb") as f:
+ f.write(executorch_program_manager.buffer)
+
+ cmd_line = [elf_path, "-model_path", pte_path]
+ result = _run_cmd(cmd_line)
+
+ result_stdout = result.stdout.decode() # noqa: F841
+ # TODO: MLETORCH-1234: Support VGF e2e tests in VgfPipeline
+ # TODO: Add regex to check for error or fault messages in stdout from Emulation Layer
+ # TODO: Retrieve and return the output tensors once VGF runtime is able to dump them.
+ raise NotImplementedError(
+ "Output parsing from VKML Emulation Layer is not yet implemented. "
+ )
+
+
def run_corstone(
executorch_program_manager: ExecutorchProgramManager,
inputs: Tuple[torch.Tensor],
@@ -229,7 +274,7 @@ def run_corstone(
"""Executes an inference of the exported_program on FVP.
Returns a list of tensors with the output.
Args:
- `executorch_program_manager`: the executorch program to run.
+ `executorch_program_manager`: The executorch program to run.
The output of a EdgeProgramManager.to_executorch() call.
`inputs`: A list of tensors with the inputs of the inference.
`dump_path`: A directory where the .pte and inputs are saved to file.
@@ -346,9 +391,9 @@ def run_corstone(
f"Corstone simulation failed:\ncmd: {' '.join(command_args)}\nlog: \n {result_stdout}\n{result.stderr.decode()}"
)
- output_nodes = get_output_nodes(exported_program)
output_np = []
- for i, node in enumerate(output_nodes):
+ output_node = exported_program.graph_module.graph.output_node()
+ for i, node in enumerate(output_node.args[0]):
output_shape = node.meta["val"].shape
output_dtype = node.meta["val"].dtype
tosa_ref_output = np.fromfile(
@@ -467,7 +512,7 @@ def dbg_tosa_fb_to_json(tosa_fb: bytes) -> Dict:
major = version._Major()
minor = version._Minor()
patch = version._Patch()
- if not ((major == 1 and minor == 0) or (major == 0 and minor == 80)):
+    if not (major == 1 and minor == 0):
raise RuntimeError(
f"Unsupported version in TOSA flatbuffer: version={major}.{minor}.{patch}"
)
@@ -558,18 +603,52 @@ def model_converter_installed() -> bool:
return True
-def get_elf_path(target_board):
- elf_path = os.path.join(
- "arm_test",
- f"arm_semihosting_executor_runner_{target_board}",
- "arm_executor_runner",
- )
+def vkml_emulation_layer_installed() -> bool:
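+    """Heuristically checks whether the ML Emulation Layer for Vulkan is
+    installed, based on VK_INSTANCE_LAYERS and LD_LIBRARY_PATH."""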
+ # Check VK_INSTANCE_LAYERS
+ vk_instance_layers = os.environ.get("VK_INSTANCE_LAYERS", "")
+ required_layers = {
+ "VK_LAYER_ML_Graph_Emulation",
+ "VK_LAYER_ML_Tensor_Emulation",
+ }
+ existing_layers = set(vk_instance_layers.split(":"))
+ layers_exists = required_layers.issubset(existing_layers)
+
+ # Check LD_LIBRARY_PATH for "emulation-layer/deploy"
+ ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
+ deploy_exists = False
+    for path in ld_library_path.split(os.pathsep):
+ if "emulation-layer/deploy" in path and os.path.isdir(path):
+ deploy_exists = True
+
+ return layers_exists and deploy_exists
+
+
+def assert_elf_path_exists(elf_path):
if not os.path.exists(elf_path):
raise FileNotFoundError(
- f"Did not find build arm_executor_runner in path {elf_path}, run setup_testing.sh?"
+ f"Did not find build arm_executor_runner or executor_runner in path {elf_path}, run setup_testing.sh?"
)
- else:
- return elf_path
+
+
+def get_elf_path(target_board):
+ if target_board not in VALID_TARGET:
+ raise ValueError(f"Unsupported target: {target_board}")
+
+ if target_board in ("corstone-300", "corstone-320"):
+ elf_path = os.path.join(
+ "arm_test",
+ f"arm_semihosting_executor_runner_{target_board}",
+ "arm_executor_runner",
+ )
+ assert_elf_path_exists(elf_path)
+ elif target_board == "vkml_emulation_layer":
+ elf_path = os.path.join(
+ "cmake-out",
+ "executor_runner",
+ )
+ assert_elf_path_exists(elf_path)
+
+ return elf_path
def arm_executor_runner_exists(target_board):
@@ -590,21 +669,7 @@ def run_tosa_graph(
inputs_np = [input.numpy() for input in inputs]
transpose_data_format(inputs_np, to="NHWC")
- if isinstance(tosa_version, Tosa_0_80):
- import tosa_tools.v0_80.tosa_reference_model as reference_model
-
- # tosa_profile: 0 = Base Inference, 1 = Main Inference, 2 = Main Training.
- tosa_profile = 1 if tosa_version.support_float() else 0
- debug_mode = "ALL" if logger.level <= logging.DEBUG else None
- outputs_np, status = reference_model.run(
- graph,
- inputs_np,
- verbosity=_tosa_refmodel_loglevel(logger.level),
- tosa_profile=tosa_profile,
- initialize_variable_tensor_from_numpy=True,
- debug_mode=debug_mode,
- )
- elif isinstance(tosa_version, Tosa_1_00):
+ if isinstance(tosa_version, Tosa_1_00):
import tosa_reference_model as reference_model
debug_mode = "ALL" if logger.level <= logging.DEBUG else None
@@ -643,6 +708,8 @@ def transpose_data_format(data: list[np.ndarray], to: Literal["NHWC", "NCHW"]):
def get_target_board(compile_spec: list[CompileSpec]) -> str | None:
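+    # VGF compile specs always map to the Vulkan ML emulation layer target.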
+ if is_vgf(compile_spec):
+ return "vkml_emulation_layer"
for spec in compile_spec:
if spec.key == "compile_flags":
flags = spec.value.decode()
diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh
index fd47a6bb464..449075f9611 100755
--- a/backends/arm/test/setup_testing.sh
+++ b/backends/arm/test/setup_testing.sh
@@ -7,52 +7,10 @@
set -eu
-script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
-et_root_dir=$(cd ${script_dir}/../../.. && pwd)
-ethos_u_root_dir=${et_root_dir}/examples/arm/ethos-u-scratch/ethos-u
-
-toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
-et_build_dir=${et_root_dir}/arm_test/cmake-out
+script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
+et_root_dir=$(realpath "${script_dir}/../../..")
+build_executor_runner=${et_root_dir}/backends/arm/scripts/build_executor_runner.sh
build_root_test_dir=${et_root_dir}/arm_test/arm_semihosting_executor_runner
-# Build Arm Baremetal executor_runner in semihosting mode.
-# Put in backends/arm/test/res to be used by unit tests.
-function build_semihosting_executorch_runner() {
- target_board=$1
- system_config=$2
- build_test_dir=${build_root_test_dir}_${target_board}
- echo "[${FUNCNAME[0]}] Configuring ${target_board} with system config ${system_config}"
- if [[ ${target_board} == "corstone-300" ]]; then
- local target_cpu=cortex-m55
- elif [[ ${target_board} == "corstone-320" ]]; then
- local target_cpu=cortex-m85
- else
- echo "[${FUNCNAME[0]}] ERROR: Invalid target_board specified!"
- exit 1
- fi
- cd ${et_root_dir}/examples/arm/executor_runner
- pwd
- mkdir -p ${build_test_dir}
- cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \
- -DCMAKE_BUILD_TYPE=RelWithDebInfo \
- -DTARGET_CPU=${target_cpu} \
- -DSEMIHOSTING=ON \
- -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=${build_test_dir} \
- -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \
- -DET_DIR_PATH:PATH=${et_root_dir} \
- -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \
- -DPYTHON_EXECUTABLE=$(which python3) \
- -DSYSTEM_CONFIG=${system_config} \
- -B ${build_test_dir}
- echo "[${FUNCNAME[0]}] Configured CMAKE"
-
- n=$(nproc)
- cmake --build ${build_test_dir} -j"$((n - 5))" -- arm_executor_runner
- echo "[${FUNCNAME[0]}] Generated baremetal elf file: with semihosting enabled"
- find ${build_test_dir} -name "arm_executor_runner"
-}
-
-# Use most optimal system_configs for testing
-build_semihosting_executorch_runner corstone-300 Ethos_U55_High_End_Embedded
-
-build_semihosting_executorch_runner corstone-320 Ethos_U85_SYS_DRAM_Mid
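+# Build the semihosting executor_runner for both FVP targets via the shared
+# build_executor_runner.sh helper instead of configuring CMake by hand.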
+${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --output="${build_root_test_dir}_corstone-300"
+${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --output="${build_root_test_dir}_corstone-320"
\ No newline at end of file
diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh
index 609a8430522..14444eca02d 100755
--- a/backends/arm/test/test_arm_baremetal.sh
+++ b/backends/arm/test/test_arm_baremetal.sh
@@ -17,7 +17,6 @@ _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly ins
TEST_SUITE=$1
-TOSA_VERSION="${2:-TOSA-1.0+INT}"
# Source the tools
# This should be prepared by the setup.sh
@@ -101,7 +100,7 @@ test_pytest_models() { # Test ops and other things
source backends/arm/scripts/install_models_for_test.sh
# Run arm baremetal pytest tests without FVP
- pytest --verbose --color=yes --durations=0 backends/arm/test/models
+ pytest --verbose --color=yes --numprocesses=auto --durations=0 backends/arm/test/models
echo "${TEST_SUITE_NAME}: PASS"
}
@@ -117,7 +116,6 @@ test_pytest_ops_ethosu_fvp() { # Same as test_pytest but also sometime verify us
# Prepare Corstone-3x0 FVP for pytest
backends/arm/scripts/build_executorch.sh
- backends/arm/scripts/build_portable_kernels.sh
# Build semihosting version of the runner used by pytest testing. This builds:
# arm_test/arm_semihosting_executor_runner_corstone-300
# arm_test/arm_semihosting_executor_runner_corstone-320
@@ -133,7 +131,6 @@ test_pytest_models_ethosu_fvp() { # Same as test_pytest but also sometime verify
# Prepare Corstone-3x0 FVP for pytest
backends/arm/scripts/build_executorch.sh
- backends/arm/scripts/build_portable_kernels.sh
# Build semihosting version of the runner used by pytest testing. This builds:
# arm_test/arm_semihosting_executor_runner_corstone-300
# arm_test/arm_semihosting_executor_runner_corstone-320
@@ -143,7 +140,7 @@ test_pytest_models_ethosu_fvp() { # Same as test_pytest but also sometime verify
source backends/arm/scripts/install_models_for_test.sh
# Run arm baremetal pytest tests with FVP
- pytest --verbose --color=yes --durations=0 backends/arm/test/models
+ pytest --verbose --color=yes --numprocesses=auto --durations=0 backends/arm/test/models
echo "${TEST_SUITE_NAME}: PASS"
}
@@ -159,17 +156,23 @@ test_run_ethosu_fvp() { # End to End model tests using run.sh
# TOSA quantized
echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA"
- examples/arm/run.sh --et_build_root=arm_test/test_run --target=${TOSA_VERSION} --model_name=add
- examples/arm/run.sh --et_build_root=arm_test/test_run --target=${TOSA_VERSION} --model_name=mul
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=TOSA-1.0+INT --model_name=add
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=TOSA-1.0+INT --model_name=mul
# Ethos-U55
echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55"
examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=add
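+    # Also exercise the --bundleio and --etdump variants of the runner.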
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=add --bundleio
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=add --bundleio --etdump
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=add --etdump
examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=mul
# Ethos-U85
echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85"
examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=add
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=add --bundleio
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=add --bundleio --etdump
+ examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=add --etdump
examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=mul
# Cortex-M op tests
@@ -189,17 +192,17 @@ test_models_tosa() { # End to End model tests using model_test.py
# TOSA quantized
echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA"
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=mv2
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=mv3
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=lstm
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=edsr
- # python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=emformer_transcribe # Takes long time to run
- # python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=emformer_join # Takes long time to run
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=w2l
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=ic3
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=ic4
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=resnet18
- python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=${TOSA_VERSION} --model=resnet50
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=mv2
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=mv3
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=lstm
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=edsr
+ # python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=emformer_transcribe # Takes long time to run
+ # python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=emformer_join # Takes long time to run
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=w2l
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=ic3
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=ic4
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=resnet18
+ python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA-1.0+INT --model=resnet50
echo "${TEST_SUITE_NAME}: PASS"
}
@@ -253,6 +256,31 @@ test_full_ethosu_fvp() { # All End to End model tests
echo "${TEST_SUITE_NAME}: PASS"
}
+test_smaller_stories_llama() {
+ echo "${TEST_SUITE_NAME}: Test smaller_stories_llama"
+
+ backends/arm/scripts/build_executorch.sh
+
+ mkdir -p stories110M
+ pushd stories110M
+ wget -N https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt
+ echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+ popd
+
+    # Run the llama test via pytest on the downloaded stories110M weights
+ pytest \
+ -c /dev/null \
+ --verbose \
+ --color=yes \
+ --numprocesses=auto \
+ --log-level=DEBUG \
+ --junit-xml=stories110M/test-reports/unittest.xml \
+ -s \
+ backends/arm/test/models/test_llama.py \
+ --llama_inputs stories110M/stories110M.pt stories110M/params.json stories110m
+
+ echo "${TEST_SUITE_NAME}: PASS"
+}
${TEST_SUITE}
diff --git a/backends/arm/test/test_model.py b/backends/arm/test/test_model.py
index 5e53da4a0ef..f0dd9f3ff9c 100755
--- a/backends/arm/test/test_model.py
+++ b/backends/arm/test/test_model.py
@@ -110,15 +110,6 @@ def build_libs(et_build_root: str, script_path: str):
"--etdump",
]
)
- run_external_cmd(
- [
- "bash",
- os.path.join(script_path, "build_portable_kernels.sh"),
- f"--et_build_root={et_build_root}",
- "--build_type=Release",
- "--portable_kernels=aten::_softmax.out",
- ]
- )
def build_pte(
@@ -166,6 +157,7 @@ def build_ethosu_runtime(
extra_flags: str,
elf_build_path: str,
):
+ elf_build_path = os.path.join(elf_build_path, "cmake-out")
run_external_cmd(
[
"bash",
@@ -183,7 +175,7 @@ def build_ethosu_runtime(
]
)
- elf_file = os.path.join(elf_build_path, "cmake-out", "arm_executor_runner")
+ elf_file = os.path.join(elf_build_path, "arm_executor_runner")
return elf_file
diff --git a/backends/arm/test/tester/analyze_output_utils.py b/backends/arm/test/tester/analyze_output_utils.py
index 96060b7b563..bd8f7703fa1 100644
--- a/backends/arm/test/tester/analyze_output_utils.py
+++ b/backends/arm/test/tester/analyze_output_utils.py
@@ -10,7 +10,6 @@
from executorch.backends.arm.arm_backend import get_intermediate_path
from executorch.backends.arm.test.runner_utils import (
get_input_quantization_params,
- get_output_nodes,
get_output_quantization_params,
)
@@ -254,9 +253,9 @@ def dump_error_output(
export_stage = tester.stages.get(StageType.EXPORT, None)
quantize_stage = tester.stages.get(StageType.QUANTIZE, None)
if export_stage is not None and quantize_stage is not None:
- output_nodes = get_output_nodes(export_stage.artifact)
+        output_node = export_stage.artifact.graph_module.graph.output_node()
qp_input = get_input_quantization_params(export_stage.artifact)
- qp_output = get_output_quantization_params(output_nodes)
+ qp_output = get_output_quantization_params(output_node)
logger.error(f"Input QuantArgs: {qp_input}")
logger.error(f"Output QuantArgs: {qp_output}")
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index 60081ac8145..174c5a9849b 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -25,20 +25,20 @@
import executorch.backends.xnnpack.test.tester.tester as tester
+import serializer.tosa_serializer as ts # type: ignore[import-untyped]
+
import torch.fx
import torch.utils._pytree as pytree
-import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore[import-untyped]
from executorch.backends.arm._passes.arm_pass_manager import ArmPassManager
from executorch.backends.arm.arm_backend import (
get_intermediate_path,
- get_tosa_spec,
is_ethosu,
is_tosa,
is_vgf,
)
-from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner
+from executorch.backends.arm.ethosu import EthosUPartitioner
from executorch.backends.arm.quantizer import (
EthosUQuantizer,
get_symmetric_quantization_config,
@@ -48,10 +48,9 @@
from executorch.backends.arm.test.runner_utils import (
dbg_tosa_fb_to_json,
get_elf_path,
- get_output_nodes,
get_output_quantization_params,
get_target_board,
- run_corstone,
+ run_target,
TosaReferenceModelDispatch,
)
@@ -61,7 +60,7 @@
)
from executorch.backends.arm.tosa_mapping import extract_tensor_meta
from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import get_tosa_spec, TosaSpecification
from executorch.backends.arm.vgf_partitioner import VgfPartitioner
@@ -171,7 +170,9 @@ def dump_artifact(self, path_to_dump: Optional[str]):
super().dump_artifact(path_to_dump)
_dump_lowered_modules_artifact(path_to_dump, self.artifact, self.graph_module)
- def run(self, artifact: ExportedProgram, inputs=None) -> None:
+ def run(
+ self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False
+ ) -> None:
artifact_to_run = copy.deepcopy(artifact)
self.edge_dialect_program = to_edge_transform_and_lower(
artifact_to_run,
@@ -179,6 +180,7 @@ def run(self, artifact: ExportedProgram, inputs=None) -> None:
compile_config=self.edge_compile_conf,
partitioner=self.partitioners,
constant_methods=self.constant_methods,
+ generate_etrecord=generate_etrecord,
)
@@ -209,7 +211,7 @@ def run_artifact(self, inputs):
f"Did not find build arm_executor_runner in path {elf_path}, run setup_testing.sh?"
)
- return run_corstone(
+ return run_target(
self.executorch_program_manager,
inputs_flattened,
intermediate_path,
@@ -481,9 +483,8 @@ def run_method_and_compare_outputs(
reference_stage = self.stages[StageType.INITIAL_MODEL]
exported_program = self.stages[StageType.EXPORT].artifact
- output_nodes = get_output_nodes(exported_program)
-
- output_qparams = get_output_quantization_params(output_nodes)
+ output_node = exported_program.graph_module.graph.output_node()
+ output_qparams = get_output_quantization_params(output_node)
quantization_scales = []
for node in output_qparams:
diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
index 678de81d38d..5c648d5ff2c 100644
--- a/backends/arm/test/tester/test_pipeline.py
+++ b/backends/arm/test/tester/test_pipeline.py
@@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.
import logging
+import warnings as _warnings
from typing import (
Any,
@@ -29,7 +30,10 @@
)
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.arm_tester import ArmTester, RunPasses
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import (
+ TosaLoweringContext,
+ TosaSpecification,
+)
from executorch.backends.xnnpack.test.tester.tester import Quantize
from executorch.exir.backend.compile_spec_schema import CompileSpec
@@ -226,6 +230,12 @@ def find_pos(self, stage_id: str):
raise Exception(f"Stage id {stage_id} not found in pipeline")
+ def has_stage(self, stage_id: str):
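+        """Returns True if a stage with the given id exists in the pipeline."""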
+ try:
+ return self.find_pos(stage_id) >= 0
+        except Exception:
+ return False
+
def add_stage_after(self, stage_id: str, func: Callable, *args, **kwargs):
"""Adds a stage after the given stage id."""
pos = self.find_pos(stage_id) + 1
@@ -271,9 +281,36 @@ def run(self):
raise e
-class TosaPipelineBI(BasePipelineMaker, Generic[T]):
+class TOSAPipelineMaker(BasePipelineMaker, Generic[T]):
+
+ @staticmethod
+ def is_tosa_ref_model_available():
+ """Checks if the TOSA reference model is available."""
+ # Not all deployments of ET have the TOSA reference model available.
+ # Make sure we don't try to use it if it's not available.
+ try:
+ import tosa_reference_model
+
+ # Check if the module has content
+ return bool(dir(tosa_reference_model))
+ except ImportError:
+ return False
+
+ def run(self):
+ if (
+ self.has_stage("run_method_and_compare_outputs")
+ and not self.is_tosa_ref_model_available()
+ ):
+ _warnings.warn(
+ "Warning: Skipping run_method_and_compare_outputs stage. TOSA reference model is not available."
+ )
+ self.pop_stage("run_method_and_compare_outputs")
+ super().run()
+
+
+class TosaPipelineINT(TOSAPipelineMaker, Generic[T]):
"""
- Lowers a graph to BI TOSA spec (with quantization) and tests it with the TOSA reference model.
+ Lowers a graph to INT TOSA spec (with quantization) and tests it with the TOSA reference model.
Attributes:
module: The module which the pipeline is applied to.
@@ -298,7 +335,6 @@ def __init__(
aten_op: str | List[str],
exir_op: Optional[str | List[str]] = None,
run_on_tosa_ref_model: bool = True,
- tosa_version: str = "TOSA-0.80+BI",
symmetric_io_quantization: bool = False,
per_channel_quantization: bool = True,
use_to_edge_transform_and_lower: bool = True,
@@ -307,10 +343,14 @@ def __init__(
rtol: float = 1e-03,
qtol: int = 1,
dynamic_shapes: Optional[Tuple[Any]] = None,
+ tosa_extensions: Optional[List[str]] = None,
):
+ if tosa_extensions is None:
+ tosa_extensions = []
tosa_profiles = {
- "0.80": TosaSpecification.create_from_string("TOSA-0.80+BI"),
- "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT"),
+ "1.0": TosaSpecification.create_from_string(
+ "TOSA-1.0+INT" + "".join([f"+{ext}" for ext in tosa_extensions])
+ ),
}
tosa_version = conftest.get_option("tosa_version")
@@ -372,9 +412,9 @@ def __init__(
)
-class TosaPipelineMI(BasePipelineMaker, Generic[T]):
+class TosaPipelineFP(TOSAPipelineMaker, Generic[T]):
"""
- Lowers a graph to MI TOSA spec and tests it with the TOSA reference model.
+ Lowers a graph to FP TOSA spec and tests it with the TOSA reference model.
Attributes:
module: The module which the pipeline is applied to.
@@ -399,7 +439,6 @@ def __init__(
aten_op: str | List[str],
exir_op: Optional[str | List[str]] = None,
run_on_tosa_ref_model: bool = True,
- tosa_version: str = "TOSA-0.80+MI",
use_to_edge_transform_and_lower: bool = True,
custom_path: str = None,
atol: float = 1e-03,
@@ -409,10 +448,14 @@ def __init__(
transform_passes: Optional[
Union[Sequence[PassType], Dict[str, Sequence[PassType]]]
] = None,
+ tosa_extensions: Optional[List[str]] = None,
):
+ if tosa_extensions is None:
+ tosa_extensions = []
tosa_profiles = {
- "0.80": TosaSpecification.create_from_string("TOSA-0.80+MI"),
- "1.0": TosaSpecification.create_from_string("TOSA-1.0+FP"),
+ "1.0": TosaSpecification.create_from_string(
+ "TOSA-1.0+FP" + "".join([f"+{ext}" for ext in tosa_extensions])
+ ),
}
tosa_version = conftest.get_option("tosa_version")
@@ -449,9 +492,9 @@ def __init__(
)
-class EthosU55PipelineBI(BasePipelineMaker, Generic[T]):
+class EthosU55PipelineINT(BasePipelineMaker, Generic[T]):
"""
- Lowers a graph to u55 BI TOSA spec and tests it on the Corstone300 FVP, if run_on_fvp is true.
+ Lowers a graph to u55 INT TOSA spec and tests it on the Corstone300 FVP, if run_on_fvp is true.
Attributes:
module: The module which the pipeline is applied to.
@@ -536,9 +579,9 @@ def __init__(
)
-class EthosU85PipelineBI(BasePipelineMaker, Generic[T]):
+class EthosU85PipelineINT(BasePipelineMaker, Generic[T]):
"""
- Lowers a graph to u85 BI TOSA spec and tests it on the Corstone320 FVP, if run_on_fvp is true.
+ Lowers a graph to u85 INT TOSA spec and tests it on the Corstone320 FVP, if run_on_fvp is true.
Attributes:
module: The module which the pipeline is applied to.
@@ -623,7 +666,7 @@ def __init__(
)
-class PassPipeline(BasePipelineMaker, Generic[T]):
+class PassPipeline(TOSAPipelineMaker, Generic[T]):
"""
Runs single passes directly on an edge_program and checks operators before/after.
@@ -659,19 +702,22 @@ def __init__(
pass_functions: Optional[List[Callable]] = None,
passes_with_exported_program: Optional[List[Type[ExportPass]]] = None,
custom_path: str = None,
+ tosa_extensions: Optional[List[str]] = None,
):
+ if tosa_extensions is None:
+ tosa_extensions = []
tosa_profiles = {
- "0.80": TosaSpecification.create_from_string(
- "TOSA-0.80+" + ("BI" if quantize else "MI")
- ),
"1.0": TosaSpecification.create_from_string(
- "TOSA-1.0+" + ("INT" if quantize else "FP")
+ "TOSA-1.0+"
+ + ("INT" if quantize else "FP")
+ + "".join([f"+{ext}" for ext in tosa_extensions]),
),
}
tosa_version = conftest.get_option("tosa_version")
+ self.tosa_spec = tosa_profiles[tosa_version]
compile_spec = common.get_tosa_compile_spec(
- tosa_profiles[tosa_version], custom_path=custom_path
+ self.tosa_spec, custom_path=custom_path
)
super().__init__(
module,
@@ -710,8 +756,12 @@ def __init__(
self.add_stage(self.tester.check_not, ops_not_after_pass, suffix="after")
self.add_stage(self.tester.run_method_and_compare_outputs)
+ def run(self):
+ with TosaLoweringContext(self.tosa_spec):
+ super().run()
+
-class TransformAnnotationPassPipeline(BasePipelineMaker, Generic[T]):
+class TransformAnnotationPassPipeline(TOSAPipelineMaker, Generic[T]):
"""
Runs transform_for_annotation_pipeline passes directly on an exported program and checks output.
@@ -728,10 +778,14 @@ def __init__(
module: torch.nn.Module,
test_data: T,
custom_path: str = None,
+ tosa_extensions: Optional[List[str]] = None,
):
+ if tosa_extensions is None:
+ tosa_extensions = []
tosa_profiles = {
- "0.80": TosaSpecification.create_from_string("TOSA-0.80+BI"),
- "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT"),
+ "1.0": TosaSpecification.create_from_string(
+ "TOSA-1.0+INT" + "".join([f"+{ext}" for ext in tosa_extensions]),
+ ),
}
tosa_version = conftest.get_option("tosa_version")
@@ -763,7 +817,7 @@ def __init__(
)
-class OpNotSupportedPipeline(BasePipelineMaker, Generic[T]):
+class OpNotSupportedPipeline(TOSAPipelineMaker, Generic[T]):
"""
Runs the partitioner on a module and checks that ops are not delegated to test
SupportedTOSAOperatorChecks.
@@ -787,19 +841,23 @@ def __init__(
custom_path: str = None,
quantize: Optional[bool] = False,
u55_subset: Optional[bool] = False,
+ tosa_extensions: Optional[List[str]] = None,
):
+ if tosa_extensions is None:
+ tosa_extensions = []
tosa_profiles = {
- "0.80": "TOSA-0.80+" + ("BI" if quantize else "MI"),
- "1.0": "TOSA-1.0+" + ("INT" if quantize else "FP"),
+ "1.0": TosaSpecification.create_from_string(
+ "TOSA-1.0+"
+ + ("INT" if quantize else "FP")
+ + ("+u55" if u55_subset and quantize else "")
+ + "".join([f"+{ext}" for ext in tosa_extensions]),
+ ),
}
- tosa_version = tosa_profiles[conftest.get_option("tosa_version")]
+ tosa_version = conftest.get_option("tosa_version")
- if u55_subset and quantize:
- tosa_version = f"{tosa_version}+u55"
+ tosa_spec = tosa_profiles[tosa_version]
- compile_spec = common.get_tosa_compile_spec(
- tosa_version, custom_path=custom_path
- )
+ compile_spec = common.get_tosa_compile_spec(tosa_spec, custom_path=custom_path)
super().__init__(
module,
test_data,
@@ -808,7 +866,7 @@ def __init__(
[],
)
- if "INT" in tosa_version or "BI" in tosa_version:
+ if tosa_spec.support_integer():
self.add_stage(self.tester.quantize, pos=0)
self.change_args("check_not.exir", [])
@@ -834,7 +892,9 @@ class VgfPipeline(BasePipelineMaker, Generic[T]):
exir_ops: Exir dialect ops expected to be found in the graph after to_edge.
if not using use_edge_to_transform_and_lower.
- run_on_vulkan_runtime: Not yet supported.
+ run_on_vulkan_runtime: Partially supported. However, comparison between reference and model
+ outputs is expected to fail, as the VGF runtime doesn't dump the output tensors in a usable
+ format at the moment.
vgf_compiler_flags: Optional compiler flags.
@@ -864,11 +924,16 @@ def __init__(
transform_passes: Optional[
Union[Sequence[PassType], Dict[str, Sequence[PassType]]]
] = None,
+ tosa_extensions: Optional[List[str]] = None,
):
- tosa_profile = TosaSpecification.create_from_string(tosa_version)
+ if tosa_extensions is None:
+ tosa_extensions = []
+ tosa_spec = TosaSpecification.create_from_string(
+ tosa_version + "".join([f"+{ext}" for ext in tosa_extensions])
+ )
compile_spec = common.get_vgf_compile_spec(
- tosa_profile, compiler_flags=vgf_compiler_flags, custom_path=custom_path
+ tosa_spec, compiler_flags=vgf_compiler_flags, custom_path=custom_path
)
super().__init__(
@@ -882,7 +947,7 @@ def __init__(
transform_passes=transform_passes,
)
- if "INT" in tosa_version:
+ if tosa_spec.support_integer():
quantizer = VgfQuantizer(compile_spec)
quantization_config = get_symmetric_quantization_config(
is_per_channel=per_channel_quantization
@@ -929,4 +994,11 @@ def __init__(
)
if run_on_vulkan_runtime:
- pass
+ self.add_stage(self.tester.serialize)
+ self.add_stage(
+ self.tester.run_method_and_compare_outputs,
+ atol=atol,
+ rtol=rtol,
+ qtol=qtol,
+ inputs=self.test_data,
+ )
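The pipelines above now assemble their TOSA specification string from a fixed 1.0 profile plus optional extensions. A minimal sketch of that assembly as a plain helper (the function name is illustrative):

```python
def make_spec_string(profile: str, extensions: list[str] | None = None) -> str:
    """Builds e.g. 'TOSA-1.0+INT+int16' the way the pipelines above do."""
    extensions = extensions or []
    return "TOSA-1.0+" + profile + "".join(f"+{ext}" for ext in extensions)


assert make_spec_string("INT") == "TOSA-1.0+INT"
assert make_spec_string("FP", ["bf16"]) == "TOSA-1.0+FP+bf16"
assert make_spec_string("INT", ["u55", "int16"]) == "TOSA-1.0+INT+u55+int16"
```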
diff --git a/backends/arm/third-party/reference_model/patches/v0.80/reference_model/0001-Move-tosa-tools-to-be-namespaced-into-tosa-tools.v0_.patch b/backends/arm/third-party/reference_model/patches/v0.80/reference_model/0001-Move-tosa-tools-to-be-namespaced-into-tosa-tools.v0_.patch
deleted file mode 100644
index 512c105bda2..00000000000
--- a/backends/arm/third-party/reference_model/patches/v0.80/reference_model/0001-Move-tosa-tools-to-be-namespaced-into-tosa-tools.v0_.patch
+++ /dev/null
@@ -1,154 +0,0 @@
-From 20c2059723d5c6952cecfb7fcde92601639ef825 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Per=20=C3=85strand?=
-Date: Wed, 5 Feb 2025 12:31:47 +0100
-Subject: [PATCH 1/2] Move tosa-tools to be namespaced into tosa-tools.v0_80
-
----
- CMakeLists.txt | 4 ++-
- pyproject.toml | 3 ++-
- setup.cfg | 70 +++++++++++++++++++++++++-------------------------
- setup.py | 3 ++-
- 4 files changed, 42 insertions(+), 38 deletions(-)
-
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 68e8d8a..34becd0 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -1,4 +1,6 @@
--cmake_minimum_required (VERSION 3.4)
-+cmake_minimum_required (VERSION 3.19)
-+
-+cmake_policy(SET CMP0077 NEW)
-
- set(CMAKE_INSTALL_PREFIX ".")
- project(tosa_tools LANGUAGES CXX)
-diff --git a/pyproject.toml b/pyproject.toml
-index 7565f93..60448e7 100644
---- a/pyproject.toml
-+++ b/pyproject.toml
-@@ -6,7 +6,8 @@ requires = [
- "setuptools>=42",
- "wheel",
- "setuptools_scm[toml]>=6.0",
-- "cmake"
-+ "cmake",
-+ "ninja",
- ]
- build-backend = "setuptools.build_meta"
-
-diff --git a/setup.cfg b/setup.cfg
-index 82ec9b8..c1bd1a8 100644
---- a/setup.cfg
-+++ b/setup.cfg
-@@ -2,7 +2,7 @@
- # SPDX-License-Identifier: Apache-2.0
-
- [metadata]
--name = tosa-tools
-+name = tosa-tools-v0.80
- # version = done by setuptools_scm in pyproject.toml
- author = Arm Limited
- #author_email =
-@@ -25,44 +25,44 @@ install_requires =
- python_requires = >=3.6
- include_package_data = True
- packages =
-- runner
-- generator
-- checker
-- frameworks
-- tests
-- conformance
-- xunit
-- json2fbbin
-- json2numpy
-- schemavalidation
-- convert2conformance
-- tosa
-- serializer
-- tosa_reference_model
-+ tosa_tools.v0_80.verif.runner
-+ tosa_tools.v0_80.verif.generator
-+ tosa_tools.v0_80.verif.checker
-+ tosa_tools.v0_80.verif.frameworks
-+ tosa_tools.v0_80.verif.tests
-+ tosa_tools.v0_80.verif.conformance
-+ tosa_tools.v0_80.xunit
-+ tosa_tools.v0_80.json2fbbin
-+ tosa_tools.v0_80.json2numpy
-+ tosa_tools.v0_80.schemavalidation
-+ tosa_tools.v0_80.convert2conformance
-+ tosa_tools.v0_80.tosa
-+ tosa_tools.v0_80.serializer
-+ tosa_tools.v0_80.tosa_reference_model
- package_dir =
-- = verif
-- xunit = scripts/xunit
-- json2fbbin = scripts/json2fbbin
-- json2numpy = scripts/json2numpy
-- convert2conformance = scripts/convert2conformance
-- tosa = thirdparty/serialization_lib/python/tosa
-- serializer = thirdparty/serialization_lib/python/serializer
-- tosa_reference_model = py_package
-- schemavalidation = scripts/schemavalidation
-+ tosa_tools.v0_80.verif = verif
-+ tosa_tools.v0_80.xunit = scripts/xunit
-+ tosa_tools.v0_80.json2fbbin = scripts/json2fbbin
-+ tosa_tools.v0_80.json2numpy = scripts/json2numpy
-+ tosa_tools.v0_80.convert2conformance = scripts/convert2conformance
-+ tosa_tools.v0_80.tosa = thirdparty/serialization_lib/python/tosa
-+ tosa_tools.v0_80.serializer = thirdparty/serialization_lib/python/serializer
-+ tosa_tools.v0_80.tosa_reference_model = py_package
-+ tosa_tools.v0_80.schemavalidation = scripts/schemavalidation
-
- [options.entry_points]
- console_scripts =
-- tosa_verif_run_ref = runner.tosa_verif_run_tests:main
-- tosa_verif_run_tests = runner.tosa_verif_run_tests:main
-- tosa_verif_build_tests = generator.tosa_verif_build_tests:main
-- tosa_json2numpy = json2numpy.json2numpy:main
-- tosa_json2fbbin = json2fbbin.json2fbbin:main
-- tosa_verif_result_check = checker.tosa_result_checker:main
-- tosa_convert2conformance = convert2conformance.convert2conformance:main
-- tosa_verif_framework_generator = frameworks.tosa_verif_framework_generator:main
-- tosa_verif_framework_compiler_runner = frameworks.tosa_verif_framework_compiler_runner:main
-- tosa_verif_conformance_generator = conformance.tosa_verif_conformance_generator:main
-- tosa_schemavalidation = schemavalidation.schemavalidation:main
-+ tosa_verif_run_ref = tosa_tools.v0_80.verif.runner.tosa_verif_run_tests:main
-+ tosa_verif_run_tests = tosa_tools.v0_80.verif.runner.tosa_verif_run_tests:main
-+ tosa_verif_build_tests = tosa_tools.v0_80.verif.generator.tosa_verif_build_tests:main
-+ tosa_json2numpy = tosa_tools.v0_80.verif.json2numpy.json2numpy:main
-+ tosa_json2fbbin = tosa_tools.v0_80.verif.json2fbbin.json2fbbin:main
-+ tosa_verif_result_check = tosa_tools.v0_80.verif.checker.tosa_result_checker:main
-+ tosa_convert2conformance = tosa_tools.v0_80.verif.convert2conformance.convert2conformance:main
-+ tosa_verif_framework_generator = tosa_tools.v0_80.verif.frameworks.tosa_verif_framework_generator:main
-+ tosa_verif_framework_compiler_runner = tosa_tools.v0_80.verif.frameworks.tosa_verif_framework_compiler_runner:main
-+ tosa_verif_conformance_generator = tosa_tools.v0_80.verif.conformance.tosa_verif_conformance_generator:main
-+ tosa_schemavalidation = tosa_tools.v0_80.verif.schemavalidation.schemavalidation:main
-
- [options.package_data]
- schemavalidation=
-diff --git a/setup.py b/setup.py
-index 8c6b4cd..95896ad 100644
---- a/setup.py
-+++ b/setup.py
-@@ -20,7 +20,7 @@ class CMakeBuild(build_py):
- root_dir = Path(__file__).parent
- build_dir = root_dir / "build"
- build_dir.mkdir(exist_ok=True)
-- package_dir = root_dir / "py_package"
-+ package_dir = root_dir / "build/lib/tosa_tools/v0_80/tosa_reference_model/"
-
- cmake_cmd = [
- "cmake",
-@@ -90,6 +90,7 @@ class CMakeBuild(build_py):
- # Python will know which one to import
- copied_so = False
- so_dir = build_dir / "reference_model"
-+ package_dir.mkdir(parents=True, exist_ok=True)
- print(f"copying .so files from '{so_dir}' to '{package_dir}'")
- for so_file in so_dir.glob("tosa_reference_model.*.so"):
- shutil.copy(so_file, package_dir)
---
-2.39.5 (Apple Git-154)
-
diff --git a/backends/arm/third-party/reference_model/patches/v0.80/serialization_lib/0001-Make-TOSA-serializer-lib-to-be-self-contained.patch b/backends/arm/third-party/reference_model/patches/v0.80/serialization_lib/0001-Make-TOSA-serializer-lib-to-be-self-contained.patch
deleted file mode 100644
index cc9cbc4edad..00000000000
--- a/backends/arm/third-party/reference_model/patches/v0.80/serialization_lib/0001-Make-TOSA-serializer-lib-to-be-self-contained.patch
+++ /dev/null
@@ -1,283 +0,0 @@
-From b3c8c3f779a7e051826f317598fb831fa9cfe923 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Per=20=C3=85strand?=
-Date: Wed, 5 Feb 2025 12:30:09 +0100
-Subject: [PATCH] Make TOSA serializer lib to be self contained
-
----
- CMakeLists.txt | 4 ++
- python/serializer/tosa_serializer.py | 57 ++++++++++++++--------------
- 2 files changed, 32 insertions(+), 29 deletions(-)
-
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index ac34b75..5e191aa 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -19,6 +19,8 @@
- cmake_minimum_required(VERSION 3.13.4)
- project(TosaSerialization)
-
-+cmake_policy(SET CMP0077 NEW)
-+
- set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to")
- set(CMAKE_CXX_STANDARD_REQUIRED YES)
-
-@@ -27,6 +29,8 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
- option(BUILD_TESTS "Build test applications" ON)
- option(FLATBUFFERS_ROOT "Location where the flatbuffers 'include' and 'lib' folders to be found" Off)
-
-+message(STATUS "FLATBUFFERS_ROOT set to: ${FLATBUFFERS_ROOT}")
-+
- include_directories(${PROJECT_SOURCE_DIR}/third_party/half/include)
-
- include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
-diff --git a/python/serializer/tosa_serializer.py b/python/serializer/tosa_serializer.py
-index 7bc75f0..d191997 100644
---- a/python/serializer/tosa_serializer.py
-+++ b/python/serializer/tosa_serializer.py
-@@ -14,12 +14,11 @@
-
- import os
- import struct
--import serializer.tosa_serializer as ts
- import json
- import flatbuffers
- import numpy as np
- from enum import IntEnum, unique
--from tosa import (
-+from ..tosa import (
- TosaGraph,
- TosaRegion,
- TosaBasicBlock,
-@@ -27,8 +26,8 @@ from tosa import (
- TosaOperator,
- Version,
- )
--import tosa.DType as TosaDType
--import tosa.Op as TosaOp
-+from ..tosa import DType as TosaDType
-+from ..tosa import Op as TosaOp
-
- # Keep version number in sync with the version default value with schema/tosa.fbs
- TOSA_VERSION_MAJOR = 0
-@@ -159,7 +158,7 @@ class TosaSerializerAttribute(TosaSerializerUnion):
- output_zp,
- accum_dtype,
- ):
-- from tosa import PoolAttribute as a, Attribute
-+ from ..tosa import PoolAttribute as a, Attribute
-
- self.utype = Attribute.Attribute().PoolAttribute
-
-@@ -172,7 +171,7 @@ class TosaSerializerAttribute(TosaSerializerUnion):
- self.ints.append((a.AddAccumDtype, accum_dtype))
-
- def ConvAttribute(self, pad, stride, dilation, input_zp, weight_zp, local_bound):
-- from tosa import ConvAttribute as a, Attribute
-+ from ..tosa import ConvAttribute as a, Attribute
-
- self.utype = Attribute.Attribute().ConvAttribute
- self.optFcns = (a.Start, a.End)
-@@ -187,7 +186,7 @@ class TosaSerializerAttribute(TosaSerializerUnion):
- def TransposeConvAttribute(
- self, outpad, stride, output_shape, input_zp, weight_zp, local_bound
- ):
-- from tosa import TransposeConvAttribute as a, Attribute
-+ from ..tosa import TransposeConvAttribute as a, Attribute
-
- self.utype = Attribute.Attribute().TransposeConvAttribute
- self.optFcns = (a.Start, a.End)
-@@ -200,7 +199,7 @@ class TosaSerializerAttribute(TosaSerializerUnion):
- self.bools.append((a.AddLocalBound, local_bound))
-
- def PadAttribute(self, serializer_builder, padding, pad_const_int, pad_const_fp):
-- from tosa import PadAttribute as a, Attribute
-+ from ..tosa import PadAttribute as a, Attribute
-
- self.utype = Attribute.Attribute().PadAttribute
- self.optFcns = (a.Start, a.End)
-@@ -210,14 +209,14 @@ class TosaSerializerAttribute(TosaSerializerUnion):
-
- # pad_const_fp attribute serialized as uint8 vector
- pad_const_float_as_bytes = struct.pack("<f", pad_const_fp)
diff --git a/backends/arm/tosa/dialect/ops/rescale.py b/backends/arm/tosa/dialect/ops/rescale.py
new file mode 100644
--- /dev/null
+++ b/backends/arm/tosa/dialect/ops/rescale.py
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm.tosa.dialect.lib import TosaValueError
+from executorch.backends.arm.tosa.dialect.ops_registration import register_fake_tosa_op
+
+from executorch.backends.arm.tosa_specification import (
+ get_context_spec,
+ TosaSpecification,
+)
+
+
+@register_fake_tosa_op(
+ "RESCALE(Tensor x, ScalarType dtype, float scale, int in_zp, int out_zp) -> Tensor", # schema
+ (
+ TosaSpecification.create_from_string("TOSA-1.0+INT"),
+ ), # target TOSA specifications
+)
+def RESCALE(
+ x: torch.Tensor, dtype: torch.dtype, scale: float, in_zp: int, out_zp: int
+) -> torch.Tensor:
+ """Casts the input tensor to dtype `dtype` to produce the correct tensor meta for a _rescale op.
+ Additionally validates TOSA constraints of a RESCALE op.
+ """
+ tosa_spec = get_context_spec()
+ if not tosa_spec.support_integer():
+ raise TosaValueError(
+ f"TOSA spec {tosa_spec} doesn't support integers", op="RESCALE"
+ )
+
+ if dtype not in (torch.int32, torch.int8, torch.int16):
+ raise NotImplementedError(
+ f"tosa::rescale currently only supports int32, int16 and int8, not {dtype}"
+ )
+ if dtype in (torch.int32, torch.int16) and out_zp != 0:
+ raise ValueError(
+ f"TOSA requires output_zp to be zero when the output dtype is {dtype}."
+ )
+ if x.dtype in (torch.int32, torch.int16) and in_zp != 0:
+ raise ValueError(
+ f"TOSA requires input_zp to be zero when the input dtype is {dtype}"
+ )
+ if x.dtype == torch.int8 and not -128 <= in_zp <= 127:
+ raise ValueError(f"{in_zp=} outside valid range [-128, 127] for int8.")
+ if dtype == torch.int8 and not -128 <= out_zp <= 127:
+ raise ValueError(f"{out_zp=} outside valid range [-128, 127] for int8.")
+
+ return torch.empty_like(x, dtype=dtype)
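The RESCALE checks above encode TOSA's zero-point rules: int32 and int16 operands must use a zero point of 0, and int8 zero points must fit the int8 range. A standalone sketch of just those rules (not the registered op, so it runs without the Arm backend installed):

```python
import torch


def check_rescale_zero_points(in_dtype, out_dtype, in_zp, out_zp):
    # Mirrors the validation in the fake RESCALE op above.
    if out_dtype in (torch.int32, torch.int16) and out_zp != 0:
        raise ValueError(f"output_zp must be 0 for {out_dtype}")
    if in_dtype in (torch.int32, torch.int16) and in_zp != 0:
        raise ValueError(f"input_zp must be 0 for {in_dtype}")
    if in_dtype == torch.int8 and not -128 <= in_zp <= 127:
        raise ValueError(f"{in_zp=} out of int8 range")
    if out_dtype == torch.int8 and not -128 <= out_zp <= 127:
        raise ValueError(f"{out_zp=} out of int8 range")


check_rescale_zero_points(torch.int8, torch.int32, in_zp=-5, out_zp=0)  # passes
```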
diff --git a/backends/arm/tosa/dialect/ops/table.py b/backends/arm/tosa/dialect/ops/table.py
new file mode 100644
index 00000000000..5fbbf55f910
--- /dev/null
+++ b/backends/arm/tosa/dialect/ops/table.py
@@ -0,0 +1,53 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm.tosa.dialect.lib import TosaValueError
+from executorch.backends.arm.tosa.dialect.ops_registration import register_fake_tosa_op
+
+from executorch.backends.arm.tosa_specification import (
+ get_context_spec,
+ TosaSpecification,
+)
+
+
+@register_fake_tosa_op(
+ "TABLE(Tensor input1, Tensor table) -> Tensor", # schema
+ (
+ TosaSpecification.create_from_string("TOSA-1.0+INT"),
+ ), # target TOSA specifications
+)
+def TABLE(a, table):
+ tosa_spec = get_context_spec()
+ # Verify input types according to the spec.
+ if not tosa_spec.support_integer():
+ raise TosaValueError(
+ f"TOSA spec {tosa_spec} doesn't support integers", op="TABLE"
+ )
+
+ if a.dtype == torch.int8:
+ if table.shape != torch.Size((256,)):
+ raise TosaValueError(
+ f"Table of wrong size ({table.shape}!={torch.Size((256,))}", op="TABLE"
+ )
+ if table.dtype != torch.int8:
+ raise TosaValueError(f"Table dtype {table.dtype} is not int8", op="TABLE")
+ return_dtype = torch.int8
+ elif a.dtype == torch.int16:
+ if not tosa_spec.support_extension("int16"):
+ raise TosaValueError(
+ f"Context TOSA spec {tosa_spec} doesn't support int16", op="TABLE"
+ )
+ if table.shape != torch.Size((513,)):
+ raise TosaValueError(
+ f"Table of wrong size ({table.shape}!={torch.Size((513,))})", op="TABLE"
+ )
+ if table.dtype != torch.int16:
+ raise TosaValueError(f"Table dtype {table.dtype} is not int32", op="TABLE")
+ return_dtype = torch.int32
+ else:
+ raise TosaValueError(f"Unsupported dtype for {tosa_spec}", op="TABLE")
+
+ return torch.empty_like(a, dtype=return_dtype)
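TABLE's validation pins the TOSA lookup-table sizes: a 256-entry int8 table for int8 inputs, and a 513-entry int16 table (producing int32) for int16 inputs. A hedged sketch of the size/dtype rule in isolation:

```python
import torch


def table_result_dtype(x: torch.Tensor, table: torch.Tensor) -> torch.dtype:
    # Same rule as the fake TABLE op above, minus the extension check.
    if x.dtype == torch.int8 and table.shape == (256,) and table.dtype == torch.int8:
        return torch.int8
    if x.dtype == torch.int16 and table.shape == (513,) and table.dtype == torch.int16:
        return torch.int32
    raise ValueError("unsupported input/table combination")


x = torch.zeros(4, dtype=torch.int16)
t = torch.zeros(513, dtype=torch.int16)
print(table_result_dtype(x, t))  # torch.int32
```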
diff --git a/backends/arm/tosa/dialect/ops/transpose.py b/backends/arm/tosa/dialect/ops/transpose.py
new file mode 100644
index 00000000000..43095c97bd7
--- /dev/null
+++ b/backends/arm/tosa/dialect/ops/transpose.py
@@ -0,0 +1,35 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm.tosa.dialect.lib import TosaValueError
+from executorch.backends.arm.tosa.dialect.ops_registration import register_fake_tosa_op
+
+from executorch.backends.arm.tosa_specification import TosaSpecification
+
+
+@register_fake_tosa_op(
+ "TRANSPOSE(Tensor input, int[] perms) -> Tensor", # schema
+ (
+ TosaSpecification.create_from_string("TOSA-1.0+FP"),
+ TosaSpecification.create_from_string("TOSA-1.0+INT"),
+ ), # target TOSA specifications
+)
+def TRANSPOSE(a, perms):
+ # The TOSA TRANSPOSE only performs the transpose in the serialized TOSA world,
+ # so just return a tensor with the same shape and type.
+
+ # For certain operators we need the data in a specific data format. Changing tosa_dim_order
+ # is not sufficient, as we also need to transpose the data.
+ # By using an edge IR passthrough operator we can keep the edge program in
+ # channels-first/contiguous layout and get the desired behavior in the TOSA lowering.
+
+ if len(perms) not in (4, 5):
+ raise TosaValueError(
+ f"Only 4D and 5D tensors are supported, got {len(perms)}: {perms}",
+ op="TRANSPOSE",
+ )
+
+ return torch.empty_like(a, dtype=a.dtype)
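Since the data movement happens only at TOSA serialization time, the fake op validates the permutation rank and otherwise mirrors its input. A standalone sketch (illustrative, not the registered op):

```python
import torch


def fake_transpose(a: torch.Tensor, perms: list[int]) -> torch.Tensor:
    if len(perms) not in (4, 5):
        raise ValueError(f"only 4D/5D permutations supported, got {len(perms)}")
    # Shape and dtype are unchanged at the edge-IR level; the real
    # transpose is emitted during TOSA lowering.
    return torch.empty_like(a, dtype=a.dtype)


y = fake_transpose(torch.randn(1, 3, 8, 8), [0, 2, 3, 1])
print(y.shape)  # torch.Size([1, 3, 8, 8])
```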
diff --git a/backends/arm/tosa/dialect/ops_registration.py b/backends/arm/tosa/dialect/ops_registration.py
index 865eca6b21b..ad83824b3a2 100644
--- a/backends/arm/tosa/dialect/ops_registration.py
+++ b/backends/arm/tosa/dialect/ops_registration.py
@@ -26,7 +26,7 @@
_registered_tosa_ops_by_func: dict[Callable, Callable] = {}
-def register_tosa_op(
+def register_fake_tosa_op(
op_schema: str, tosa_specs: Iterable[TosaSpecification]
) -> Callable[[Callable[P, R]], Callable[P, R]]:
"""
diff --git a/backends/arm/tosa/schemas/tosa_0.80.fbs b/backends/arm/tosa/schemas/tosa_0.80.fbs
deleted file mode 100644
index a781b0d8a24..00000000000
--- a/backends/arm/tosa/schemas/tosa_0.80.fbs
+++ /dev/null
@@ -1,314 +0,0 @@
-// Copyright 2025 Arm Limited and/or its affiliates.
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-namespace tosa;
-
-// This corresponds to the version.
-file_identifier "TOSA";
-// File extension of any written files.
-file_extension "tosa";
-
-// NOTE: New values added to the schema should be placed
-// at the end of the list in order to keep schema stable.
-
-enum DType:uint32 {
- UNKNOWN = 0,
- BOOL,
- UINT8,
- INT4,
- INT8,
- INT16,
- INT32,
- INT48,
- FP32,
- UINT16,
- FP16,
- BF16,
- SHAPE,
-}
-
-enum ResizeMode:uint32 {
- UNKNOWN = 0,
- NEAREST,
- BILINEAR,
-}
-
-enum Op:uint32 {
- UNKNOWN = 0,
- ARGMAX,
- AVG_POOL2D,
- CONV2D,
- CONV3D,
- DEPTHWISE_CONV2D,
- FULLY_CONNECTED,
- MATMUL,
- MAX_POOL2D,
- TRANSPOSE_CONV2D,
- CLAMP,
- RESERVED,
- SIGMOID,
- TANH,
- ADD,
- ARITHMETIC_RIGHT_SHIFT,
- BITWISE_AND,
- BITWISE_OR,
- BITWISE_XOR,
- INTDIV,
- LOGICAL_AND,
- LOGICAL_LEFT_SHIFT,
- LOGICAL_RIGHT_SHIFT,
- LOGICAL_OR,
- LOGICAL_XOR,
- MAXIMUM,
- MINIMUM,
- MUL,
- POW,
- SUB,
- TABLE,
- ABS,
- BITWISE_NOT,
- CEIL,
- CLZ,
- EXP,
- FLOOR,
- LOG,
- LOGICAL_NOT,
- NEGATE,
- RECIPROCAL,
- RSQRT,
- SELECT,
- EQUAL,
- GREATER,
- GREATER_EQUAL,
- REDUCE_ANY,
- REDUCE_ALL,
- REDUCE_MAX,
- REDUCE_MIN,
- REDUCE_PRODUCT,
- REDUCE_SUM,
- CONCAT,
- PAD,
- RESHAPE,
- REVERSE,
- SLICE,
- TILE,
- TRANSPOSE,
- GATHER,
- SCATTER,
- RESIZE,
- CAST,
- RESCALE,
- CONST,
- IDENTITY,
- CUSTOM,
- COND_IF,
- WHILE_LOOP,
- FFT2D,
- RFFT2D,
- ERF,
- DIM,
-}
-
-union Attribute {
- PoolAttribute,
- ConvAttribute,
- TransposeConvAttribute,
- PadAttribute,
- AxisAttribute,
- ReshapeAttribute,
- SliceAttribute,
- TileAttribute,
- ResizeAttribute,
- ClampAttribute,
- RescaleAttribute,
- MulAttribute,
- ArithmeticRightShiftAttribute,
- CondIfAttribute,
- WhileLoopAttribute,
- TransposeAttribute,
- TableAttribute,
- MatMulAttribute,
- FullyConnectedAttribute,
- NegateAttribute,
- CustomAttribute,
- FFTAttribute,
- RFFTAttribute,
-}
-
-table PoolAttribute {
- pad: [int32];
- kernel: [int32];
- stride: [int32];
- input_zp: int32;
- output_zp: int32;
- accum_dtype: DType;
-}
-
-table ConvAttribute {
- pad: [int32];
- stride: [int32];
- dilation: [int32];
- input_zp: int32;
- weight_zp: int32;
- local_bound: bool;
-}
-
-table TransposeConvAttribute {
- out_pad: [int32];
- stride: [int32];
- output_shape: [int32];
- input_zp: int32;
- weight_zp: int32;
- local_bound: bool;
-}
-
-table PadAttribute {
- padding: [int32];
- pad_const_int: int32;
- pad_const_fp: [ubyte] (force_align: 8);
-}
-
-table AxisAttribute {
- axis: int32;
-}
-
-table ReshapeAttribute {
- new_shape: [int32];
-}
-
-table SliceAttribute {
- start: [int32];
- size: [int32];
-}
-
-table TileAttribute {
- multiples: [int32];
-}
-
-table ResizeAttribute {
- scale: [int16];
- offset: [int16];
- border: [int16];
- mode: ResizeMode;
-}
-
-table ClampAttribute {
- min_int: int32;
- max_int: int32;
- min_fp: [ubyte] (force_align: 8);
- max_fp: [ubyte] (force_align: 8);
-}
-
-table RescaleAttribute {
- input_zp: int32;
- output_zp: int32;
- multiplier: [int32];
- shift: [int32];
- scale32: bool;
- double_round: bool;
- per_channel: bool;
- input_unsigned: bool;
- output_unsigned: bool;
-}
-
-table MulAttribute {
- shift: int32;
-}
-
-table ArithmeticRightShiftAttribute {
- round: bool;
-}
-
-table CondIfAttribute {
- then_branch: string;
- else_branch: string;
-}
-
-table WhileLoopAttribute {
- cond_branch: string;
- body_branch: string;
-}
-
-table TransposeAttribute {
- perms: [int32];
-}
-
-table TableAttribute {
- table: [int16];
-}
-
-table MatMulAttribute {
- a_zp: int32;
- b_zp: int32;
-}
-
-table FullyConnectedAttribute {
- input_zp: int32;
- weight_zp: int32;
-}
-
-table NegateAttribute {
- input1_zp: int32;
- output_zp: int32;
-}
-
-table CustomAttribute {
- operator_name:string;
- domain_name:string;
- implementation_attrs:[ubyte];
-}
-
-table FFTAttribute {
- inverse: bool;
- local_bound: bool;
-}
-
-table RFFTAttribute {
- local_bound: bool;
-}
-
-table Version {
- _major: int32 = -1;
- _minor: int32 = -1;
- _patch: int32 = -1;
- _draft: bool = true;
-}
-
-table TosaTensor {
- name:string; // name of the tensor, used for solving dependency
- shape:[int32]; // shape of the tensor
- type:DType; // data type of the tensor
- data: [ubyte] (force_align: 8); // raw data array if it's a constant tensor.
- variable: bool; // is this a variable tensor
- is_unranked: bool; // whether this is an unranked tensor
- variable_name:string; // name for variable attribute
-}
-
-table TosaOperator {
- op:Op; // operator enum
- attribute:Attribute; // union structure. operator attribute
- inputs:[string]; // list of input tensor names
- outputs:[string]; // list of output tensor names
-}
-
-table TosaBasicBlock {
- name:string; // basic block name
- operators:[TosaOperator]; // operators array
- tensors:[TosaTensor]; // tensors array
- inputs:[string]; // name of graph inputs
- outputs:[string]; // name of graph outputs
-}
-
-table TosaRegion {
- name:string; // name of region
- blocks:[TosaBasicBlock]; // basic blocks array
-}
-
-table TosaGraph {
- version:Version (required);
- regions:[TosaRegion]; // regions array
-}
-
-root_type TosaGraph;
diff --git a/backends/arm/tosa_backend.py b/backends/arm/tosa_backend.py
index 0f03e12c916..7062d68b944 100644
--- a/backends/arm/tosa_backend.py
+++ b/backends/arm/tosa_backend.py
@@ -13,19 +13,18 @@
import logging
from typing import cast, final, List
-import executorch.backends.arm.tosa_specification as tosa_specification
-
-from executorch.backends.arm.arm_backend import get_tosa_spec
+import serializer.tosa_serializer as ts # type: ignore
from executorch.backends.arm.operators.node_visitor import get_node_visitors
+from executorch.backends.arm.tosa_specification import get_tosa_spec
from executorch.backends.arm._passes import (
ArmPassManager,
) # usort: skip
+from executorch.backends.arm.common.debug import debug_fail, debug_tosa_dump
from executorch.backends.arm.process_node import (
process_call_function,
process_output,
process_placeholder,
)
-from executorch.backends.arm.tosa_utils import dbg_fail, dbg_tosa_dump
from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
from executorch.exir.backend.compile_spec_schema import CompileSpec
from torch.export.exported_program import ExportedProgram
@@ -85,15 +84,6 @@ def preprocess( # noqa: C901
# Converted output for this subgraph, serializer needs path early as it emits
# const data directly. Path created and data written only in debug builds.
- if isinstance(tosa_spec, tosa_specification.Tosa_0_80):
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
- elif isinstance(tosa_spec, tosa_specification.Tosa_1_00):
- import serializer.tosa_serializer as ts # type: ignore
- else:
- raise RuntimeError(
- f"Unknown TOSA version {tosa_spec}, no pip package installed to handle serialization to that version."
- )
-
tosa_graph = ts.TosaSerializer(artifact_path)
assert (
@@ -125,12 +115,12 @@ def preprocess( # noqa: C901
# any checking of compatibility.
raise RuntimeError(f"{node.name} is unsupported op {node.op}")
except Exception:
- dbg_fail(node, graph_module, tosa_graph, artifact_path)
+ debug_fail(node, graph_module, tosa_graph, artifact_path)
raise
if artifact_path:
tag = arm_get_first_delegation_tag(graph_module)
- dbg_tosa_dump(
+ debug_tosa_dump(
tosa_graph,
artifact_path,
suffix="{}".format(f"_{tag}" if tag else "") + (f"_{tosa_spec}"),
diff --git a/backends/arm/tosa_mapping.py b/backends/arm/tosa_mapping.py
index 7d662b72328..4c290a962f0 100644
--- a/backends/arm/tosa_mapping.py
+++ b/backends/arm/tosa_mapping.py
@@ -13,12 +13,10 @@
from typing import Any, Optional, Sequence
+import serializer.tosa_serializer as ts # type: ignore
+
import torch
-from executorch.backends.arm.tosa_specification import (
- Tosa_0_80,
- Tosa_1_00,
- TosaSpecification,
-)
+from executorch.backends.arm.tosa_specification import TosaSpecification
UNSUPPORTED_DTYPES = (
torch.float64,
@@ -36,12 +34,6 @@
def map_dtype(data_type: torch.dtype, tosa_spec: TosaSpecification) -> Any:
if data_type in UNSUPPORTED_DTYPES:
raise ValueError(f"Unsupported type: {data_type}")
- if isinstance(tosa_spec, Tosa_0_80):
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
- elif isinstance(tosa_spec, Tosa_1_00):
- import serializer.tosa_serializer as ts # type: ignore
- else:
- raise RuntimeError(f"Unsupported tosa_spec: {tosa_spec}")
dtype_map = {
torch.float32: ts.DType.FP32,
@@ -140,12 +132,6 @@ def __repr__(self):
if self.name is not None:
attrs.append(f"name={self.name!r}")
if self.dtype is not None:
- if isinstance(self.tosa_spec, Tosa_0_80):
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
- elif isinstance(self.tosa_spec, Tosa_1_00):
- import serializer.tosa_serializer as ts # type: ignore
- else:
- raise RuntimeError(f"Unsupported tosa_spec: {self.tosa_spec}")
attrs.append(f"dtype={ts.DTypeNames[self.dtype]}")
if self.shape is not None:
attrs.append(f"shape={self.shape!r}")
diff --git a/backends/arm/tosa_partitioner.py b/backends/arm/tosa_partitioner.py
index 0a0b0f33b6c..3c51f781ea5 100644
--- a/backends/arm/tosa_partitioner.py
+++ b/backends/arm/tosa_partitioner.py
@@ -9,8 +9,8 @@
from typing import Callable, List, Optional, Sequence, Tuple
import torch
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
from executorch.backends.arm.arm_backend import (
- get_tosa_spec,
is_tosa,
) # usort: skip
from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
@@ -18,6 +18,7 @@
tosa_support_factory,
)
from executorch.backends.arm.tosa_backend import TOSABackend
+from executorch.backends.arm.tosa_specification import get_tosa_spec
from executorch.exir.backend.compile_spec_schema import CompileSpec
from executorch.exir.backend.partitioner import (
DelegationSpec,
@@ -25,7 +26,6 @@
PartitionResult,
)
from executorch.exir.backend.utils import tag_constant_data, WhyNoPartitionReporter
-from executorch.exir.dialects._ops import ops as exir_ops
from torch.export.exported_program import ExportedProgram
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
from torch.fx.passes.operator_support import OperatorSupportBase
@@ -34,22 +34,6 @@
logger = logging.getLogger(__name__)
-def is_quant_node(node: torch.fx.node.Node) -> bool:
- return node.target in {
- exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
- exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
- exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
- }
-
-
-def is_dequant_node(node: torch.fx.node.Node) -> bool:
- return node.target in {
- exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
- exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
- exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
- }
-
-
class TOSAPartitioner(Partitioner):
def __init__(
self,
@@ -99,14 +83,14 @@ def is_partitioned(node: torch.fx.Node, tag=tag) -> bool:
for node in exported_program.graph_module.graph.nodes:
if not is_partitioned(node):
continue
- if is_quant_node(node):
+ if node.target in Q_OPS:
for input in node.all_input_nodes:
if not is_partitioned(input):
del node.meta["delegation_tag"]
break
continue
- if is_dequant_node(node):
+ if node.target in DQ_OPS:
for user in node.users:
if not is_partitioned(user):
del node.meta["delegation_tag"]
@@ -176,6 +160,7 @@ def filter_fn(node: torch.fx.Node) -> bool:
torch.ops.aten.linear.default,
torch.ops.aten.eye.default,
torch.ops.aten.linspace.default,
+ torch.ops.aten.logit.default,
] + ops_to_not_decompose_if_quant_op
tosa_spec = get_tosa_spec(self.delegation_spec.compile_specs)
diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py
index 7246ee74b74..ae549ee9345 100644
--- a/backends/arm/tosa_quant_utils.py
+++ b/backends/arm/tosa_quant_utils.py
@@ -9,39 +9,17 @@
import math
-from typing import Any, cast, NamedTuple, Tuple
-
-import executorch.backends.arm.tosa_specification as tosa_specification
+from typing import Any, Tuple
+import serializer.tosa_serializer as ts # type: ignore
import torch.fx
import torch.fx.node
from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.exir.dialects._ops import ops as exir_ops
-from torch import Tensor
from torch.fx import Node
from tosa.RoundingMode import RoundingMode # type: ignore
-q_ops = (
- exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
- exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
-)
-dq_ops = (
- exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
- exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
-)
-per_tensor_q_dq_ops = (
- exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
- exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
-)
-per_channel_q_dq_ops = (
- exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
- exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
-)
-dq_q_ops = (*q_ops, *dq_ops)
-
-
def insert_rescale_ops_to_int32(
tosa_graph: Any,
inputs: list[TosaArg],
@@ -127,122 +105,6 @@ def insert_rescale_op_to_int8(
)
-class QuantArgs(NamedTuple):
- scale: list[float] | float
- zp: list[int] | int
- qmin: int
- qmax: int
- dtype: torch.dtype
- axis: int = 0
- per_channel: bool = False
-
- def quantize_value(self, x: torch.Tensor | float) -> Tensor:
- """Quantizes the input tensor or value to a quantized tensor. If the input is
- not a tensor, it is converted to a tensor first. If self.per_channel is True,
- the quantization is done per channel, otherwise it is done per tensor.
- """
- if not isinstance(x, torch.Tensor):
- x = torch.Tensor([x])
- x = x.to(torch.float32)
- if self.per_channel:
- q_op = exir_ops.edge.quantized_decomposed.quantize_per_channel.default
- args = (
- x,
- torch.tensor(self.scale),
- torch.tensor(self.zp),
- self.axis,
- self.qmin,
- self.qmax,
- self.dtype,
- )
- else:
- q_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
- args = (x, self.scale, self.zp, self.qmin, self.qmax, self.dtype) # type: ignore[assignment]
-
- return q_op(*args)
-
- def dequantize_value(self, qx: torch.Tensor) -> torch.Tensor:
- """Dequantizes the input tensor or value to a dequantized tensor If the input
- is not a tensor, it is converted to a tensor first. If self.per_channel is True,
- the dequantization is done per channel, otherwise it is done per tensor.
- """
- if self.per_channel:
- dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
- args = (
- qx,
- torch.tensor(self.scale),
- torch.tensor(self.zp),
- self.axis,
- self.qmin,
- self.qmax,
- self.dtype,
- )
- else:
- dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
- args = (qx, self.scale, self.zp, self.qmin, self.qmax, self.dtype) # type: ignore[assignment]
-
- return dq_op(*args)
-
- @classmethod
- def from_operator(cls, op, args):
- if op in per_tensor_q_dq_ops:
- return cls(
- scale=cast(float, args[1]),
- zp=cast(int, args[2]),
- qmin=cast(int, args[3]),
- qmax=cast(int, args[4]),
- dtype=cast(torch.dtype, args[5]),
- axis=0,
- per_channel=False,
- )
- elif op in per_channel_q_dq_ops:
- return cls(
- scale=cast(list[float], args[1].tolist()),
- zp=cast(list[int], args[2].tolist()),
- axis=cast(int, args[3]),
- qmin=cast(int, args[4]),
- qmax=cast(int, args[5]),
- dtype=cast(torch.dtype, args[6]),
- per_channel=True,
- )
-
- else:
- # We're only handling per tensor and per channel quantization
- raise NotImplementedError(f"Unsupported quantization operation: {op}")
-
- def get_scale_per_tensor(self) -> float:
- if not isinstance(self.scale, float):
- raise TypeError(
- f"Expected scale {self.scale} to be a float but found scale of "
- f"type {type(self.scale)}"
- )
- return self.scale
-
- def get_zp_per_tensor(self) -> int:
- if not isinstance(self.zp, int):
- raise TypeError(
- f"Expected zero point {self.zp} to be an int but found zp of "
- f"type {type(self.zp)}"
- )
- return self.zp
-
- def get_scale_per_channel(self) -> list[float]:
- if not isinstance(self.scale, list):
- raise TypeError(
- f"Expected scale {self.scale} to be a list but found scale of "
- f"type {type(self.scale)}"
- )
- return self.scale
-
- def get_zp_per_channel(self) -> list[int]:
- if not isinstance(self.zp, list):
- raise TypeError(
- f"Expected zero point {self.zp} to be a list but found zp of "
- f"type {type(self.zp)}"
- )
- return self.zp
-
-
# TOSA uses the RESCALE operation to scale between values with differing precision.
# The RESCALE operator is defined using an integer multiply, add, and shift.
# This utility function is for calculating the multiplier and shift given a scale.
@@ -290,45 +152,6 @@ def compute_multiplier_and_shift(
return multipliers, shifts
-def build_rescale_v0_80(
- tosa_fb: Any,
- scale: list[float],
- input_node: Any,
- output_name: str,
- output_type: Any,
- input_zp: list[int],
- output_zp: list[int],
- is_double_round: bool = False,
- per_channel=False,
-):
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
- import tosa_tools.v0_80.tosa.Op as TosaOp # type: ignore
-
- # Check if scale32 mode is used for given output element type
- is_scale32 = output_type == ts.DType.INT8
- scale_width = 32 if is_scale32 else 16
- multipliers, shifts = compute_multiplier_and_shift(scale, scale_width)
-
- attr_rescale = ts.TosaSerializerAttribute()
- attr_rescale.RescaleAttribute(
- input_zp=input_zp[0],
- output_zp=output_zp[0],
- multiplier=multipliers,
- shift=shifts,
- scale32=is_scale32,
- double_round=is_double_round,
- per_channel=per_channel,
- input_unsigned=False,
- output_unsigned=False,
- )
-
- tosa_fb.addOperator(
- TosaOp.Op().RESCALE, [input_node.name], [output_name], attr_rescale
- )
-
- return
-
-
# For TOSA spec v1.0 RESCALE operator requires multipler, shifts, input_zp and output_zp to be
# const inputs. Create constant operators from the data already initialized.
def create_const_ops_for_rescale(
@@ -422,43 +245,19 @@ def build_rescale_to_int32(
tosa_spec=None,
) -> Any:
input_A_rescaled_to_int32 = None
- if not tosa_spec or isinstance(tosa_spec, tosa_specification.Tosa_0_80):
- # default to TOSA v0.80 until we switch to v1.0
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
- input_A_rescaled_to_int32 = tosa_fb.addIntermediate(
- input_arg.shape, ts.DType.INT32
- )
-
- build_rescale_v0_80(
- tosa_fb=tosa_fb,
- scale=[rescale_scale],
- input_node=input_arg,
- output_name=input_A_rescaled_to_int32.name,
- output_type=ts.DType.INT32,
- input_zp=[input_zp],
- output_zp=[0],
- ) # type: ignore[call-arg]
-
- elif isinstance(tosa_spec, tosa_specification.Tosa_1_00):
- # For TOSA v1.0 multipliers, shifts, input_zp and output_zp are now inputs
- # to the RESCALE op see: https://www.mlplatform.org/tosa/tosa_spec.html#_rescale
- import serializer.tosa_serializer as ts # type: ignore
-
- input_A_rescaled_to_int32 = tosa_fb.addIntermediate(
- input_arg.shape, ts.DType.INT32
- )
+ input_A_rescaled_to_int32 = tosa_fb.addIntermediate(input_arg.shape, ts.DType.INT32)
- build_rescale(
- tosa_fb,
- [rescale_scale],
- input_arg,
- input_A_rescaled_to_int32.name,
- ts.DType.INT32,
- [input_zp],
- [0],
- rounding_mode=RoundingMode.SINGLE_ROUND,
- ) # type: ignore[call-arg]
+ build_rescale(
+ tosa_fb,
+ [rescale_scale],
+ input_arg,
+ input_A_rescaled_to_int32.name,
+ ts.DType.INT32,
+ [input_zp],
+ [0],
+ rounding_mode=RoundingMode.SINGLE_ROUND,
+ ) # type: ignore[call-arg]
return input_A_rescaled_to_int32
@@ -474,35 +273,19 @@ def build_rescale_from_int32(
per_channel: bool = False,
tosa_spec=None,
) -> None:
- if not tosa_spec or isinstance(tosa_spec, tosa_specification.Tosa_0_80):
- # default to TOSA v0.80 until we switch to v1.0
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- build_rescale_v0_80(
- tosa_fb=tosa_fb,
- scale=[rescale_scale],
- input_node=input_node,
- output_name=output_name,
- output_type=ts.DType.INT8,
- input_zp=[0],
- output_zp=[output_zp],
- ) # type: ignore[call-arg]
-
- elif isinstance(tosa_spec, tosa_specification.Tosa_1_00):
- import serializer.tosa_serializer as ts # type: ignore
-
- # For TOSA v1.0 multipliers, shifts, input_zp and output_zp are now inputs
- # to the RESCALE op see: https://www.mlplatform.org/tosa/tosa_spec.html#_rescale
- build_rescale(
- tosa_fb,
- [rescale_scale],
- input_node,
- output_name=output_name,
- output_type=ts.DType.INT8,
- input_zp=[0],
- output_zp=[output_zp],
- rounding_mode=RoundingMode.SINGLE_ROUND,
- ) # type: ignore[call-arg]
+ # For TOSA v1.0 multipliers, shifts, input_zp and output_zp are now inputs
+ # to the RESCALE op see: https://www.mlplatform.org/tosa/tosa_spec.html#_rescale
+ build_rescale(
+ tosa_fb,
+ [rescale_scale],
+ input_node,
+ output_name=output_name,
+ output_type=ts.DType.INT8,
+ input_zp=[0],
+ output_zp=[output_zp],
+ rounding_mode=RoundingMode.SINGLE_ROUND,
+ ) # type: ignore[call-arg]
+
return
@@ -525,31 +308,17 @@ def build_rescale_conv_output(
(inp * w) / out for inp, w, out in zip(input_scale, weight_scale, output_scale)
]
- # Since we assume the input tensor that is being rescaled is int32 date type, zero point must be 0.
- if not tosa_spec or isinstance(tosa_spec, tosa_specification.Tosa_0_80):
- # default to TOSA v0.80 until we switch to v1.0
- build_rescale_v0_80(
- tosa_fb=tosa_fb,
- scale=post_conv2d_scale,
- input_node=op,
- output_name=output_name,
- output_type=output_type,
- input_zp=[0],
- output_zp=output_zp,
- per_channel=isinstance(weight_scale, torch.Tensor),
- ) # type: ignore[call-arg]
- elif isinstance(tosa_spec[0], tosa_specification.Tosa_1_00):
- # For TOSA v1.0 multipliers, shifts, input_zp and output_zp are now inputs
- # to the RESCALE op see: https://www.mlplatform.org/tosa/tosa_spec.html#_rescale
- build_rescale(
- tosa_fb=tosa_fb,
- scale=post_conv2d_scale,
- input_node=op,
- output_name=output_name,
- output_type=output_type,
- input_zp=[0],
- output_zp=output_zp,
- rounding_mode=RoundingMode.SINGLE_ROUND,
- per_channel=isinstance(weight_scale, torch.Tensor),
- ) # type: ignore[call-arg]
+ # For TOSA v1.0 multipliers, shifts, input_zp and output_zp are now inputs
+ # to the RESCALE op see: https://www.mlplatform.org/tosa/tosa_spec.html#_rescale
+ build_rescale(
+ tosa_fb=tosa_fb,
+ scale=post_conv2d_scale,
+ input_node=op,
+ output_name=output_name,
+ output_type=output_type,
+ input_zp=[0],
+ output_zp=output_zp,
+ rounding_mode=RoundingMode.SINGLE_ROUND,
+ per_channel=isinstance(weight_scale, torch.Tensor),
+ ) # type: ignore[call-arg]
return
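The RESCALE lowering above hinges on expressing a float scale as an integer multiplier and a right shift. A simplified sketch of that decomposition (the real compute_multiplier_and_shift adds clamping and edge-case handling):

```python
import math


def multiplier_and_shift(scale: float, scale_width: int = 32):
    # scale == mantissa * 2**exponent, with mantissa in [0.5, 1)
    mantissa, exponent = math.frexp(scale)
    multiplier = round(mantissa * (1 << (scale_width - 1)))
    shift = (scale_width - 1) - exponent
    return multiplier, shift  # scale ~= multiplier * 2**-shift


m, s = multiplier_and_shift(0.0157)
print(m, s, m * 2.0**-s)  # reconstructs roughly 0.0157
```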
diff --git a/backends/arm/tosa_specification.py b/backends/arm/tosa_specification.py
index 36fa5daf2f7..92b68955cdd 100644
--- a/backends/arm/tosa_specification.py
+++ b/backends/arm/tosa_specification.py
@@ -15,6 +15,10 @@
import re
from typing import List
+from executorch.exir.backend.compile_spec_schema import ( # type: ignore[import-not-found]
+ CompileSpec,
+)
+
from packaging.version import Version
@@ -23,7 +27,6 @@ class TosaSpecification:
This class implements a representation of TOSA specification
(https://www.mlplatform.org/tosa/tosa_spec.html) with a version, a profile
(with extension) and a level (8k).
- For 0.80 releases the profile is BI or MI, with u55 handled as an inofficial extension
For 1.00 releases the profile is INT or FP, and the extensions are for
INT: int16, int4, var, cf
FP: bf16, fp8e4m3, fp8e5m2, fft, var, cf
@@ -31,8 +34,6 @@ class TosaSpecification:
The TOSA specification is encoded in the string representation
TOSA-major.minor.patch+profile[+level][+extensions]
- For 0.80 MI implies BI, while for 1.0 the profiles has to explicitely be specified.
-
Profiles are uppercase letters and extensions and level is lowercase.
"""
@@ -62,10 +63,6 @@ def __init__(self, version: Version, extras: List[str]):
def create_from_string(repr: str) -> "TosaSpecification":
"""
Creates a TOSA specification class from a string representation:
- TOSA-0.80+MI
- TOSA-0.80+BI+8k
- TOSA-0.80+BI+u55 # Ethos-U55 extension to handle TOSA subset
- TOSA-0.90.0+MI
TOSA-1.00.0+INT+FP+int4+cf
"""
@@ -78,8 +75,6 @@ def create_from_string(repr: str) -> "TosaSpecification":
if name != "TOSA":
raise ValueError(f"Malformed TOSA specification representation: {repr}")
match version:
- case _ if version.major == 0 and version.minor == 80:
- return Tosa_0_80(version, extras)
case _ if version.major == 1 and version.minor == 0:
return Tosa_1_00(version, extras)
case _:
@@ -88,55 +83,6 @@ def create_from_string(repr: str) -> "TosaSpecification":
raise ValueError(f"Failed to parse TOSA specification representation: {repr}")
-class Tosa_0_80(TosaSpecification):
- profile: str
- level_8k: bool
- available_profiles = ["BI", "MI"] # MT is not defined
-
- def __init__(self, version: Version, extras: List[str]):
- super().__init__(version, extras)
- assert version >= Version("0.80") and version < Version("0.90")
-
- # Check that we only have one profile in the extensions list
- if [e in Tosa_0_80.available_profiles for e in extras].count(True) != 1:
- raise ValueError(
- f"Bad combination of extras: {extras}, more than one of {Tosa_0_80.available_profiles} found."
- )
-
- # The list contains one profile at most, so pick it
- self.profile = [e for e in extras if e in Tosa_0_80.available_profiles][0]
- extras.remove(self.profile)
-
- self.level_8k = "8k" in extras
- if self.level_8k:
- extras.remove("8k")
-
- if len(extras) > 0:
- raise ValueError(f"Unhandled extras found: {extras}")
-
- def __repr__(self) -> str:
- extensions = ""
- if self.level_8k:
- extensions += "+8k"
- if self.is_U55_subset:
- extensions += "+u55"
- return f"TOSA-{str(self.version)}+{self.profile}{extensions}"
-
- def __hash__(self) -> int:
- return hash(str(self.version) + self.profile)
-
- def __eq__(self, other: object) -> bool:
- if isinstance(other, Tosa_0_80):
- return (self.version == other.version) and (self.profile == other.profile)
- return False
-
- def support_integer(self):
- return True
-
- def support_float(self):
- return self.profile == "MI"
-
-
class Tosa_1_00(TosaSpecification):
profiles: List[str]
level_8k: bool
@@ -216,6 +162,13 @@ def support_integer(self):
def support_float(self):
return "FP" in self.profiles
+ def support_extension(self, extension: str) -> bool:
+ for p in self.profiles:
+ if extension in self.valid_extensions[p] and extension in self.extensions:
+ return True
+
+ return False
+
class TosaLoweringContext:
"""
@@ -246,3 +199,10 @@ def get_context_spec() -> TosaSpecification:
return TosaLoweringContext.tosa_spec_var.get()
except LookupError:
raise RuntimeError("Function must be executed within a TosaLoweringContext")
+
+
+def get_tosa_spec(compile_spec: List[CompileSpec]) -> TosaSpecification:
+ for spec in compile_spec:
+ if spec.key == "tosa_spec":
+ return TosaSpecification.create_from_string(spec.value.decode())
+ raise ValueError("Could not find TOSA version in CompileSpec")
diff --git a/backends/arm/tosa_utils.py b/backends/arm/tosa_utils.py
index 3b56fdd1cbf..fec8f4337a2 100644
--- a/backends/arm/tosa_utils.py
+++ b/backends/arm/tosa_utils.py
@@ -6,25 +6,19 @@
# pyre-unsafe
import logging
-import os
-from typing import Any, Optional
+from typing import Any
import numpy as np
+import serializer.tosa_serializer as ts # type: ignore
import sympy # type: ignore
import torch
-import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-from executorch.backends.arm.tosa_mapping import extract_tensor_meta, TosaArg
+from executorch.backends.arm.tosa_mapping import extract_tensor_meta
-from executorch.backends.arm.tosa_specification import (
- Tosa_0_80,
- Tosa_1_00,
- TosaSpecification,
-)
+from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.exir.dialects._ops import ops as exir_ops
-from executorch.exir.print_program import inspect_node
from torch._subclasses.fake_tensor import FakeTensor
from torch.fx import Node
@@ -32,98 +26,6 @@
logger = logging.getLogger(__name__)
-def dbg_node(node: torch.fx.Node, graph_module: torch.fx.GraphModule):
- # Debug output of node information
- logger.info(get_node_debug_info(node, graph_module))
-
-
-def get_node_debug_info(
- node: torch.fx.Node, graph_module: torch.fx.GraphModule | None = None
-) -> str:
- output = (
- f" {inspect_node(graph=graph_module.graph, node=node)}\n"
- if graph_module
- else ""
- "-- NODE DEBUG INFO --\n"
- f" Op is {node.op}\n"
- f" Name is {node.name}\n"
- f" Node target is {node.target}\n"
- f" Node args is {node.args}\n"
- f" Node kwargs is {node.kwargs}\n"
- f" Node users is {node.users}\n"
- " Node.meta = \n"
- )
- for k, v in node.meta.items():
- if k == "stack_trace":
- matches = v.split("\n")
- output += " 'stack_trace =\n"
- for m in matches:
- output += f" {m}\n"
- else:
- output += f" '{k}' = {v}\n"
-
- if isinstance(v, list):
- for i in v:
- output += f" {i}\n"
- return output
-
-
-# Output TOSA flatbuffer and test harness file
-def dbg_tosa_dump(tosa_graph: ts.TosaSerializer, path: str, suffix: str = ""):
- filename = f"output{suffix}.tosa"
-
- logger.info(f"Emitting debug output to: {path=}, {suffix=}")
-
- os.makedirs(path, exist_ok=True)
-
- fb = tosa_graph.serialize()
- js = tosa_graph.writeJson(filename)
-
- filepath_tosa_fb = os.path.join(path, filename)
- with open(filepath_tosa_fb, "wb") as f:
- f.write(fb)
- assert os.path.exists(filepath_tosa_fb), "Failed to write TOSA flatbuffer"
-
- filepath_desc_json = os.path.join(path, f"desc{suffix}.json")
- with open(filepath_desc_json, "w") as f:
- f.write(js)
- assert os.path.exists(filepath_desc_json), "Failed to write TOSA JSON"
-
-
-def dbg_fail(
- node,
- graph_module,
- tosa_graph: Optional[ts.TosaSerializer] = None,
- path: Optional[str] = None,
-):
- logger.warning("Internal error due to poorly handled node:")
- if tosa_graph is not None and path is not None:
- dbg_tosa_dump(tosa_graph, path)
- logger.warning(f"Debug output captured in '{path}'.")
- dbg_node(node, graph_module)
-
-
-def getNodeArgs(node: Node, tosa_spec: TosaSpecification) -> list[TosaArg]:
- try:
- return [TosaArg(arg, tosa_spec) for arg in node.args]
- except ValueError as e:
- raise ValueError(f"Failed processing args to op:\n{node}") from e
-
-
-def get_output_node(node: Node) -> Node:
- return list(node.users)[0]
-
-
-""" TOSA reshape returns a tensor with the same type/values as the input.
- No data conversion happens during a reshape operation. """
-
-
-def build_reshape(tosa_fb, input_name, new_shape, output_name):
- attr = ts.TosaSerializerAttribute()
- attr.ReshapeAttribute(new_shape)
- tosa_fb.addOperator(ts.TosaOp.Op().RESHAPE, [input_name], [output_name], attr)
-
-
def are_fake_tensors_broadcastable(
fake_tensors: list[FakeTensor],
) -> tuple[bool, list[int]]:
@@ -187,17 +89,6 @@ def broadcast_tensors(
for broadcast. However this function also performs the broadcast and
does not have a limit on only two input tensors.
"""
- if isinstance(tosa_spec, Tosa_0_80):
- import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
-
- reshape_helper = build_reshape
- elif isinstance(tosa_spec, Tosa_1_00):
- import serializer.tosa_serializer as ts
-
- reshape_helper = build_reshape_tosa_1_0
- else:
- raise ValueError(f"Unsupported TOSA spec: {tosa_spec}")
-
index_fake_tensors = [node.meta["val"] for node in nodes]
broadcastable, common_shape = are_fake_tensors_broadcastable(index_fake_tensors)
if not broadcastable:
@@ -219,35 +110,25 @@ def broadcast_tensors(
tens_dtype,
)
- reshape_helper(tosa_fb, node.name, new_shape, reshaped.name)
+ build_reshape_tosa_1_0(tosa_fb, node.name, new_shape, reshaped.name)
tiled = tosa_fb.addIntermediate(common_shape, tens_dtype)
multipliers = [
comm if curr == 1 else 1 for comm, curr in zip(common_shape, new_shape)
]
- if isinstance(tosa_spec, Tosa_0_80):
- attr = ts.TosaSerializerAttribute()
- attr.TileAttribute(multipliers)
- tosa_fb.addOperator(
- ts.TosaOp.Op().TILE,
- [reshaped.name],
- [tiled.name],
- attr,
- )
- elif isinstance(tosa_spec, Tosa_1_00):
- multiple_shapes = tosa_fb.addConst(
- (len(multipliers),),
- ts.DType.SHAPE,
- multipliers,
- name=f"{node.name}_multiples",
- )
+ multiple_shapes = tosa_fb.addConst(
+ (len(multipliers),),
+ ts.DType.SHAPE,
+ multipliers,
+ name=f"{node.name}_multiples",
+ )
- tosa_fb.addOperator(
- ts.TosaOp.Op().TILE,
- [reshaped.name, multiple_shapes.name],
- [tiled.name],
- None,
- )
+ tosa_fb.addOperator(
+ ts.TosaOp.Op().TILE,
+ [reshaped.name, multiple_shapes.name],
+ [tiled.name],
+ None,
+ )
broadcast_tensors.append(tiled)
@@ -257,64 +138,23 @@ def broadcast_tensors(
def build_reshape_tosa_1_0(
tosa_graph, input_name, new_shape, output_name, shape_name_override=""
):
- import serializer.tosa_serializer as ts_ # type: ignore
-
shape = tosa_graph.addConst(
np.array(new_shape).shape,
- ts_.DType.SHAPE,
+ ts.DType.SHAPE,
np.array(new_shape),
name=shape_name_override if shape_name_override else output_name + "_shape",
)
- attr = ts_.TosaSerializerAttribute()
+ attr = ts.TosaSerializerAttribute()
attr.ReshapeAttribute()
tosa_graph.addOperator(
- ts_.TosaOp.Op().RESHAPE,
+ ts.TosaOp.Op().RESHAPE,
[input_name, shape.name],
[output_name],
attr,
)
-def reshape_for_broadcast(tosa_fb, inputs, dim_order=None):
- assert len(inputs) == 2
- input1 = inputs[0]
- input2 = inputs[1]
-
- def get_new_shape(l_rank_in, h_rank_in):
- rank_diff = len(h_rank_in.shape) - len(l_rank_in.shape)
- new_shape = list(l_rank_in.shape)
-
- for _ in range(rank_diff):
- new_shape.insert(0, 1)
- return tuple(new_shape)
-
- if len(input1.shape) == len(input2.shape):
- return input1, input2
- elif len(input1.shape) > len(input2.shape):
- l_rank_in = input2
- h_rank_in = input1
- elif len(input1.shape) < len(input2.shape):
- l_rank_in = input1
- h_rank_in = input2
-
- new_shape = get_new_shape(l_rank_in, h_rank_in)
- dim_order = h_rank_in.dim_order if dim_order is None else dim_order
- new_shape = tosa_shape(new_shape, dim_order)
-
- reshaped = tosa_fb.addIntermediate(
- new_shape,
- inputs[0].dtype,
- )
-
- build_reshape(tosa_fb, l_rank_in.name, new_shape, reshaped.name)
-
- if len(input1.shape) > len(input2.shape):
- return input1, reshaped
- else:
- return reshaped, input2
-
-
def is_consumer_node_depthwise_conv2d(node: Node):
consumer_node = list(node.users)[0]
if consumer_node.target == exir_ops.edge.aten.convolution.default:
@@ -338,35 +178,6 @@ def tosa_shape(shape, dim_order):
return removed_symints
-def expand_dims(
- tosa_graph: ts.TosaSerializer,
- input_node: TosaArg,
- dtype: int,
- dim: int,
-) -> Any:
- """Inserts TOSA operators into the tosa_graph, that perform the equivalent
- of the expand_dims (a.k.a unsqueeze) operation. A new axis is created at the
- dim location.
-
- Args:
- tosa_graph (ts.TosaSerializer): The TOSA graph to manipulate.
- input_node (TosaArg): The parent node of the expand dim operations.
- dtype (ts.DType): The data type expand dims operations.
- dim (int): The dimension to expand.
-
- Returns:
- Any: The output tensor of the inserted operation in the TOSA graph.
- """
- new_shape = list(input_node.shape)
- new_shape.insert(dim, 1)
-
- intermediate = tosa_graph.addIntermediate(new_shape, dtype)
-
- build_reshape(tosa_graph, input_node.name, new_shape, intermediate.name)
-
- return intermediate
-
-
def get_resize_parameters_1d(
input_size: int | torch.SymInt,
output_size: int | torch.SymInt,
diff --git a/backends/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt
index d541fafe957..47183bed21d 100644
--- a/backends/cadence/CMakeLists.txt
+++ b/backends/cadence/CMakeLists.txt
@@ -22,8 +22,9 @@ endif()
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
# Let files say "include ".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..
- ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+ ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
@@ -38,52 +39,58 @@ if(EXECUTORCH_CADENCE_CPU_RUNNER)
executorch_target_link_options_shared_lib(executorch)
executorch_target_link_options_shared_lib(portable_ops_lib)
- target_include_directories(executorch INTERFACE ${_common_include_directories})
+ target_include_directories(
+ executorch INTERFACE ${_common_include_directories}
+ )
find_package(
- gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../third-party
+ gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../third-party
)
- add_executable(cadence_runner
- ${EXECUTORCH_ROOT}/examples/devtools/example_runner/example_runner.cpp
+ add_executable(
+ cadence_runner
+ ${EXECUTORCH_ROOT}/examples/devtools/example_runner/example_runner.cpp
)
target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)
target_include_directories(
- etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../devtools/include
- ${EXECUTORCH_ROOT}/third-party/flatcc/include
+ etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../devtools/include
+ ${EXECUTORCH_ROOT}/third-party/flatcc/include
)
target_include_directories(
- cadence_runner PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
- ${_common_include_directories}
+ cadence_runner PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
+ ${_common_include_directories}
)
target_link_libraries(
- cadence_runner
- executorch
- gflags
- etdump
- extension_data_loader
- bundled_program
- cadence_ops_lib
- flatccrt
+ cadence_runner
+ executorch
+ gflags
+ etdump
+ extension_data_loader
+ bundled_program
+ cadence_ops_lib
+ flatccrt
)
endif()
if(EXECUTORCH_NNLIB_OPT)
set(TARGET_DIR hifi)
- add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
- ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+ add_subdirectory(
+ ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
+ ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+ )
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
elseif(EXECUTORCH_FUSION_G3_OPT)
set(TARGET_DIR fusion_g3)
- add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
- ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+ add_subdirectory(
+ ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
+ ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+ )
else()
set(TARGET_DIR reference)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
endif()
-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)
diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS
index 8492bb55877..e257df37c8a 100644
--- a/backends/cadence/aot/TARGETS
+++ b/backends/cadence/aot/TARGETS
@@ -101,6 +101,7 @@ python_library(
":reorder_ops",
":replace_ops",
":simplify_ops",
+ ":type_dispatch",
":utils",
"//caffe2:torch",
"//executorch/exir:pass_base",
@@ -322,6 +323,37 @@ python_library(
],
)
+python_library(
+ name = "type_dispatch",
+ srcs = [
+ "type_dispatch.py",
+ ],
+ typing = True,
+ deps = [
+ "//caffe2:torch",
+ "//executorch/backends/cadence/aot:pass_utils",
+ "//executorch/exir:pass_base",
+ ],
+)
+
+python_unittest(
+ name = "test_type_dispatch_passes",
+ srcs = [
+ "tests/test_type_dispatch_passes.py",
+ ],
+ supports_static_listing = False,
+ typing = True,
+ deps = [
+ ":ops_registrations",
+ ":type_dispatch",
+ "//caffe2:torch",
+ "//executorch/backends/cadence/aot:graph_builder",
+ "//executorch/backends/cadence/aot:pass_utils",
+ "//executorch/exir:pass_base",
+ "//executorch/exir/dialects:lib",
+ ],
+)
+
python_library(
name = "typing_stubs",
srcs = [
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index 26a0437ac25..eaabc6589b5 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -54,7 +54,7 @@
# if the quantizer here is different from the quantizer used to convert. It is
# however useful for unit tests to separate the converted model from the fused
# model, to be able to get reference numerics.
-# If this does not apply, please use quantize_and_fuse_pt2 instead.
+# If this does not apply, please use quantize_pt2 instead.
def trace(
model: torch.nn.Module,
inputs: tuple[object, ...],
@@ -85,6 +85,29 @@ def trace(
def prepare_pt2(
+ model: torch.nn.Module,
+ inputs: tuple[object, ...],
+ quantizer: CadenceQuantizer,
+ dump_graphs: bool = False,
+) -> torch.fx.GraphModule:
+ """
+ Trace and Prepare a model using the given quantizer.
+ The quantizer must be supplied and be the same as the one used to
+ fuse the model later, if applicable. If you do not expect that behavior,
+ please use quantize_pt2 instead, which will instantiate a
+ default quantizer for you if needed.
+ Returns a GraphModule with the prepared model.
+ """
+
+ traced_program = trace(model, inputs, dump_graphs=dump_graphs)
+ prepared_program = prepare_traced_pt2(
+ traced_program, quantizer, dump_graphs=dump_graphs
+ )
+
+ return prepared_program
+
+
+def prepare_traced_pt2(
program: ExportedProgram,
quantizer: CadenceQuantizer,
dump_graphs: bool = False,
@@ -93,7 +116,7 @@ def prepare_pt2(
Prepare a model using the given quantizer.
The quantizer must be supplied and be the same as the one used to
fuse the model later, if applicable. If you do not expect that behavior,
- please use quantize_and_fuse_pt2 instead, which will instantiate a
+ please use quantize_pt2 instead, which will instantiate a
default quantizer for you if needed.
Returns a GraphModule with the prepared model.
"""
@@ -137,7 +160,7 @@ def fuse_pt2(
"""
Fuse a converted graph module using the given quantizer.
The quantizer must be the same as the one used to convert the model.
- If you do not expect that behavior, please use quantize_and_fuse_pt2 instead,
+ If you do not expect that behavior, please use quantize_pt2 instead,
which will instantiate a default quantizer for you if needed.
Returns a GraphModule with the fused model.
"""
@@ -149,29 +172,18 @@ def fuse_pt2(
return converted_graph_module
-def quantize_pt2(
+# Note: quantizer is not optional here to force the user to supply a quantizer
+# and ensure consistency is more likely to be maintained.
+def get_fake_quant_model(
model: torch.nn.Module,
inputs: tuple[object, ...],
- quantizer: Optional[CadenceQuantizer] = None,
+ quantizer: CadenceQuantizer,
calibration_data: Optional[list[tuple[object, ...]]] = None,
dump_graphs: bool = False,
-) -> ExportedProgram:
- """
- Trace, prepare, convert and fuse the model using the given quantizer.
- If calibration data is provided, it will be used to calibrate the model. If
- not, the inputs will be used for calibration instead, which is useful for
- unit tests but should not be used for end-to-end use cases.
- Returns a GraphModule with the quantized model.
- Note: this function should not be called directly in general. Please use
- quantize_and_export_to_executorch for most needs.
- """
+) -> torch.fx.GraphModule:
# Make the model inference mode by calling model.eval()
model.eval()
- # Instantiate the quantizer to CadenceQuantizer if not supplied
- if not quantizer:
- quantizer = CadenceDefaultQuantizer()
-
program = trace(model, inputs, dump_graphs=dump_graphs)
if dump_graphs:
@@ -179,7 +191,7 @@ def quantize_pt2(
logging.info(program.graph.print_tabular())
# Get prepared graph module
- prepared_gm = prepare_pt2(program, quantizer, dump_graphs=dump_graphs)
+ prepared_gm = prepare_pt2(model, inputs, quantizer, dump_graphs=dump_graphs)
# Calibrate
# If no calibration data is provided, use the inputs
@@ -191,6 +203,37 @@ def quantize_pt2(
# Get converted graph module
converted_gm = convert_pt2(prepared_gm, dump_graphs=dump_graphs)
+ return converted_gm
+
+
+def quantize_pt2(
+ model: torch.nn.Module,
+ inputs: tuple[object, ...],
+ quantizer: Optional[CadenceQuantizer] = None,
+ calibration_data: Optional[list[tuple[object, ...]]] = None,
+ dump_graphs: bool = False,
+) -> ExportedProgram:
+ """
+ Trace, prepare, convert and fuse the model using the given quantizer.
+ If calibration data is provided, it will be used to calibrate the model. If
+ not, the inputs will be used for calibration instead, which is useful for
+ unit tests but should not be used for end-to-end use cases.
+ Returns a GraphModule with the quantized model.
+ Note: this function should not be called directly in general. Please use
+ quantize_and_export_to_executorch for most needs.
+ """
+ # Instantiate the quantizer to CadenceQuantizer if not supplied
+ if not quantizer:
+ quantizer = CadenceDefaultQuantizer()
+
+ # Get the converted (aka fake quant) graph module
+ converted_gm = get_fake_quant_model(
+ model,
+ inputs,
+ quantizer=quantizer,
+ calibration_data=calibration_data,
+ dump_graphs=dump_graphs,
+ )
# Get fused model
fused_gm = fuse_pt2(converted_gm, quantizer)
@@ -214,7 +257,7 @@ def quantize_pt2(
torch.ops.aten.angle.default,
torch.ops.aten.rms_norm.default,
]
-TO_EDGE_PRESERVE_OPS: list[torch._ops.OpOverload, ...] = [
+TO_EDGE_PRESERVE_OPS: list[torch._ops.OpOverload] = [
torch.ops.aten.rms_norm.default,
]
diff --git a/backends/cadence/aot/compiler_utils.py b/backends/cadence/aot/compiler_utils.py
index cabfb120341..b55d388691f 100644
--- a/backends/cadence/aot/compiler_utils.py
+++ b/backends/cadence/aot/compiler_utils.py
@@ -201,13 +201,6 @@ def contains_node_with_matching_target(
return any(node.target == op_target for node in nodes)
-def is_quantized_tensor(x: torch.Tensor) -> bool:
- """
- Return true if the tensor x is quantized
- """
- return x.is_quantized
-
-
def get_scale(x: torch.Tensor) -> torch.Tensor:
"""
Return the scale of a quantized tensor as a float32 tensor.
diff --git a/backends/cadence/aot/decompose_ops.py b/backends/cadence/aot/decompose_ops.py
index 60514c52902..7ee1bb36fef 100644
--- a/backends/cadence/aot/decompose_ops.py
+++ b/backends/cadence/aot/decompose_ops.py
@@ -7,9 +7,7 @@
# This file contains all the functions that decompose one op into simpler ops in the
-# graph. The functions decomposing ops for models deployed with Jarvis are grouped
-# together in class 'DecomposeOpsInGraph'. Some examples of functions in the class are
-# 1. functions that decompose an ATen gelu op into an equivalent series of simpler ops
+# graph.
# pyre-strict
diff --git a/backends/cadence/aot/export_example.py b/backends/cadence/aot/export_example.py
index 3bf126fb400..14d100ea1f8 100644
--- a/backends/cadence/aot/export_example.py
+++ b/backends/cadence/aot/export_example.py
@@ -19,7 +19,6 @@
export_to_executorch_gen_etrecord,
fuse_pt2,
prepare_pt2,
- trace,
)
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer
@@ -50,11 +49,8 @@ def export_model(
# Instantiate the quantizer
quantizer = CadenceDefaultQuantizer()
- # Trace the model
- ep = trace(model, example_inputs)
-
# Prepare the model
- prepared_gm = prepare_pt2(ep, quantizer)
+ prepared_gm = prepare_pt2(model, example_inputs, quantizer)
# Calibrate the model
for samples in [example_inputs]:
diff --git a/backends/cadence/aot/functions.yaml b/backends/cadence/aot/functions.yaml
index 9dbf28f3114..196480931e0 100644
--- a/backends/cadence/aot/functions.yaml
+++ b/backends/cadence/aot/functions.yaml
@@ -190,10 +190,15 @@
- arg_meta: null
kernel_name: impl::reference::dequantize_per_tensor_out
-- func: cadence::quantized_conv.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: cadence::quantized_conv_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
- kernel_name: impl::reference::quantized_conv_out
+ kernel_name: impl::reference::quantized_conv_nchw_out
+
+- func: cadence::quantized_conv_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_out
- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
kernels:
@@ -209,6 +214,21 @@
- arg_meta: null
kernel_name: impl::reference::quantized_linear_out
+- func: cadence::quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_linear_per_tensor_out
+
+- func: cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_linear_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_linear_asym8uxasym8u_asym8u_per_tensor_out
+
- func: cadence::quantized_relu.out(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
@@ -219,15 +239,45 @@
- arg_meta: null
kernel_name: impl::reference::quantized_relu_per_tensor_out
+- func: cadence::quantized_relu_asym8s_asym8s.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_relu_asym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_relu_asym8u_asym8u.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_relu_asym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_add.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_add_per_tensor_out
+
+- func: cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_add_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_add_asym8uxasym8u_asym8u_per_tensor_out
+
- func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: impl::reference::quantized_matmul_out
-- func: cadence::quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+- func: cadence::quantized_matmul_asym8sxasym8s_asym8s.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
- kernel_name: impl::reference::quantized_linear_per_tensor_out
+ kernel_name: impl::reference::quantized_matmul_asym8sxasym8s_asym8s_out
+
+- func: cadence::quantized_matmul_asym8uxasym8u_asym8u.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_matmul_asym8uxasym8u_asym8u_out
- func: cadence::im2row.out(Tensor input, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, Tensor in_zero_point, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
kernels:
@@ -239,10 +289,75 @@
- arg_meta: null
kernel_name: impl::reference::im2row_per_tensor_out
-- func: cadence::quantized_conv.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: cadence::quantized_conv_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nchw_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
- kernel_name: impl::reference::quantized_conv_per_tensor_out
+ kernel_name: impl::reference::quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out
- func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
kernels:
@@ -254,6 +369,16 @@
- arg_meta: null
kernel_name: impl::reference::quantized_fully_connected_per_tensor_out
+- func: cadence::quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_fully_connected_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: impl::reference::quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor_out
+
- func: cadence::requantize.out(Tensor input, Tensor in_scale, Tensor in_zero_point, Tensor out_scale, Tensor out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml
index 04228f40be7..cf4c5a8fffb 100644
--- a/backends/cadence/aot/functions_hifi.yaml
+++ b/backends/cadence/aot/functions_hifi.yaml
@@ -290,10 +290,85 @@
- arg_meta: null
kernel_name: cadence::impl::HiFi::dequantize_per_tensor_out
-- func: cadence::quantized_conv.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: cadence::quantized_conv_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
- kernel_name: cadence::impl::HiFi::quantized_conv_out
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_out
+
+- func: cadence::quantized_conv_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_out
+
+- func: cadence::quantized_conv_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out
- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
kernels:
@@ -314,6 +389,16 @@
- arg_meta: null
kernel_name: cadence::impl::HiFi::quantized_linear_per_tensor_out
+- func: cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_linear_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_linear_asym8uxasym8u_asym8u_per_tensor_out
+
- func: cadence::quantized_relu_per_tensor.out(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
@@ -329,17 +414,57 @@
- arg_meta: null
kernel_name: cadence::impl::HiFi::quantized_relu_per_tensor_out
-- func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+- func: cadence::quantized_relu_asym8s_asym8s.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
- kernel_name: cadence::impl::HiFi::quantized_fully_connected_out
+ kernel_name: cadence::impl::HiFi::quantized_relu_asym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_relu_asym8u_asym8u.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_relu_asym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_add_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_add_asym8uxasym8u_asym8u_per_tensor_out
- func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: cadence::impl::HiFi::quantized_matmul_out
+- func: cadence::quantized_matmul_asym8sxasym8s_asym8s.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_matmul_asym8sxasym8s_asym8s_out
+
+- func: cadence::quantized_matmul_asym8uxasym8u_asym8u.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_matmul_asym8uxasym8u_asym8u_out
+
+- func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_fully_connected_out
+
- func: cadence::quantized_fully_connected.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: cadence::impl::HiFi::quantized_fully_connected_per_tensor_out
+
+- func: cadence::quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_fully_connected_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+ kernels:
+ - arg_meta: null
+ kernel_name: cadence::impl::HiFi::quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor_out
diff --git a/backends/cadence/aot/fuse_ops.py b/backends/cadence/aot/fuse_ops.py
index 16d4dbde32b..dbd19e1d3af 100644
--- a/backends/cadence/aot/fuse_ops.py
+++ b/backends/cadence/aot/fuse_ops.py
@@ -72,11 +72,13 @@ def fuse_mm_with_add(self, graph_module: torch.fx.GraphModule):
fuse it with mm.
"""
graph = graph_module.graph
- for node in graph.nodes:
+ for node in graph.find_nodes(
+ op="call_function", target=exir_ops.edge.aten.mm.default
+ ):
# We want to discover a chain of mm -> add, or mm -> view -> add.
# Only proceed if the current node is an mm node, and has only one
# user/successor.
- if node.target != exir_ops.edge.aten.mm.default or len(node.users) != 1:
+ if len(node.users) != 1:
continue
# Our addmm implementation computes (mat1 * mat2 + bias). So the
@@ -128,6 +130,7 @@ def fuse_mm_with_add(self, graph_module: torch.fx.GraphModule):
mm_arg_shape is None
or bias_arg_shape is None
or not broadcastable(mm_arg_shape, bias_arg_shape)
+ or len(bias_arg_shape) > 2
):
continue
diff --git a/backends/cadence/aot/memory_planning.py b/backends/cadence/aot/memory_planning.py
index 67da42a9d3c..ecf3fcef01c 100644
--- a/backends/cadence/aot/memory_planning.py
+++ b/backends/cadence/aot/memory_planning.py
@@ -116,6 +116,9 @@ def plan_spec(
Greedily place the spec in the first memory that can fit it.
"""
for spec.mem_id in range(1, self.get_num_memories()):
+ if placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id):
+ # Skip placement for blocked memory id.
+ continue
prev_offset, smallest_gap = 0, float("inf")
for allocated_spec in state.allocated_buffers[spec.mem_id]:
if not Verifier.lifetime_overlap(spec, allocated_spec):
@@ -141,11 +144,11 @@ def plan_spec(
)
if spec.mem_offset is None:
spec.mem_offset = prev_offset
- if not self.is_valid_placement(spec, placement_constraints):
- spec.mem_offset = None
- continue
- else:
- spec.mem_offset = prev_offset
+
+ if not self.is_valid_placement(spec, placement_constraints):
+ # Skip placement for invalid memory id.
+ spec.mem_offset = None
+ continue
state.place_spec(spec)
# A data structure used for maintaining the tensor order
diff --git a/backends/cadence/aot/memory_planning_algo.py b/backends/cadence/aot/memory_planning_algo.py
index 8193b73c9fd..672f48a55fd 100644
--- a/backends/cadence/aot/memory_planning_algo.py
+++ b/backends/cadence/aot/memory_planning_algo.py
@@ -204,7 +204,7 @@ def _place_memory_id_pinned_specs(
for spec, c in spec_with_abs_constraint.items()
if c is not None and c.pinned_memory_id == mem_id and c.offset is None
}
- logging.error(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
+ logging.debug(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
with self.block_memories_except(mem_id):
self.plan(
@@ -220,7 +220,7 @@ def _place_memory_id_pinned_specs(
if constraint is None:
continue
- logging.error(f"Placing spec {spec} with {constraint}")
+ logging.debug(f"Placing spec {spec} with {constraint}")
if not state.is_placed(spec):
raise MemoryError(
diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py
index 5713861103c..b88564e3ba5 100644
--- a/backends/cadence/aot/ops_registrations.py
+++ b/backends/cadence/aot/ops_registrations.py
@@ -56,10 +56,26 @@
lib.define(
"quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
)
+lib.define(
+ "quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
+)
lib.define(
"quantized_linear.per_tensor(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, "
"SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset) -> Tensor"
)
+lib.define(
+ "quantized_linear_asym8sxasym8s_asym8s.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_linear_asym8uxasym8u_asym8u.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
+)
lib.define(
"quantized_relu(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Y)"
@@ -69,24 +85,119 @@
)
lib.define(
- "quantized_conv(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False) -> (Tensor Z)"
+ "quantized_conv_nhwc(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)"
)
lib.define(
- "quantized_conv.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"
+ "quantized_conv_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)"
)
lib.define(
- "quantized_conv.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False) -> (Tensor Z)"
+ "quantized_conv_nhwc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
)
lib.define(
- "quantized_conv.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"
+ "quantized_conv_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
)
-
lib.define(
"quantized_matmul(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False) -> (Tensor Z)"
)
lib.define(
"quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False, *, Tensor(a!) out) -> Tensor(a!)"
)
+lib.define(
+ "quantized_matmul_asym8sxasym8s_asym8s(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_matmul_asym8sxasym8s_asym8s.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_matmul_asym8uxasym8u_asym8u(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_matmul_asym8uxasym8u_asym8u.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False, *, Tensor(a!) out) -> Tensor(a!)"
+)
lib.define(
"convolution(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, "
@@ -162,6 +273,14 @@
"quantized_fully_connected.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
"int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
)
+lib.define(
+ "quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
+)
+lib.define(
+ "quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
+)
lib.define("where_Scalar(Tensor condition, float self, float other) -> (Tensor Z)")
lib.define(
"where_Scalar.out(Tensor condition, float self, float other, *, Tensor(a!) out) -> Tensor(a!)"
@@ -208,6 +327,20 @@
"quantized_relu.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, "
"int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
)
+lib.define(
+ "quantized_relu_asym8s_asym8s.per_tensor(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift) -> Tensor"
+)
+lib.define(
+ "quantized_relu_asym8s_asym8s.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, "
+ "int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_relu_asym8u_asym8u.per_tensor(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift) -> Tensor"
+)
+lib.define(
+ "quantized_relu_asym8u_asym8u.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, "
+ "int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
lib.define(
"quantized_add.out(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor Y, Tensor Y_scale, "
"Tensor Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
@@ -216,6 +349,22 @@
"quantized_add.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
"int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
)
+lib.define(
+ "quantized_add_asym8sxasym8s_asym8s.per_tensor(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+ "int Y_zero_point, float out_scale, int out_zero_point) -> Tensor"
+)
+lib.define(
+ "quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+ "int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_add_asym8uxasym8u_asym8u.per_tensor(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+ "int Y_zero_point, float out_scale, int out_zero_point) -> Tensor"
+)
+lib.define(
+ "quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+ "int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
+)
lib.define(
"quantized_mul.out(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor Y, Tensor Y_scale, "
"Tensor Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
@@ -240,6 +389,14 @@
"quantized_fully_connected.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
"int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
)
+lib.define(
+ "quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+ "quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+ "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
+)
lib.define(
"quantized_embedding_byte.out(Tensor weight, Tensor weight_scales, Tensor weight_zero_points, "
"Tensor indices, bool pruned_weights=False, *, Tensor(a!) out) -> Tensor(a!)"
@@ -386,6 +543,36 @@ def quantized_add_per_tensor_meta(
return X.new_empty(out_size, dtype=X.dtype)
+@register_fake("cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor")
+def quantized_add_asym8sxasym8s_asym8s_per_tensor_meta(
+ X: torch.Tensor,
+ X_scale: float,
+ X_zero_point: int,
+ Y: torch.Tensor,
+ Y_scale: float,
+ Y_zero_point: int,
+ out_scale: float,
+ out_zero_point: int,
+) -> torch.Tensor:
+ out_size = torch.broadcast_shapes(X.size(), Y.size())
+ return X.new_empty(out_size, dtype=X.dtype)
+
+
+@register_fake("cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor")
+def quantized_add_asym8uxasym8u_asym8u_per_tensor_meta(
+ X: torch.Tensor,
+ X_scale: float,
+ X_zero_point: int,
+ Y: torch.Tensor,
+ Y_scale: float,
+ Y_zero_point: int,
+ out_scale: float,
+ out_zero_point: int,
+) -> torch.Tensor:
+ out_size = torch.broadcast_shapes(X.size(), Y.size())
+ return X.new_empty(out_size, dtype=X.dtype)
+
+
@register_fake("cadence::quantized_linear")
def quantized_linear_meta(
src: torch.Tensor,
@@ -430,8 +617,52 @@ def quantized_linear_per_tensor_meta(
return src.new_empty(out_size, dtype=src.dtype)
-@register_fake("cadence::quantized_conv")
-def quantized_conv_meta(
+@register_fake("cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor")
+def quantized_linear_asym8sxasym8s_asym8s_per_tensor_meta(
+ src: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ in_zero_point: int,
+ weight_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+ out_zero_point: int,
+ offset: Optional[torch.Tensor],
+) -> torch.Tensor:
+ # src comes in shape [leading_dims, in_dim]
+ # weight comes in shape [out_dim, in_dim]
+ # output comes in empty with shape [leading_dims, out_dim]
+ out_size = list(src.size())
+ weight_size = list(weight.size())
+ assert len(weight_size) == 2
+ out_size[-1] = weight_size[0]
+ return src.new_empty(out_size, dtype=src.dtype)
+
+
+@register_fake("cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor")
+def quantized_linear_asym8uxasym8u_asym8u_per_tensor_meta(
+ src: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ in_zero_point: int,
+ weight_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+ out_zero_point: int,
+ offset: Optional[torch.Tensor],
+) -> torch.Tensor:
+ # src comes in shape [leading_dims, in_dim]
+ # weight comes in shape [out_dim, in_dim]
+ # output comes in empty with shape [leading_dims, out_dim]
+ out_size = list(src.size())
+ weight_size = list(weight.size())
+ assert len(weight_size) == 2
+ out_size[-1] = weight_size[0]
+ return src.new_empty(out_size, dtype=src.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc")
+def quantized_conv_nhwc_meta(
input: torch.Tensor,
weight: torch.Tensor,
bias: torch.Tensor,
@@ -446,12 +677,8 @@ def quantized_conv_meta(
output_zero_point: int,
out_multiplier: torch.Tensor,
out_shift: torch.Tensor,
- channel_last: bool = False,
) -> torch.Tensor:
- if channel_last:
- out_channels, *kernel_size, _ = weight.shape
- else:
- out_channels, _, *kernel_size = weight.shape
+ out_channels, *kernel_size, _ = weight.shape
in_size = input.shape
# Assert that the input tensor has at least 3 dimensions, and at most 6
@@ -467,19 +694,19 @@ def quantized_conv_meta(
padding[1],
dilation[1],
kernel_size[0],
- channel_last,
+ True,
)
if len(in_size) == 3
else get_conv2d_output_size(
- in_size, out_channels, stride, padding, dilation, kernel_size, channel_last
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
)
)
return input.new_empty(output_size, dtype=input.dtype)
-@register_fake("cadence::quantized_conv.per_tensor")
-def quantized_conv_per_tensor_meta(
+@register_fake("cadence::quantized_conv_nchw")
+def quantized_conv_nchw_meta(
input: torch.Tensor,
weight: torch.Tensor,
bias: torch.Tensor,
@@ -488,18 +715,14 @@ def quantized_conv_per_tensor_meta(
dilation: Tuple[int],
groups: int,
in_zero_point: int,
- weight_zero_point: int,
- bias_scale: float,
+ weight_zero_point: torch.Tensor,
+ bias_scale: torch.Tensor,
output_scale: float,
output_zero_point: int,
- out_multiplier: int,
- out_shift: int,
- channel_last: bool = False,
+ out_multiplier: torch.Tensor,
+ out_shift: torch.Tensor,
) -> torch.Tensor:
- if channel_last:
- out_channels, *kernel_size, _ = weight.shape
- else:
- out_channels, _, *kernel_size = weight.shape
+ out_channels, _, *kernel_size = weight.shape
in_size = input.shape
# Assert that the input tensor has at least 3 dimensions, and at most 6
@@ -515,48 +738,664 @@ def quantized_conv_per_tensor_meta(
padding[1],
dilation[1],
kernel_size[0],
- channel_last,
+ False,
)
if len(in_size) == 3
else get_conv2d_output_size(
- in_size, out_channels, stride, padding, dilation, kernel_size, channel_last
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
)
)
return input.new_empty(output_size, dtype=input.dtype)
-@register_fake("cadence::quantized_layer_norm")
-def quantized_layer_norm_meta(
+@register_fake("cadence::quantized_conv_nchw.per_tensor")
+def quantized_conv_nchw_per_tensor_meta(
input: torch.Tensor,
- X_scale: torch.Tensor,
- X_zero_point: torch.Tensor,
- normalized_shape: int,
weight: torch.Tensor,
bias: torch.Tensor,
- eps: float,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
output_scale: float,
output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
) -> torch.Tensor:
- return input.new_empty(input.size(), dtype=input.dtype)
+ out_channels, _, *kernel_size = weight.shape
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
-@register_fake("cadence::quantized_layer_norm.per_tensor")
-def quantized_layer_norm_per_tensor_meta(
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ False,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc.per_tensor")
+def quantized_conv_nhwc_per_tensor_meta(
input: torch.Tensor,
- X_scale: float,
- X_zero_point: int,
- normalized_shape: int,
weight: torch.Tensor,
bias: torch.Tensor,
- eps: float,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
output_scale: float,
output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
) -> torch.Tensor:
- return input.new_empty(input.size(), dtype=input.dtype)
+ out_channels, *kernel_size, _ = weight.shape
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
-@register_fake("cadence::quantized_relu")
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ True,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor")
+def quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, _, *kernel_size = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ False,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor")
+def quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, _, *kernel_size = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ False,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor")
+def quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, *kernel_size, _ = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ True,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor")
+def quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, *kernel_size, _ = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ True,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor")
+def quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, _, *kernel_size = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ False,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor")
+def quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, _, *kernel_size = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ False,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor")
+def quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, *kernel_size, _ = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ True,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor")
+def quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, *kernel_size, _ = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ True,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor")
+def quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, _, *kernel_size = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ False,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor")
+def quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, _, *kernel_size = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ False,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, False
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor")
+def quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, *kernel_size, _ = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ True,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor")
+def quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_meta(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ stride: Tuple[int],
+ padding: Tuple[int],
+ dilation: Tuple[int],
+ groups: int,
+ in_zero_point: int,
+ weight_zero_point: int,
+ bias_scale: float,
+ output_scale: float,
+ output_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ out_channels, *kernel_size, _ = weight.shape
+
+ in_size = input.shape
+ # Assert that the input tensor has at least 3 dimensions, and at most 6
+ assert len(in_size) > 2
+ assert len(in_size) < 6
+
+ # Compute the output tensor size
+ output_size = (
+ get_conv1d_output_size(
+ in_size,
+ out_channels,
+ stride[1],
+ padding[1],
+ dilation[1],
+ kernel_size[0],
+ True,
+ )
+ if len(in_size) == 3
+ else get_conv2d_output_size(
+ in_size, out_channels, stride, padding, dilation, kernel_size, True
+ )
+ )
+
+ return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_layer_norm")
+def quantized_layer_norm_meta(
+ input: torch.Tensor,
+ X_scale: torch.Tensor,
+ X_zero_point: torch.Tensor,
+ normalized_shape: int,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ eps: float,
+ output_scale: float,
+ output_zero_point: int,
+) -> torch.Tensor:
+ return input.new_empty(input.size(), dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_layer_norm.per_tensor")
+def quantized_layer_norm_per_tensor_meta(
+ input: torch.Tensor,
+ X_scale: float,
+ X_zero_point: int,
+ normalized_shape: int,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ eps: float,
+ output_scale: float,
+ output_zero_point: int,
+) -> torch.Tensor:
+ return input.new_empty(input.size(), dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_relu")
def quantized_relu_meta(
X: torch.Tensor,
X_zero_point: torch.Tensor,
@@ -610,6 +1449,92 @@ def quantized_matmul_meta(
return X.new_empty(out_size, dtype=X.dtype)
+@register_fake("cadence::quantized_matmul_asym8sxasym8s_asym8s")
+def quantized_matmul_asym8sxasym8s_asym8s_meta(
+ X: torch.Tensor,
+ X_zero_point: int,
+ Y: torch.Tensor,
+ Y_zero_point: int,
+ bias: Optional[torch.Tensor],
+ out_multiplier: int,
+ out_shift: int,
+ out_zero_point: int,
+ transposed: bool = False,
+) -> torch.Tensor:
+ X_size = list(X.size())
+ Y_size = list(Y.size())
+
+ # Get the batch dimensions for both tensors
+ X_batch_dims = X_size[:-2]
+ Y_batch_dims = Y_size[:-2]
+
+ # If they don't match, check that they're compatible
+ if X_batch_dims != Y_batch_dims:
+ assert prod(X_batch_dims) == prod(
+ Y_batch_dims
+ ), f"Batch dimensions of X and Y do not match: {X_batch_dims} vs {Y_batch_dims}"
+
+ # Get the matmul output size
+ if transposed:
+ assert X_size[-1] == Y_size[-1], "matrices cannot be multiplied"
+ mat_size = [X_size[-2], Y_size[-2]]
+ else:
+ assert X_size[-1] == Y_size[-2], "matrices cannot be multiplied"
+ mat_size = [X_size[-2], Y_size[-1]]
+
+ # Combine the larger batch dimensions with the matmul output size
+ out_size = (
+ X_batch_dims + mat_size
+ if len(X_batch_dims) > len(Y_batch_dims)
+ else Y_batch_dims + mat_size
+ )
+
+ return X.new_empty(out_size, dtype=X.dtype)
+
+
+@register_fake("cadence::quantized_matmul_asym8uxasym8u_asym8u")
+def quantized_matmul_asym8uxasym8u_asym8u_meta(
+ X: torch.Tensor,
+ X_zero_point: int,
+ Y: torch.Tensor,
+ Y_zero_point: int,
+ bias: Optional[torch.Tensor],
+ out_multiplier: int,
+ out_shift: int,
+ out_zero_point: int,
+ transposed: bool = False,
+) -> torch.Tensor:
+ X_size = list(X.size())
+ Y_size = list(Y.size())
+
+ # Get the batch dimensions for both tensors
+ X_batch_dims = X_size[:-2]
+ Y_batch_dims = Y_size[:-2]
+
+ # If they don't match, check that they're compatible
+ if X_batch_dims != Y_batch_dims:
+ assert prod(X_batch_dims) == prod(
+ Y_batch_dims
+ ), f"Batch dimensions of X and Y do not match: {X_batch_dims} vs {Y_batch_dims}"
+
+ # Get the matmul output size
+ if transposed:
+ assert X_size[-1] == Y_size[-1], "matrices cannot be multiplied"
+ mat_size = [X_size[-2], Y_size[-2]]
+ else:
+ assert X_size[-1] == Y_size[-2], "matrices cannot be multiplied"
+ mat_size = [X_size[-2], Y_size[-1]]
+
+ # Combine the larger batch dimensions with the matmul output size
+ out_size = (
+ X_batch_dims + mat_size
+ if len(X_batch_dims) > len(Y_batch_dims)
+ else Y_batch_dims + mat_size
+ )
+
+ return X.new_empty(out_size, dtype=X.dtype)
+
+
@register_fake("cadence::im2row")
def im2row_meta(
input: torch.Tensor,
@@ -694,6 +1619,28 @@ def quantized_relu_per_tensor_meta(
return input.new_empty(input.size(), dtype=input.dtype)
+@register_fake("cadence::quantized_relu_asym8s_asym8s.per_tensor")
+def quantized_relu_asym8s_asym8s_per_tensor_meta(
+ input: torch.Tensor,
+ in_zero_point: int,
+ out_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ return input.new_empty(input.size(), dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_relu_asym8u_asym8u.per_tensor")
+def quantized_relu_asym8u_asym8u_per_tensor_meta(
+ input: torch.Tensor,
+ in_zero_point: int,
+ out_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+) -> torch.Tensor:
+ return input.new_empty(input.size(), dtype=input.dtype)
+
+
@register_fake("cadence::fully_connected")
def fully_connected_meta(
src: torch.Tensor,
@@ -754,6 +1701,50 @@ def quantized_fully_connected_per_tensor_meta(
return src.new_empty(out_size, dtype=src.dtype)
+@register_fake("cadence::quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor")
+def quantized_fully_connected_asym8sxasym8s_asym8s_per_tensor_meta(
+ src: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ in_zero_point: int,
+ weight_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+ out_zero_point: int,
+ offset: Optional[torch.Tensor],
+) -> torch.Tensor:
+ # src comes in shape [leading_dims, in_dim]
+ # weight comes in shape [out_dim, in_dim]
+ # output comes in empty with shape [leading_dims, out_dim]
+ out_size = list(src.size())
+ weight_size = list(weight.size())
+ assert len(weight_size) == 2
+ out_size[-1] = weight_size[0]
+ return src.new_empty(out_size, dtype=src.dtype)
+
+
+@register_fake("cadence::quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor")
+def quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor_meta(
+ src: torch.Tensor,
+ weight: torch.Tensor,
+ bias: torch.Tensor,
+ in_zero_point: int,
+ weight_zero_point: int,
+ out_multiplier: int,
+ out_shift: int,
+ out_zero_point: int,
+ offset: Optional[torch.Tensor],
+) -> torch.Tensor:
+ # src comes in shape [leading_dims, in_dim]
+ # weight comes in shape [out_dim, in_dim]
+ # output comes in empty with shape [leading_dims, out_dim]
+ out_size = list(src.size())
+ weight_size = list(weight.size())
+ assert len(weight_size) == 2
+ out_size[-1] = weight_size[0]
+ return src.new_empty(out_size, dtype=src.dtype)
+
+
@register_fake("cadence::convolution")
def convolution_meta(
input: torch.Tensor,
@@ -808,7 +1799,7 @@ def transposed_convolution_meta(
) -> torch.Tensor:
# The native definition of torch transposed conv will have weight shape as
# (in_channels, out_channels/groups, *kernel_size).
- # However, the two channel position is flipped in the Jarvis pass of replacing it
+ # However, the two channel position is flipped in the Cadence pass of replacing it
# with cadence::transposed_convolution here: https://fburl.com/code/d2s7pkyy
out_channels, _input_channels, *kernel_size = weight.shape
out_channels *= groups
diff --git a/backends/cadence/aot/pass_utils.py b/backends/cadence/aot/pass_utils.py
index b004f714f2b..9aedef2ce2f 100644
--- a/backends/cadence/aot/pass_utils.py
+++ b/backends/cadence/aot/pass_utils.py
@@ -13,7 +13,7 @@
from executorch.backends.cadence.aot.utils import get_edge_overload_packet
from executorch.exir.dialects.edge._ops import EdgeOpOverload, EdgeOpOverloadPacket
-from executorch.exir.pass_base import PassBase
+from executorch.exir.pass_base import PassBase, PassResult
from torch._ops import OpOverloadPacket
@@ -224,3 +224,8 @@ def set_arg(
node.update_arg(idx, value)
else:
node.update_kwarg(kwarg_name, value)
+
+
+def none_throws(x: Optional[PassResult]) -> PassResult:
+ assert x is not None
+ return x
diff --git a/backends/cadence/aot/passes.py b/backends/cadence/aot/passes.py
index d7c692f12e9..bb4a8f065d5 100644
--- a/backends/cadence/aot/passes.py
+++ b/backends/cadence/aot/passes.py
@@ -33,6 +33,7 @@
ReplaceMulTensorWithMulAndFullOpsPass,
)
from executorch.backends.cadence.aot.simplify_ops import CadenceSimplifyOpsInGraph
+from executorch.backends.cadence.aot.type_dispatch import CompileTimeTypeDispatchPass
from executorch.exir import EdgeProgramManager
from executorch.exir.pass_base import ExportPass, PassResult
from executorch.exir.pass_manager import PassManager, PassType
@@ -90,6 +91,7 @@ def get_passes_in_default_order() -> list[Type[ExportPass]]:
FuseFullThenReshapePass,
FuseTransposeOrPermuteOpPairsPass,
RemoveNopSliceOrViewOpPass,
+ CompileTimeTypeDispatchPass,
]
return pytree.tree_flatten(passes)[0]
diff --git a/backends/cadence/aot/program_builder.py b/backends/cadence/aot/program_builder.py
index 0bb71c95a4a..d73cc9fcfbf 100644
--- a/backends/cadence/aot/program_builder.py
+++ b/backends/cadence/aot/program_builder.py
@@ -2,14 +2,15 @@
# pyre-strict
+from enum import auto, Enum
from typing import Optional
from executorch.backends.cadence.aot.graph_builder import GraphBuilder
from executorch.exir import EdgeCompileConfig, EdgeProgramManager
from executorch.exir.pass_base import ProxyValue
from executorch.exir.verification.verifier import EXIREdgeDialectVerifier
-
from torch import Tensor
+from torch._export.verifier import Verifier
from torch.export import ExportedProgram
from torch.export.graph_signature import (
ExportGraphSignature,
@@ -21,14 +22,20 @@
)
+class IrMode(Enum):
+ EXIR = auto()
+ ATEN = auto()
+
+
class ProgramBuilder(GraphBuilder):
"""Utility class to build a program from a graph module."""
- def __init__(self) -> None:
+ def __init__(self, mode: Optional[IrMode] = None) -> None:
self.input_specs: list[InputSpec] = []
self.output_specs: list[OutputSpec] = []
self.constants: dict[str, Tensor] = {}
self.state_dict: dict[str, Tensor] = {}
+ self.mode: IrMode = mode or IrMode.EXIR
super().__init__()
def insert_input_spec(
@@ -68,6 +75,16 @@ def output(
)
return super().output(results)
+ def get_verifiers(self) -> Optional[list[Verifier]]:
+ if self.mode == IrMode.ATEN:
+ return None
+ return [
+ EXIREdgeDialectVerifier(
+ edge_compile_config=EdgeCompileConfig(_check_ir_validity=False),
+ class_only=True,
+ )
+ ]
+
def get_program(self) -> ExportedProgram:
gm = self.get_graph_module()
return ExportedProgram(
@@ -81,12 +98,8 @@ def get_program(self) -> ExportedProgram:
state_dict=self.state_dict,
range_constraints={},
module_call_graph=[],
- verifiers=[
- EXIREdgeDialectVerifier(
- edge_compile_config=EdgeCompileConfig(_check_ir_validity=False),
- class_only=True,
- )
- ],
+ # pyre-ignore[6]: Incompatible parameter type.
+ verifiers=self.get_verifiers(),
)
def get_edge_program(self) -> EdgeProgramManager:
diff --git a/backends/cadence/aot/quantizer/fusion_pass.py b/backends/cadence/aot/quantizer/fusion_pass.py
index a726f6c7fba..729056ea2c8 100644
--- a/backends/cadence/aot/quantizer/fusion_pass.py
+++ b/backends/cadence/aot/quantizer/fusion_pass.py
@@ -331,7 +331,6 @@ def get_args_and_kwargs_conv(
"out_zero_point": quant_node.args[2],
"out_multiplier": out_multiplier_,
"out_shift": out_shift_,
- "channel_last": False,
}
return args, kwargs
diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
index 88c16139733..74987f8b38d 100644
--- a/backends/cadence/aot/quantizer/patterns.py
+++ b/backends/cadence/aot/quantizer/patterns.py
@@ -247,7 +247,7 @@ def get_anchors(
)
def replacement_op(self) -> OpOverload:
- return torch.ops.cadence.quantized_conv.default
+ return torch.ops.cadence.quantized_conv_nchw.default
class Conv2dPattern(QuantizationPattern):
@@ -286,7 +286,7 @@ def get_anchors(
)
def replacement_op(self) -> OpOverload:
- return torch.ops.cadence.quantized_conv.default
+ return torch.ops.cadence.quantized_conv_nchw.default
class LayerNormPattern(QuantizationPattern):
diff --git a/backends/cadence/aot/remove_ops.py b/backends/cadence/aot/remove_ops.py
index 4721e5a1926..663c5825e52 100644
--- a/backends/cadence/aot/remove_ops.py
+++ b/backends/cadence/aot/remove_ops.py
@@ -7,16 +7,6 @@
# pyre-strict
-# This file contains functions to remove operators from the graph. The removed
-# ops should belong to either of the following categories:
-# 1. The op should be redundant for inference (e.g., dropout). Such ops are grouped
-# together in 'RemoveRedundantOps'. Anyone running inference can add this class
-# in their pass list, and it should semantic-preserving transformation.
-# 2. The op should be redundant for Jarvis (e.g., contiguous). Such ops are grouped
-# together in 'CadenceRemoveNops'. The ops removed in this class might not be nop
-# in a context outside of Jarvis', so exercise caution while invoking this in a
-# pass list outside of Jarvis.
-
import logging
from dataclasses import dataclass, field
from typing import cast, List, Optional, Sequence, Set
@@ -152,7 +142,7 @@ def call_operator(
@register_cadence_pass(CadencePassAttribute(opt_level=0))
class RemoveToOpsPass(ExportPass):
- # aten.to.* as of now are all nops for Jarvis
+ # aten.to.* as of now are all nops
def call_operator(
self,
op, # pyre-ignore
@@ -413,7 +403,7 @@ def call_operator(
class RemoveAliasCopyOpPass(ExportPass):
"""
- alias_copy is a no-op for Jarvis and can be removed.
+ alias_copy is a no-op and can be removed.
"""
def call_operator(
@@ -936,10 +926,6 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
return super().call(graph_module)
-# The following class consolidates functions to remove ops that are redundant
-# in Jarvis. Currently, each function in this class iterates over each node of
-# the graph module once. In future, we could consolidate them into a monolithic
-# function.
class CadenceRemoveNops:
passes = [
SimplifySliceOpPass,
diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py
index 8e6516cadba..7f493e1645d 100644
--- a/backends/cadence/aot/replace_ops.py
+++ b/backends/cadence/aot/replace_ops.py
@@ -7,12 +7,7 @@
# This file contains all the functions that replace one op with another in the
-# graph. The functions replacing ops for models deployed with Jarvis are grouped
-# together in class 'ReplaceOpsInGraph'. Some examples of functions in the class are
-# 1. functions that replace an ATen op with a custom op that accepts extra arguments
-# 2. functions that replace in-place variants of ATen ops with out-of-place version.
-# 3. functions that replace an ATen op with another semantically equivalent ATen op.
-# 4. functions that concretize optional args.
+# graph.
# pyre-unsafe
@@ -20,17 +15,15 @@
import math
import operator
from operator import neg
-from typing import cast, Dict, Iterable, Optional, Sequence, Set, Tuple
+from typing import cast, Dict, Iterable, Optional, Sequence, Tuple
import torch
import torch.fx
from executorch.backends.cadence.aot.compiler_utils import (
get_shape,
get_tensor_from_attr,
- get_transposed_dims,
get_zero_point,
is_node_with_op,
- is_quantized_tensor,
quantize_tensor_multiplier,
)
from executorch.backends.cadence.aot.fuse_ops import (
@@ -39,6 +32,7 @@
)
from executorch.backends.cadence.aot.pass_utils import (
CadencePassAttribute,
+ none_throws,
register_cadence_pass,
)
from executorch.backends.cadence.aot.remove_ops import RemoveNopSelectOpPass
@@ -53,7 +47,7 @@
from torch.fx.node import Argument
# A map to represent ops that:
-# (a) are functionally equivalent wrt. Jarvis; and
+# (a) are functionally equivalent; and
# (b) have identical arguments
# An op whose target is 'key' in this dict can be replaced by the functionally euivalent
# op whose target is 'value'. The replacement would just involve changing the op target.
@@ -649,7 +643,7 @@ def call_operator(self, op, args, kwargs, meta):
# Make that pass runnable standalone at opt level 0.
@register_cadence_pass(CadencePassAttribute(opt_level=0))
-class ReplaceAtenConvolutionWithJarvisConvolutionPass(ExportPass):
+class ReplaceAtenConvolutionWithCadenceConvolutionPass(ExportPass):
"""
Replace aten convolution op with jarvis-specific convolution op, since the
aten version is not supported by jarvis.
@@ -776,186 +770,6 @@ def call_operator(self, op, args, kwargs, meta):
return super().call_operator(target, new_args, kwargs, meta)
-# TODO(matthiascremon): this is a fuse op, not a replace op
-class ReplaceConvWithChannelLastConv:
- """
- Convolution op in pytorch expects NCHW layout for input, weight, and output
- tensors. However, if the input and output to the convolution op are originally
- in NWHC layout, and are then permuted to conform to NCHW layout, we can fuse
- the two permute ops with the convolution op, and call the NHWC layout
- convolution op in Jarvis.
- """
-
- def __init__(self):
- self.counter = 0
- self.graph_module = None
-
- def __call__(self, graph_module: torch.fx.GraphModule):
- self.replace_conv_with_nhwc_conv(graph_module)
-
- def conv_layout_is_nhwc(self, node: torch.fx.Node) -> bool:
- """
- Return true if the convolution input and output are connected to permute
- ops, and the input/output to/from the permute ops is NHWC layout tensor.
- """
- # There must only be a single user of the output node (which must be a
- # permute/tranpsose op). The input of the convolution must be connected
- # to a permute op, and that permute op should have a single user.
- conv_inp = node.args[0]
- assert isinstance(conv_inp, torch.fx.Node)
- if len(node.users) != 1 or len(conv_inp.users) != 1:
- return False
-
- # Get the input and output (permute/transpose) nodes of the convolution
- conv_user = list(node.users.keys())[0]
- assert isinstance(conv_user, torch.fx.Node)
- pt_nodes: Set[torch.fx.Node] = {conv_inp, conv_user}
-
- # Any node in pt_nodes must not be a placeholder.
- if contains_placeholder_or_param(pt_nodes):
- return False
-
- # Determine if the convolution is 1d or 2d. The output tensor must be
- # 3- or 4-dimensional
- out_shape = get_shape(self.graph_module, node)
- assert out_shape is not None
- out_dims = len(out_shape)
- assert out_dims in {3, 4}, "Jarvis only supports conv1d and conv2d"
- conv1d = out_dims == 3
-
- # Get the possible targets for the nodes in pt_nodes. Since conv1d has
- # 3-dimensional input and output tensors, the nodes in pt_nodes could
- # be either permute or transpose op. For conv2d, the nodes in pt_nodes
- # must be permute ops.
- p_target = exir_ops.edge.aten.permute_copy.default
- t_target = exir_ops.edge.aten.transpose_copy.int
- pt_targets = [p_target] + ([t_target] if conv1d else [])
-
- # If any node in pt_nodes is not permute op (or tranpose op for conv1d),
- # bail.
- if any(x.target not in pt_targets for x in pt_nodes):
- return False
-
- # Now we need to determine the dimension permutations:
- # If the input had NHWC layout, which was then permuted/transposed
- # by a permute/transpose op to NCHW layout, the permutation must be
- # [0, 3, 2, 1] (or [0, 2, 1] for conv1d).
- # If the output had NCHW layout, and was then permuted to NHWC layout,
- # the permutation must be [0, 2, 3, 1] (or [0, 2, 1] for conv1d).
- nhwc_permute_order = {
- node.args[0]: [0, 2, 1] if conv1d else [0, 3, 1, 2],
- list(node.users.keys())[0]: [0, 2, 1] if conv1d else [0, 2, 3, 1],
- }
- for x in pt_nodes:
- order = (
- x.args[1]
- if x.target == p_target
- else get_transposed_dims(x, list(range(out_dims)))
- )
- if order != nhwc_permute_order[x]:
- return False
-
- return True
-
- def replace_conv_with_nhwc_conv(self, graph_module: torch.fx.GraphModule):
- self.graph_module = graph_module
- graph = graph_module.graph
- for node in graph.nodes:
- # We are only interested in convolution nodes that have NHWC layout
- if node.target not in {
- exir_ops.edge.cadence.quantized_conv.default,
- exir_ops.edge.cadence.convolution.default,
- exir_ops.edge.cadence.quantized_transposed_conv.default,
- exir_ops.edge.cadence.transposed_convolution.default,
- } or not self.conv_layout_is_nhwc(node):
- continue
-
- # Get the args of convolution op
- args = list(node.args)
- # The input is connected to a permute/transpose op that converts the
- # NHWC layout to NCHW layout. The input of the permute op will become
- # this convolution op's input.
- in_tp = args[0]
- args[0] = in_tp.args[0]
- # The weight is in NHWC layout. Permute it to NHWC layout.
- weight_tensor = get_tensor_from_attr(graph_module, args[1])
- assert isinstance(weight_tensor, torch.Tensor)
- # We cannot directly permute a per-channel quantized tensor. We will
- # dequantize it, permute the fp32 tensor, and then requantize the
- # permuted tensor.
- if (
- is_quantized_tensor(weight_tensor)
- and weight_tensor.qscheme() == torch.per_channel_affine
- ):
- # We have already asserted during quantizing conv op that the
- # quantization axis is 0.
- dequant_weight = weight_tensor.dequantize()
- dequant_weight = (
- dequant_weight.permute([0, 2, 1])
- if dequant_weight.dim() == 3
- else dequant_weight.permute([0, 2, 3, 1])
- )
- weight_tensor = torch.quantize_per_channel(
- dequant_weight.contiguous(),
- weight_tensor.q_per_channel_scales(),
- weight_tensor.q_per_channel_zero_points(),
- 0,
- weight_tensor.dtype,
- )
- else:
- weight_tensor = (
- weight_tensor.permute([0, 2, 1])
- if weight_tensor.dim() == 3
- else weight_tensor.permute([0, 2, 3, 1])
- )
- # Make the weight tensor contiguous, since we have permuted it.
- weight_tensor = weight_tensor.contiguous()
- # Add the permuted weight into the graph, and update the weight in
- # args.
- with graph.inserting_before(node):
- weight_name = f"_weight_nhwc_{self.counter}"
- graph_module.register_buffer(weight_name, weight_tensor)
- weight = graph.get_attr(weight_name)
- args[1] = weight
-
- # The 'channel_last' arg is True. It is the last arg.
- args[-1] = True
- # Now update the convolution node args to mark it as NHWC convolution
- node.args = tuple(args)
-
- # Replace all the uses of the permute op connected to the output op
- # with this convolution.
- out_tp = list(node.users.keys())[0]
- out_tp.replace_all_uses_with(node)
- node.meta = out_tp.meta
-
- # Erase the permute ops connected to the input and output of the
- # convolution op.
- graph.erase_node(in_tp)
- graph.erase_node(out_tp)
- self.counter += 1
-
- graph_module.recompile()
-
-
-# This pass needs to be reworked to be compatible with PT2. It is an optimization
-# pass anyway, so move it to opt level 2.
-# TODO: T213724613 update and improve this pass.
-# @register_cadence_pass(CadencePassAttribute(opt_level=2))
-class ReplaceConvWithChannelLastConvPass(ExportPass):
- """
- Replace the ATen convolution op with custom conv op with NCHW or NHWC layout
- input tensors, depending on the presence of permute/transpose ops connected
- to the input tensor.
- """
-
- def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
- result = ReplaceAtenConvolutionWithJarvisConvolutionPass()(graph_module)
- assert result is not None
- ReplaceConvWithChannelLastConv()(result.graph_module)
- return result
-
-
@register_cadence_pass(CadencePassAttribute(opt_level=2))
class ReplaceTrivialConvWithLinear(ExportPass):
"""
@@ -973,7 +787,8 @@ class ReplaceTrivialConvWithLinear(ExportPass):
trivial_conv_op_to_linear_op: Dict[EdgeOpOverload, EdgeOpOverload] = {
exir_ops.edge.cadence.convolution.default: exir_ops.edge.aten.linear.default,
- exir_ops.edge.cadence.quantized_conv.default: exir_ops.edge.cadence.quantized_linear.default,
+ exir_ops.edge.cadence.quantized_conv_nchw.default: exir_ops.edge.cadence.quantized_linear.default,
+ exir_ops.edge.cadence.quantized_conv_nhwc.default: exir_ops.edge.cadence.quantized_linear.default,
}
def call_operator(self, op, args, kwargs, meta):
@@ -984,7 +799,10 @@ def call_operator(self, op, args, kwargs, meta):
# and quantized_conv have the same first 8 args. The quantized op has
# extra args holding at least the zero point and scale of input, weight, bias,
# and output tensor.
- quantized_op = op == exir_ops.edge.cadence.quantized_conv.default
+ quantized_op = (
+ op == exir_ops.edge.cadence.quantized_conv_nchw.default
+ or op == exir_ops.edge.cadence.quantized_conv_nhwc.default
+ )
assert (len(args) == 8 and not quantized_op) or (
len(args) >= 12 and quantized_op
), "Inconsistent args for convolution"
@@ -1131,7 +949,7 @@ def transpose_dims(
@register_cadence_pass(CadencePassAttribute(opt_level=3))
-class ForceChannelLastForConvPass(ExportPassWithTransposeHelper):
+class ReplaceConvWithChannelLastConvPass(ExportPassWithTransposeHelper):
def change_nchw_to_nhwc(self, proxy: ProxyValue, meta: NodeMetadata) -> ProxyValue:
shape = proxy.to_tensor().shape
if len(shape) == 3:
@@ -1161,35 +979,38 @@ def call_operator(
) -> ProxyValue:
if op not in {
exir_ops.edge.cadence.convolution.default,
- exir_ops.edge.cadence.quantized_conv.default,
+ exir_ops.edge.cadence.quantized_conv_nchw.default,
}:
return super().call_operator(op, args, kwargs, meta)
- quantized_op = op == exir_ops.edge.cadence.quantized_conv.default
- channel_last_arg_index = 14 if quantized_op else 7
- channel_last = (
- args[channel_last_arg_index]
- if len(args) > channel_last_arg_index
- # Default is false (NCHW).
- else False
- )
- if channel_last:
+ quantized_op = op == exir_ops.edge.cadence.quantized_conv_nchw.default
+
+ if not quantized_op and len(args) == 8 and args[-1] is True:
+ # Already in NHWC layout.
return super().call_operator(op, args, kwargs, meta)
+ new_op = (
+ exir_ops.edge.cadence.quantized_conv_nhwc.default
+ if quantized_op
+ else exir_ops.edge.cadence.convolution.default
+ )
+
input_proxy = cast(ProxyValue, args[0])
weight_proxy = cast(ProxyValue, args[1])
input_proxy = self.change_nchw_to_nhwc(input_proxy, meta)
weight_proxy = self.change_nchw_to_nhwc(weight_proxy, meta)
+ # Non-quantized ops still need to set the last optional argument to True.
+ channel_last_arg = [] if quantized_op else [True]
+
new_args = (
# Transposed input/weights.
(input_proxy, weight_proxy)
# All other args (bias, quant params, etc)
- + tuple(args[2:channel_last_arg_index])
- # Channel last.
- + (True,)
+ + tuple(args[2:])
+ + tuple(channel_last_arg)
)
- output_proxy = super().call_operator(op, new_args, kwargs, meta)
+ output_proxy = super().call_operator(new_op, new_args, kwargs, meta)
nchw_proxy = self.change_nhwc_to_nchw(output_proxy, meta)
return nchw_proxy
@@ -1246,7 +1067,8 @@ class ReplaceConvWithIm2RowAndLinear(ExportPass):
# decompose to.
conv_op_to_linear_op: Dict[EdgeOpOverload, EdgeOpOverload] = {
exir_ops.edge.cadence.convolution.default: exir_ops.edge.aten.linear.default,
- exir_ops.edge.cadence.quantized_conv.default: exir_ops.edge.cadence.quantized_linear.default,
+ exir_ops.edge.cadence.quantized_conv_nchw.default: exir_ops.edge.cadence.quantized_linear.default,
+ exir_ops.edge.cadence.quantized_conv_nhwc.default: exir_ops.edge.cadence.quantized_linear.default,
}
def call_operator(self, op, args, kwargs, meta):
@@ -1254,7 +1076,10 @@ def call_operator(self, op, args, kwargs, meta):
return super().call_operator(op, args, kwargs, meta)
# Get the relevant args from convolution node.
- quantized_op = op == exir_ops.edge.cadence.quantized_conv.default
+ quantized_op = (
+ op == exir_ops.edge.cadence.quantized_conv_nchw.default
+ or op == exir_ops.edge.cadence.quantized_conv_nhwc.default
+ )
assert (len(args) == 8 and not quantized_op) or (
len(args) >= 12 and quantized_op
), "Inconsistent args for convolution"
@@ -1285,9 +1110,7 @@ def call_operator(self, op, args, kwargs, meta):
# channel_last layout is specified by the channel_last arg of conv
# op, which is either the last argument (15th) or implicitely False
# if the op is quantized, or the last argument if not.
- channel_last = (
- (args[14] if len(args) == 15 else False) if quantized_op else args[-1]
- )
+ channel_last = op == exir_ops.edge.cadence.quantized_conv_nhwc.default
# The weight tensor is [out_channels, in_channels, X] for NCHW layout,
# and [out_channels, X, in_channels] for NHWC layout. Here, X is the
# kernel_width for conv1d, and X = kernel_height * kernel_width for
@@ -1661,8 +1484,8 @@ def call_operator(self, op, args, kwargs, meta):
def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
result = super().call(graph_module)
- result = FuseCascadedViewOps()(result.graph_module)
- assert result is not None
+ fuse_cascaded_result = none_throws(FuseCascadedViewOps()(result.graph_module))
+ result = none_throws(ExportPass()(fuse_cascaded_result.graph_module))
return result
@@ -1699,7 +1522,6 @@ def call_operator(self, op, args, kwargs, meta):
)
-# pyre-ignore[6]: Incompatible parameter type (doesn't get the inheritance)
register_cadence_pass(CadencePassAttribute(opt_level=0))(ReplaceScalarWithTensorArgPass)
@@ -1800,8 +1622,12 @@ class ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass(ExportPass):
exir_ops.edge.cadence.quantized_add.per_tensor,
[1, 2, 4, 5],
),
- exir_ops.edge.cadence.quantized_conv: (
- exir_ops.edge.cadence.quantized_conv.per_tensor,
+ exir_ops.edge.cadence.quantized_conv_nchw: (
+ exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ [8, 9, 12, 13],
+ ),
+ exir_ops.edge.cadence.quantized_conv_nhwc: (
+ exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
[8, 9, 12, 13],
),
exir_ops.edge.cadence.quantized_fully_connected: (
@@ -1870,9 +1696,9 @@ def call_operator(self, op, args, kwargs, meta):
@register_cadence_pass(CadencePassAttribute(opt_level=0))
-class ReplaceAtenAvgPoolWithJarvisAvgPoolPass(ExportPass):
+class ReplaceAtenAvgPoolWithCadenceAvgPoolPass(ExportPass):
"""
- Replace the aten avg_pool op with the jarvis custom avg_pool2d op.
+ Replace the aten avg_pool op with the cadence custom avg_pool2d op.
"""
def call_operator(self, op, args, kwargs, meta):
@@ -2326,10 +2152,16 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
# Cast the const_arg to the dtype of the x_arg
full_arg = self.resolve_full_arg(x_arg, const_arg)
+ full_output_dtype = (
+ torch.int32 if isinstance(full_arg, int) else torch.float32
+ )
+
# Extract an argument to a separate full op.
with graph_module.graph.inserting_before(mul_node):
full_node = graph_module.graph.call_function(
- torch.ops.aten.full.default, args=([1], full_arg)
+ torch.ops.aten.full.default,
+ args=([1], full_arg),
+ kwargs={"dtype": full_output_dtype},
)
full_node.meta = mul_node.meta
full_node.meta["val"] = [1]
@@ -2427,9 +2259,8 @@ class CadenceReplaceOpsInGraph:
ReplaceRepeatWithCatPass,
ReplacePadWithCatPass,
ReplaceConstantPadNdWithSlicePass,
+ ReplaceAtenConvolutionWithCadenceConvolutionPass,
ReplaceConvWithChannelLastConvPass,
- ReplaceAtenConvolutionWithJarvisConvolutionPass,
- ForceChannelLastForConvPass,
ReplaceTrivialConvWithLinear,
ReplaceConvWithIm2RowAndLinear,
ReplaceTransposedConvWithLinearPass,
@@ -2447,7 +2278,7 @@ class CadenceReplaceOpsInGraph:
ReplacePT2DequantWithCadenceDequantPass,
ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
ReplaceAdaptiveAvgPoolWithAtenAvgPoolPass,
- ReplaceAtenAvgPoolWithJarvisAvgPoolPass,
+ ReplaceAtenAvgPoolWithCadenceAvgPoolPass,
ReplaceWhereWithFullArgsWithWhereScalar,
ReplaceAtenApproxGeluWithApproxGeluPass,
ReplaceSplitWithSlicePass,
diff --git a/backends/cadence/aot/tests/test_fusion_ops_passes.py b/backends/cadence/aot/tests/test_fusion_ops_passes.py
index 556c227b38d..d160a02721a 100644
--- a/backends/cadence/aot/tests/test_fusion_ops_passes.py
+++ b/backends/cadence/aot/tests/test_fusion_ops_passes.py
@@ -40,7 +40,29 @@ def check_op_counts(
self.assertTrue(op_counts_match(graph_module, expected_op_counts))
-class TestFusionPasses(TestFusionPassesBase):
+class TestFuseMMWithAddPass(TestFusionPassesBase):
+ def test_no_fuse_for_3d_bias(self) -> None:
+ builder = GraphBuilder()
+ x = builder.placeholder("x", torch.randn(4, 3, dtype=torch.float32))
+ y = builder.placeholder("y", torch.randn(3, 5, dtype=torch.float32))
+ z = builder.placeholder("z", torch.randn(1, 4, 5, dtype=torch.float32))
+ mm = builder.call_operator(
+ op=exir_ops.edge.aten.mm.default,
+ args=(x, y),
+ )
+ output = builder.call_operator(op=exir_ops.edge.aten.add.Tensor, args=(mm, z))
+ builder.output([output])
+ original_graph = builder.get_graph_module()
+
+ p = FuseMMWithAdd()
+ converted_graph = cast(PassResult, p(original_graph)).graph_module
+ converted_graph.graph.eliminate_dead_code()
+ self.assertEqual(
+ count_node(converted_graph, exir_ops.edge.aten.addmm.default), 0
+ )
+ self.assertEqual(count_node(converted_graph, exir_ops.edge.aten.mm.default), 1)
+ self.assertEqual(count_node(converted_graph, exir_ops.edge.aten.add.Tensor), 1)
+
def test_fuse_mm_with_add(self) -> None:
builder = GraphBuilder()
x = builder.placeholder("x", torch.randn(3, 5, dtype=torch.float32))
@@ -176,6 +198,8 @@ def test_keep_mm_add_with_multiple_users(self) -> None:
self.assertEqual(count_node(converted_graph, exir_ops.edge.aten.mm.default), 1)
self.assertEqual(count_node(converted_graph, exir_ops.edge.aten.add.Tensor), 3)
+
+class TestFusionPasses(TestFusionPassesBase):
def test_permute_transpose_fusion(self) -> None:
builder = GraphBuilder()
x = builder.placeholder("x", torch.randn(3, 1, 3, 1, 4, dtype=torch.float32))
diff --git a/backends/cadence/aot/tests/test_memory_passes.py b/backends/cadence/aot/tests/test_memory_passes.py
index a1da8ede61e..41f903ccf06 100644
--- a/backends/cadence/aot/tests/test_memory_passes.py
+++ b/backends/cadence/aot/tests/test_memory_passes.py
@@ -1044,7 +1044,7 @@ class DummyMemIdBlockConstraintGen(PassBase):
mul: blocks 1, 3
"""
- def __init__(self, memory_constraints: MemoryConfig):
+ def __init__(self, memory_constraints: MemConstraints):
self.memory_constraints = memory_constraints
def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
diff --git a/backends/cadence/aot/tests/test_program_builder.py b/backends/cadence/aot/tests/test_program_builder.py
index f2c138dce80..a16d42e2378 100644
--- a/backends/cadence/aot/tests/test_program_builder.py
+++ b/backends/cadence/aot/tests/test_program_builder.py
@@ -1,10 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# pyre-strict
-
import torch
-from executorch.backends.cadence.aot.program_builder import ProgramBuilder
+from executorch.backends.cadence.aot.program_builder import IrMode, ProgramBuilder
+from executorch.exir.dialects._ops import ops as exir_ops
from later.unittest import TestCase
+from torch._export.verifier import SpecViolationError
from torch.export.graph_signature import InputKind, OutputKind
@@ -120,3 +121,102 @@ def test_user_input_mutation(self) -> None:
self.assertEqual(
program.graph_signature.output_specs[0].kind, OutputKind.USER_INPUT_MUTATION
)
+
+ def test_get_verifier_exir_mode(self) -> None:
+ """Test that get_verifier returns EXIREdgeDialectVerifier for EXIR mode."""
+ builder = ProgramBuilder(mode=IrMode.EXIR)
+ verifiers = builder.get_verifiers()
+ self.assertIsNotNone(verifiers)
+ self.assertEqual(len(verifiers), 1)
+
+ def test_get_verifier_aten_mode(self) -> None:
+ """Test that get_verifier returns None for ATEN mode."""
+ builder = ProgramBuilder(mode=IrMode.ATEN)
+ verifiers = builder.get_verifiers()
+ self.assertIsNone(verifiers)
+
+ def test_get_verifier_default_mode(self) -> None:
+ """Test that get_verifier returns EXIREdgeDialectVerifier for default mode."""
+ builder = ProgramBuilder() # Should default to EXIR
+ self.assertEqual(builder.mode, IrMode.EXIR)
+ verifiers = builder.get_verifiers()
+ self.assertIsNotNone(verifiers)
+ self.assertEqual(len(verifiers), 1)
+
+ def test_aten_add_tensor_exir_mode(self) -> None:
+ """Test using torch.ops.aten.add.Tensor with EXIR mode."""
+ inp = torch.randn([3, 5])
+ buffer = torch.randn([5])
+
+ builder = ProgramBuilder(mode=IrMode.EXIR)
+ inp_proxy = builder.placeholder("inp", inp)
+ buffer_proxy = builder.placeholder(
+ "buffer", buffer, input_kind=InputKind.BUFFER
+ )
+ add = builder.call_operator(
+ torch.ops.aten.add.Tensor, (inp_proxy, buffer_proxy)
+ )
+ builder.output([add])
+ builder.get_program()
+
+ def test_aten_add_tensor_aten_mode(self) -> None:
+ """Test using torch.ops.aten.add.Tensor with ATEN mode."""
+ inp = torch.randn([3, 5])
+ buffer = torch.randn([5])
+
+ builder = ProgramBuilder(mode=IrMode.ATEN)
+ inp_proxy = builder.placeholder("inp", inp)
+ buffer_proxy = builder.placeholder(
+ "buffer", buffer, input_kind=InputKind.BUFFER
+ )
+ add = builder.call_operator(
+ torch.ops.aten.add.Tensor, (inp_proxy, buffer_proxy)
+ )
+ builder.output([add])
+ program = builder.get_program()
+
+ # Verify the program was created successfully
+ self.assertEqual(len(program.graph_signature.input_specs), 2)
+ self.assertEqual(len(program.graph_signature.output_specs), 1)
+ self.assertEqual(builder.mode, IrMode.ATEN)
+
+ def test_exir_edge_aten_add_tensor_exir_mode(self) -> None:
+ """Test using exir_ops.edge.aten.add.Tensor with EXIR mode."""
+ inp = torch.randn([3, 5])
+ buffer = torch.randn([5])
+
+ builder_exir = ProgramBuilder(mode=IrMode.EXIR)
+ inp_proxy_exir = builder_exir.placeholder("inp", inp)
+ buffer_proxy_exir = builder_exir.placeholder(
+ "buffer", buffer, input_kind=InputKind.BUFFER
+ )
+ add_exir = builder_exir.call_operator(
+ exir_ops.edge.aten.add.Tensor, (inp_proxy_exir, buffer_proxy_exir)
+ )
+ builder_exir.output([add_exir])
+ program_exir = builder_exir.get_program()
+
+ # Verify the program was created successfully
+ self.assertEqual(len(program_exir.graph_signature.input_specs), 2)
+ self.assertEqual(len(program_exir.graph_signature.output_specs), 1)
+ self.assertEqual(builder_exir.mode, IrMode.EXIR)
+
+ def test_exir_edge_aten_add_tensor_aten_mode(self) -> None:
+ """Test using exir_ops.edge.aten.add.Tensor with ATEN mode."""
+ inp = torch.randn([3, 5])
+ buffer = torch.randn([5])
+
+ builder_aten = ProgramBuilder(mode=IrMode.ATEN)
+ inp_proxy_aten = builder_aten.placeholder("inp", inp)
+ buffer_proxy_aten = builder_aten.placeholder(
+ "buffer", buffer, input_kind=InputKind.BUFFER
+ )
+ add_aten = builder_aten.call_operator(
+ exir_ops.edge.aten.add.Tensor, (inp_proxy_aten, buffer_proxy_aten)
+ )
+ builder_aten.output([add_aten])
+
+ with self.assertRaises(
+ SpecViolationError, msg="Operator '"
+ ):
+ builder_aten.get_program()
diff --git a/backends/cadence/aot/tests/test_replace_ops_passes.py b/backends/cadence/aot/tests/test_replace_ops_passes.py
index d778cd5b898..bd02cb0ae11 100644
--- a/backends/cadence/aot/tests/test_replace_ops_passes.py
+++ b/backends/cadence/aot/tests/test_replace_ops_passes.py
@@ -17,14 +17,14 @@
)
from executorch.backends.cadence.aot.pass_utils import count_node, op_counts_match
from executorch.backends.cadence.aot.replace_ops import (
- ForceChannelLastForConvPass,
MakeSliceAndCatDimOutermostPass,
ReplaceAdaptiveAvgPoolWithAtenAvgPoolPass,
ReplaceAddMMWithLinearPass,
ReplaceAtenApproxGeluWithApproxGeluPass,
- ReplaceAtenConvolutionWithJarvisConvolutionPass,
+ ReplaceAtenConvolutionWithCadenceConvolutionPass,
ReplaceConstantPadNdWithSlicePass,
ReplaceConvolutionOptionalArgsWithConcreteArgsPass,
+ ReplaceConvWithChannelLastConvPass,
ReplaceConvWithIm2RowAndLinear,
ReplaceEmptyTensorsWithFullPass,
ReplaceFunctionallyEquivalentOpTargets,
@@ -411,7 +411,7 @@ def test_replace_transposed_conv_with_linear(
builder.output([convolution])
original_gm = builder.get_graph_module()
- p1 = ReplaceAtenConvolutionWithJarvisConvolutionPass()
+ p1 = ReplaceAtenConvolutionWithCadenceConvolutionPass()
p2 = ReplaceTransposedConvWithLinearPass()
graph_after_passes = cast(
PassResult, p2(cast(PassResult, p1(original_gm)).graph_module)
@@ -969,7 +969,7 @@ def test_replace_conv1d_with_linear(self) -> None:
args=(x, weights, bias, [1], [0], [1], 1, False),
)
# First, replace the aten convolution with a cadence.convolution op
- p1 = ReplaceAtenConvolutionWithJarvisConvolutionPass()
+ p1 = ReplaceAtenConvolutionWithCadenceConvolutionPass()
temp_graph = cast(PassResult, p1(original_gm)).graph_module
# temp_graph = p1(original_gm).graph_module
self.assertIsNotNone(temp_graph)
@@ -1003,7 +1003,7 @@ def test_replace_conv2d_with_linear(self) -> None:
args=(x, weights, bias, [1, 1], [0, 0], [1, 1], 1, False),
)
# First, replace the aten convolution with a cadence.convolution op
- p1 = ReplaceAtenConvolutionWithJarvisConvolutionPass()
+ p1 = ReplaceAtenConvolutionWithCadenceConvolutionPass()
temp_graph = cast(PassResult, p1(original_gm)).graph_module
self.assertIsNotNone(temp_graph)
@@ -1454,7 +1454,7 @@ def test_replace_linear_like_conv(self) -> None:
)
-class TestForceChannelLastForConvPass(unittest.TestCase):
+class TestReplaceConvWithChannelLastConvPass(unittest.TestCase):
def create_conv1d_graphmodule(
self, channels_last: Optional[bool] = None
) -> torch.fx.GraphModule:
@@ -1489,7 +1489,7 @@ def test_conv1d_default_channel_last(self) -> None:
self.assertEqual(count_node(gm, exir_ops.edge.aten.transpose_copy.int), 0)
# Apply replacement pass.
- p = ForceChannelLastForConvPass()
+ p = ReplaceConvWithChannelLastConvPass()
gm_after_replacement = p.call(gm).graph_module
# Check that no replacement was made.
self.assertEqual(
@@ -1514,7 +1514,7 @@ def test_conv1d_no_transpose_if_already_channel_last(self) -> None:
self.assertEqual(count_node(gm, exir_ops.edge.cadence.convolution.default), 1)
# Apply replacement pass.
- p = ForceChannelLastForConvPass()
+ p = ReplaceConvWithChannelLastConvPass()
gm_after_replacement = p.call(gm).graph_module
# Check that no replacement was made.
self.assertEqual(
@@ -1566,7 +1566,7 @@ def test_convolution_default_channel_last(self) -> None:
self.assertEqual(count_node(gm, exir_ops.edge.aten.permute_copy.default), 0)
# Apply replacement pass.
- p = ForceChannelLastForConvPass()
+ p = ReplaceConvWithChannelLastConvPass()
gm_after_replacement = p.call(gm).graph_module
# Check that no replacement was made.
self.assertEqual(
@@ -1591,7 +1591,7 @@ def test_no_transpose_if_already_channel_last(self) -> None:
self.assertEqual(count_node(gm, exir_ops.edge.cadence.convolution.default), 1)
# Apply replacement pass.
- p = ForceChannelLastForConvPass()
+ p = ReplaceConvWithChannelLastConvPass()
gm_after_replacement = p.call(gm).graph_module
# Check that no replacement was made.
self.assertEqual(
@@ -1655,28 +1655,49 @@ def create_quantized_convolution_graph_module(
out_shift,
)
if channels_last is not None:
- args = args + (channels_last,)
- return single_op_builder(
- placeholders=(x, w, b, w_zero_point, b_scale, out_multiplier, out_shift),
- op=exir_ops.edge.cadence.quantized_conv.default,
- args=args,
- )
+ return single_op_builder(
+ placeholders=(
+ x,
+ w,
+ b,
+ w_zero_point,
+ b_scale,
+ out_multiplier,
+ out_shift,
+ ),
+ op=exir_ops.edge.cadence.quantized_conv_nhwc.default,
+ args=args,
+ )
+ else:
+ return single_op_builder(
+ placeholders=(
+ x,
+ w,
+ b,
+ w_zero_point,
+ b_scale,
+ out_multiplier,
+ out_shift,
+ ),
+ op=exir_ops.edge.cadence.quantized_conv_nchw.default,
+ args=args,
+ )
def test_quantized_convolution_default_channel_last(self) -> None:
# Create a graph with a single convolution node.
gm = self.create_quantized_convolution_graph_module()
self.assertEqual(
- count_node(gm, exir_ops.edge.cadence.quantized_conv.default), 1
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.default), 1
)
self.assertEqual(count_node(gm, exir_ops.edge.aten.permute_copy.default), 0)
# Apply replacement pass.
- p = ForceChannelLastForConvPass()
+ p = ReplaceConvWithChannelLastConvPass()
gm_after_replacement = p.call(gm).graph_module
# Check that no replacement was made.
self.assertEqual(
count_node(
- gm_after_replacement, exir_ops.edge.cadence.quantized_conv.default
+ gm_after_replacement, exir_ops.edge.cadence.quantized_conv_nhwc.default
),
1,
)
@@ -1685,12 +1706,6 @@ def test_quantized_convolution_default_channel_last(self) -> None:
count_node(gm_after_replacement, exir_ops.edge.aten.permute_copy.default),
3,
)
- for node in gm_after_replacement.graph.nodes:
- if node.target != exir_ops.edge.cadence.quantized_conv.default:
- continue
- # Check that the channel_last argument is set to True.
- self.assertEqual(len(node.args), 15, f"{node=}")
- self.assertTrue(node.args[14])
def test_no_transpose_if_already_quantized_conv_channel_last(self) -> None:
# Create a graph with a single im2row node.
@@ -1698,26 +1713,20 @@ def test_no_transpose_if_already_quantized_conv_channel_last(self) -> None:
# Check if graph module is valid by running exportpass on it.
gm = ExportPass().call(gm).graph_module
self.assertEqual(
- count_node(gm, exir_ops.edge.cadence.quantized_conv.default), 1
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.default), 1
)
# Apply replacement pass.
- p = ForceChannelLastForConvPass()
+ p = ReplaceConvWithChannelLastConvPass()
gm_after_replacement = p.call(gm).graph_module
# Check that no replacement was made.
self.assertEqual(
count_node(
- gm_after_replacement, exir_ops.edge.cadence.quantized_conv.default
+ gm_after_replacement, exir_ops.edge.cadence.quantized_conv_nhwc.default
),
1,
)
self.assertEqual(count_node(gm, exir_ops.edge.aten.permute_copy.default), 0)
- for node in gm_after_replacement.graph.nodes:
- if node.target != exir_ops.edge.cadence.quantized_conv.default:
- continue
- # Check that the channel_last argument is set to True.
- self.assertEqual(len(node.args), 15, f"{node=}")
- self.assertTrue(node.args[14])
class TestMakeSliceAndCatDimOutermostPass(unittest.TestCase):
diff --git a/backends/cadence/aot/tests/test_type_dispatch_passes.py b/backends/cadence/aot/tests/test_type_dispatch_passes.py
new file mode 100644
index 00000000000..f180c138ca4
--- /dev/null
+++ b/backends/cadence/aot/tests/test_type_dispatch_passes.py
@@ -0,0 +1,673 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# pyre-strict
+
+import unittest
+from typing import cast
+
+import executorch.backends.cadence.aot.ops_registrations # noqa
+import torch
+from executorch.backends.cadence.aot.graph_builder import single_op_builder
+from executorch.backends.cadence.aot.pass_utils import count_node
+from executorch.backends.cadence.aot.type_dispatch import CompileTimeTypeDispatchPass
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch.fx.passes.infra.pass_base import PassResult
+
+
+class TestTypeDispatchPasses(unittest.TestCase):
+ def test_int8_dispatch_quantized_fully_connected(self) -> None:
+ """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant"""
+ x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+ w = torch.randint(-128, 127, (4, 3), dtype=torch.int8)
+ b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_fully_connected.per_tensor,
+ args=(x, w, b, 0, 0, 1, 0, 0, None),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_fully_connected.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_fully_connected(self) -> None:
+ """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant"""
+ x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+ w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)
+ b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_fully_connected.per_tensor,
+ args=(x, w, b, 0, 0, 1, 0, 0, None),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_fully_connected.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_linear(self) -> None:
+ """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant for quantized_linear"""
+ x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+ w = torch.randint(-128, 127, (4, 3), dtype=torch.int8)
+ b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_linear.per_tensor,
+ args=(x, w, b, 0, 0, 1, 0, 0, None),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_linear.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_linear_asym8sxasym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_quantized_linear_dispatch(self) -> None:
+ """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant for quantized_linear"""
+ x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+ w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)
+ b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_linear.per_tensor,
+ args=(x, w, b, 0, 0, 1, 0, 0, None),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_linear.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_linear_asym8uxasym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_mixed_types_error(self) -> None:
+ """Test mixed int8/uint8 inputs should raise RuntimeError"""
+ x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+ w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)
+ b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_fully_connected.per_tensor,
+ args=(x, w, b, 0, 0, 1, 0, 0, None),
+ )
+ p = CompileTimeTypeDispatchPass()
+ # Mixed types should raise RuntimeError
+ with self.assertRaises(RuntimeError) as context:
+ cast(PassResult, p(gm)).graph_module
+ self.assertIn("Unsupported input types", str(context.exception))
+
+ def test_int8_dispatch_quantized_relu(self) -> None:
+ """Test int8 input should dispatch to asym8s_asym8s variant for quantized_relu"""
+ x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+ gm = single_op_builder(
+ placeholders=(x,),
+ op=exir_ops.edge.cadence.quantized_relu.per_tensor,
+ args=(x, 0, 0, 1, 0),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_relu.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_relu_asym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_relu(self) -> None:
+ """Test uint8 input should dispatch to asym8u_asym8u variant for quantized_relu"""
+ x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+ gm = single_op_builder(
+ placeholders=(x,),
+ op=exir_ops.edge.cadence.quantized_relu.per_tensor,
+ args=(x, 0, 0, 1, 0),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_relu.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_relu_asym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_matmul(self) -> None:
+ """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant for quantized_matmul"""
+ x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+ y = torch.randint(-128, 127, (3, 4), dtype=torch.int8)
+ bias = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, y, bias),
+ op=exir_ops.edge.cadence.quantized_matmul.default,
+ args=(x, 0, y, 0, bias, 1, 0, 0, False),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_matmul.default),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_matmul_asym8sxasym8s_asym8s.default,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_matmul(self) -> None:
+ """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant for quantized_matmul"""
+ x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+ y = torch.randint(0, 255, (3, 4), dtype=torch.uint8)
+ bias = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, y, bias),
+ op=exir_ops.edge.cadence.quantized_matmul.default,
+ args=(x, 0, y, 0, bias, 1, 0, 0, False),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_matmul.default),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_matmul_asym8uxasym8u_asym8u.default,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_conv_nchw(self) -> None:
+ """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant for quantized_conv_nchw"""
+ x = torch.randint(-128, 127, (1, 3, 8, 8), dtype=torch.int8)
+ w = torch.randint(-128, 127, (16, 3, 3, 3), dtype=torch.int8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [1, 1], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_conv_nchw(self) -> None:
+ """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant for quantized_conv_nchw"""
+ x = torch.randint(0, 255, (1, 3, 8, 8), dtype=torch.uint8)
+ w = torch.randint(0, 255, (16, 3, 3, 3), dtype=torch.uint8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [1, 1], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_conv_nhwc(self) -> None:
+ """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant for quantized_conv_nhwc"""
+ x = torch.randint(-128, 127, (1, 8, 8, 3), dtype=torch.int8)
+ w = torch.randint(-128, 127, (16, 3, 3, 3), dtype=torch.int8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [1, 1], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_conv_nhwc(self) -> None:
+ """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant for quantized_conv_nhwc"""
+ x = torch.randint(0, 255, (1, 8, 8, 3), dtype=torch.uint8)
+ w = torch.randint(0, 255, (16, 3, 3, 3), dtype=torch.uint8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [1, 1], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_conv_nchw_dilated(self) -> None:
+ """Test int8 x int8 inputs with dilation should dispatch to dilated_asym8sxasym8s_asym8s variant for quantized_conv_nchw_dilated"""
+ x = torch.randint(-128, 127, (1, 3, 8, 8), dtype=torch.int8)
+ w = torch.randint(-128, 127, (16, 3, 3, 3), dtype=torch.int8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [2, 2], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_conv_nchw_dilated(self) -> None:
+ """Test uint8 x uint8 inputs with dilation should dispatch to dilated_asym8uxasym8u_asym8u variant for quantized_conv_nchw"""
+ x = torch.randint(0, 255, (1, 3, 8, 8), dtype=torch.uint8)
+ w = torch.randint(0, 255, (16, 3, 3, 3), dtype=torch.uint8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [2, 2], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_conv_nhwc_dilated(self) -> None:
+ """Test int8 x int8 inputs with dilation should dispatch to dilated_asym8sxasym8s_asym8s variant for quantized_conv_nhwc"""
+ x = torch.randint(-128, 127, (1, 8, 8, 3), dtype=torch.int8)
+ w = torch.randint(-128, 127, (16, 3, 3, 3), dtype=torch.int8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [2, 2], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_conv_nhwc_dilated(self) -> None:
+ """Test uint8 x uint8 inputs with dilation should dispatch to dilated_asym8uxasym8u_asym8u variant for quantized_conv_nhwc"""
+ x = torch.randint(0, 255, (1, 8, 8, 3), dtype=torch.uint8)
+ w = torch.randint(0, 255, (16, 3, 3, 3), dtype=torch.uint8)
+ b = torch.randint(-2147483648, 2147483647, (16,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
+ args=(x, w, b, [1, 1], [0, 0], [2, 2], 1, 0, 0, 1.0, 1.0, 0, 1, 1),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_add(self) -> None:
+ """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant for quantized_add"""
+ x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+ y = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+ gm = single_op_builder(
+ placeholders=(x, y),
+ op=exir_ops.edge.cadence.quantized_add.per_tensor,
+ args=(x, 1.0, 0, y, 1.0, 0, 1.0, 0),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_add.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_add_asym8sxasym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_add(self) -> None:
+ """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant for quantized_add"""
+ x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+ y = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+ gm = single_op_builder(
+ placeholders=(x, y),
+ op=exir_ops.edge.cadence.quantized_add.per_tensor,
+ args=(x, 1.0, 0, y, 1.0, 0, 1.0, 0),
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_add.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_add_asym8uxasym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_conv_nchw_depthwise(self) -> None:
+ """Test int8 x int8 inputs with depthwise should dispatch to depthwise_asym8sxsym8s_asym8s variant for quantized_conv_nchw"""
+ # Depthwise convolution: groups == input_channels
+ x = torch.randint(-128, 127, (1, 3, 8, 8), dtype=torch.int8)
+ w = torch.randint(
+ -128, 127, (3, 1, 3, 3), dtype=torch.int8
+ ) # groups=3, input_channels=3
+ b = torch.randint(-2147483648, 2147483647, (3,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ args=(
+ x,
+ w,
+ b,
+ [1, 1],
+ [0, 0],
+ [1, 1],
+ 3,
+ 0,
+ 0,
+ 1.0,
+ 1.0,
+ 0,
+ 1,
+ 1,
+ ), # groups=3
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 depthwise specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_conv_nchw_depthwise(self) -> None:
+ """Test uint8 x uint8 inputs with depthwise should dispatch to depthwise_asym8uxasym8u_asym8u variant for quantized_conv_nchw"""
+ # Depthwise convolution: groups == input_channels
+ x = torch.randint(0, 255, (1, 3, 8, 8), dtype=torch.uint8)
+ w = torch.randint(
+ 0, 255, (3, 1, 3, 3), dtype=torch.uint8
+ ) # groups=3, input_channels=3
+ b = torch.randint(-2147483648, 2147483647, (3,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ args=(
+ x,
+ w,
+ b,
+ [1, 1],
+ [0, 0],
+ [1, 1],
+ 3,
+ 0,
+ 0,
+ 1.0,
+ 1.0,
+ 0,
+ 1,
+ 1,
+ ), # groups=3
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 depthwise specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_int8_dispatch_quantized_conv_nhwc_depthwise(self) -> None:
+ """Test int8 x int8 inputs with depthwise should dispatch to depthwise_asym8sxsym8s_asym8s variant for quantized_conv_nhwc"""
+ # Depthwise convolution: groups == input_channels
+ x = torch.randint(-128, 127, (1, 8, 8, 3), dtype=torch.int8)
+ w = torch.randint(
+ -128, 127, (3, 3, 3, 1), dtype=torch.int8
+ ) # groups=3, input_channels=3
+ b = torch.randint(-2147483648, 2147483647, (3,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
+ args=(
+ x,
+ w,
+ b,
+ [1, 1],
+ [0, 0],
+ [1, 1],
+ 3,
+ 0,
+ 0,
+ 1.0,
+ 1.0,
+ 0,
+ 1,
+ 1,
+ ), # groups=3
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor),
+ 0,
+ )
+ # Should be replaced with int8 depthwise specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor,
+ ),
+ 1,
+ )
+
+ def test_uint8_dispatch_quantized_conv_nhwc_depthwise(self) -> None:
+ """Test uint8 x uint8 inputs with depthwise should dispatch to depthwise_asym8uxasym8u_asym8u variant for quantized_conv_nhwc"""
+ # Depthwise convolution: groups == input_channels
+ x = torch.randint(0, 255, (1, 8, 8, 3), dtype=torch.uint8)
+ w = torch.randint(
+ 0, 255, (3, 3, 3, 1), dtype=torch.uint8
+ ) # groups=3, input_channels=3
+ b = torch.randint(-2147483648, 2147483647, (3,), dtype=torch.int32)
+ gm = single_op_builder(
+ placeholders=(x, w, b),
+ op=exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
+ args=(
+ x,
+ w,
+ b,
+ [1, 1],
+ [0, 0],
+ [1, 1],
+ 3,
+ 0,
+ 0,
+ 1.0,
+ 1.0,
+ 0,
+ 1,
+ 1,
+ ), # groups=3
+ )
+ p = CompileTimeTypeDispatchPass()
+ gm = cast(PassResult, p(gm)).graph_module
+ # Original op should be replaced
+ self.assertEqual(
+ count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor),
+ 0,
+ )
+ # Should be replaced with uint8 depthwise specific variant
+ self.assertEqual(
+ count_node(
+ gm,
+ exir_ops.edge.cadence.quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor,
+ ),
+ 1,
+ )
diff --git a/backends/cadence/aot/type_dispatch.py b/backends/cadence/aot/type_dispatch.py
new file mode 100644
index 00000000000..ec9cecb03ed
--- /dev/null
+++ b/backends/cadence/aot/type_dispatch.py
@@ -0,0 +1,152 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from dataclasses import dataclass
+from typing import Optional
+
+import torch
+from executorch.backends.cadence.aot.pass_utils import (
+ CadencePassAttribute,
+ register_cadence_pass,
+)
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue
+from torch._ops import OpOverload
+from torch.fx.node import Argument
+
+
+@dataclass
+class OpConfig:
+ """Configuration for type dispatch operations."""
+
+ base_name: str
+ type_dispatch_suffixes: dict[tuple[torch.dtype, ...], str]
+ weight_arg_idx: Optional[int] = None
+ variant: str = "per_tensor"
+
+
+@register_cadence_pass(CadencePassAttribute(opt_level=4))
+class CompileTimeTypeDispatchPass(ExportPass):
+ """
+ Replaces generic ops with ops that have explicit types.
+ """
+
+ _SUPPORTED_OPS: dict[OpOverload, OpConfig] = {
+ exir_ops.edge.cadence.quantized_fully_connected.per_tensor: OpConfig(
+ "quantized_fully_connected",
+ type_dispatch_suffixes={
+ (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
+ (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
+ },
+ weight_arg_idx=1,
+ ),
+ exir_ops.edge.cadence.quantized_linear.per_tensor: OpConfig(
+ "quantized_linear",
+ type_dispatch_suffixes={
+ (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
+ (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
+ },
+ weight_arg_idx=1,
+ ),
+ exir_ops.edge.cadence.quantized_matmul.default: OpConfig(
+ "quantized_matmul",
+ type_dispatch_suffixes={
+ (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
+ (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
+ },
+ weight_arg_idx=2,
+ variant="default",
+ ),
+ exir_ops.edge.cadence.quantized_conv_nchw.per_tensor: OpConfig(
+ "quantized_conv_nchw",
+ type_dispatch_suffixes={
+ (torch.int8, torch.int8): "asym8sxsym8s_asym8s",
+ (torch.uint8, torch.uint8): "asym8uxsym8u_asym8u",
+ },
+ weight_arg_idx=1,
+ ),
+ exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor: OpConfig(
+ "quantized_conv_nhwc",
+ type_dispatch_suffixes={
+ (torch.int8, torch.int8): "asym8sxsym8s_asym8s",
+ (torch.uint8, torch.uint8): "asym8uxsym8u_asym8u",
+ },
+ weight_arg_idx=1,
+ ),
+ exir_ops.edge.cadence.quantized_relu.per_tensor: OpConfig(
+ "quantized_relu",
+ type_dispatch_suffixes={
+ (torch.int8,): "asym8s_asym8s",
+ (torch.uint8,): "asym8u_asym8u",
+ },
+ ),
+ exir_ops.edge.cadence.quantized_add.per_tensor: OpConfig(
+ "quantized_add",
+ type_dispatch_suffixes={
+ (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
+ (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
+ },
+ weight_arg_idx=3,
+ ),
+ }
+
+ def call_operator(
+ self,
+ op: OpOverload,
+ args: tuple[Argument, ...],
+ kwargs: dict[str, Argument],
+ meta: NodeMetadata,
+ ) -> ProxyValue:
+ if op not in self._SUPPORTED_OPS:
+ return super().call_operator(op, args, kwargs, meta)
+
+ config = self._SUPPORTED_OPS[op]
+
+ # pyre-ignore[16]: None has no attribute `to_tensor`.
+ input_dtype = args[0].to_tensor().dtype
+
+ if config.weight_arg_idx is not None:
+ weight_dtype = args[config.weight_arg_idx].to_tensor().dtype
+ dtype_key = (input_dtype, weight_dtype)
+ else:
+ dtype_key = (input_dtype,)
+
+ if dtype_key not in config.type_dispatch_suffixes:
+ raise RuntimeError(f"Unsupported input types for {op}: {dtype_key}")
+
+ type_suffix = config.type_dispatch_suffixes[dtype_key]
+ base_name = config.base_name
+
+ if op in [
+ exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
+ exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
+ ]:
+ groups = args[6]
+ input_channels = (
+ args[0].to_tensor().shape[1]
+ if op == exir_ops.edge.cadence.quantized_conv_nchw.per_tensor
+ else args[0].to_tensor().shape[-1]
+ )
+ is_depthwise = groups == input_channels
+
+ dilation = args[5]
+ # pyre-ignore[16]: None has no attribute '__iter__'.
+ is_dilated = any(d > 1 for d in dilation)
+
+ if is_dilated:
+ type_suffix = f"dilated_{type_suffix}"
+ elif is_depthwise:
+ type_suffix = f"depthwise_{type_suffix}"
+
+ typed_op_name = f"{base_name}_{type_suffix}"
+
+ typed_op = getattr(
+ getattr(exir_ops.edge.cadence, typed_op_name), config.variant
+ )
+
+ return super().call_operator(typed_op, args, kwargs, meta)
diff --git a/backends/cadence/aot/utils.py b/backends/cadence/aot/utils.py
index 379e3b24dd8..b711d45994b 100644
--- a/backends/cadence/aot/utils.py
+++ b/backends/cadence/aot/utils.py
@@ -29,6 +29,26 @@ class MemoryPlanningAlgoFailure(Exception):
pass
+class TypeMismatchError(Exception):
+ pass
+
+
+class NumericalMismatchError(Exception):
+ def __init__(self, msg: str, rms_value: Optional[float] = None) -> None:
+ self.rms_value = rms_value
+ super().__init__(msg)
+
+
+class NumericalMismatchExpectedError(Exception):
+ def __init__(self, rms_expected_value: float) -> None:
+ self.rms_expected_value = rms_expected_value
+ super().__init__()
+
+
+class ISSRuntimeFailure(Exception):
+ pass
+
+
# Get the output size of a 1D convolution given the input size and parameters
def get_conv1d_output_size(
in_size: torch.Size,
diff --git a/backends/cadence/build_cadence_fusionG3.sh b/backends/cadence/build_cadence_fusionG3.sh
index 1c84ae99364..93295bc9aa5 100644
--- a/backends/cadence/build_cadence_fusionG3.sh
+++ b/backends/cadence/build_cadence_fusionG3.sh
@@ -36,7 +36,7 @@ if $STEPWISE_BUILD; then
-Bcmake-out .
echo "Building any Cadence-specific binaries on top"
- CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
+ CXXFLAGS="-fno-exceptions -fno-rtti" cmake \
-DCMAKE_TOOLCHAIN_FILE=/home/zonglinpeng/ws/zonglinpeng/executorch/backends/cadence/cadence.cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
@@ -57,7 +57,7 @@ if $STEPWISE_BUILD; then
else
echo "Building Cadence toolchain with ExecuTorch packages"
cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
- CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
+ CXXFLAGS="-fno-exceptions -fno-rtti" cmake \
-DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
-DHAVE_SYS_STAT_H=ON \
-DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
diff --git a/backends/cadence/build_cadence_hifi4.sh b/backends/cadence/build_cadence_hifi4.sh
index e0a48da4074..33078b7ff2f 100644
--- a/backends/cadence/build_cadence_hifi4.sh
+++ b/backends/cadence/build_cadence_hifi4.sh
@@ -35,7 +35,7 @@ if $STEPWISE_BUILD; then
-Bcmake-out .
echo "Building any Cadence-specific binaries on top"
- CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
+ CXXFLAGS="-fno-exceptions -fno-rtti" cmake \
-DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
@@ -56,7 +56,7 @@ if $STEPWISE_BUILD; then
else
echo "Building Cadence toolchain with ExecuTorch packages"
cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
- CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
+ CXXFLAGS="-fno-exceptions -fno-rtti" cmake \
-DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
-DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
diff --git a/backends/cadence/cadence.cmake b/backends/cadence/cadence.cmake
index 0fa55c6a65b..a0e5ea86da1 100644
--- a/backends/cadence/cadence.cmake
+++ b/backends/cadence/cadence.cmake
@@ -43,7 +43,7 @@ set(CMAKE_CXX_COMPILER ${TOOLCHAIN_HOME}/bin/${CROSS_COMPILE_TARGET}-clang++)
set(CMAKE_C_FLAGS_INIT "-stdlib=libc++ -mtext-section-literals -mlongcalls")
set(CMAKE_CXX_FLAGS_INIT "-stdlib=libc++ -mtext-section-literals -mlongcalls")
-#workaround for larger compilation time
+# workaround for larger compilation time
set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} -fno-strict-aliasing")
set(CMAKE_SYSROOT ${TOOLCHAIN_HOME}/${SYSROOT_TARGET})
diff --git a/backends/cadence/fusion_g3/operators/CMakeLists.txt b/backends/cadence/fusion_g3/operators/CMakeLists.txt
index c29ffa91af9..a9501c687bb 100644
--- a/backends/cadence/fusion_g3/operators/CMakeLists.txt
+++ b/backends/cadence/fusion_g3/operators/CMakeLists.txt
@@ -69,16 +69,20 @@ target_link_libraries(aten_ops_cadence PUBLIC executorch)
target_link_libraries(aten_ops_cadence PRIVATE xa_nnlib)
# Let files say "include ".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..
-${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+ ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
target_include_directories(
- aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
- ${_common_include_directories}
- ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/algo/common/include/
- ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/include/nnlib
- ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/include
- ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/algo/kernels/tables/include
+ aten_ops_cadence
+ PUBLIC
+ ${ROOT_DIR}/..
+ ${CMAKE_BINARY_DIR}
+ ${_common_include_directories}
+ ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/algo/common/include/
+ ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/include/nnlib
+ ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/include
+ ${EXECUTORCH_ROOT}/backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3/xa_nnlib/algo/kernels/tables/include
)
# Generate C++ bindings to register kernels into both PyTorch (for AOT) and
@@ -93,6 +97,4 @@ generate_bindings_for_kernels(
)
message("Generated files ${gen_command_sources}")
-gen_operators_lib(
- LIB_NAME "cadence_ops_lib" KERNEL_LIBS DEPS aten_ops_cadence
-)
+gen_operators_lib(LIB_NAME "cadence_ops_lib" KERNEL_LIBS DEPS aten_ops_cadence)
diff --git a/backends/cadence/fusion_g3/operators/op_clamp.cpp b/backends/cadence/fusion_g3/operators/op_clamp.cpp
index 9f3f72a674f..92fb97b1260 100644
--- a/backends/cadence/fusion_g3/operators/op_clamp.cpp
+++ b/backends/cadence/fusion_g3/operators/op_clamp.cpp
@@ -45,6 +45,7 @@ bool is_out_of_bounds(CTYPE_VAL val) {
}
ET_NODISCARD bool check_bounds(
+ KernelRuntimeContext& ctx,
const Scalar& val_scalar,
const ScalarType& val_type,
const ScalarType& out_type,
@@ -107,14 +108,14 @@ Tensor& clamp_out(
if (has_min) {
ET_KERNEL_CHECK(
ctx,
- check_bounds(min_opt.value(), min_type, out_type, "minimum"),
+ check_bounds(ctx, min_opt.value(), min_type, out_type, "minimum"),
InvalidArgument,
out);
}
if (has_max) {
ET_KERNEL_CHECK(
ctx,
- check_bounds(max_opt.value(), max_type, out_type, "maximum"),
+ check_bounds(ctx, max_opt.value(), max_type, out_type, "maximum"),
InvalidArgument,
out);
}
diff --git a/backends/cadence/hifi/kernels/CMakeLists.txt b/backends/cadence/hifi/kernels/CMakeLists.txt
index 972bb4b7ab1..936e28e2241 100644
--- a/backends/cadence/hifi/kernels/CMakeLists.txt
+++ b/backends/cadence/hifi/kernels/CMakeLists.txt
@@ -28,8 +28,9 @@ add_library(
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_transpose_32.c
)
# Let files say "include ".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..
-${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+ ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
target_include_directories(
cadence_kernels
@@ -39,7 +40,7 @@ target_include_directories(
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include/nnlib
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/
- ${_common_include_directories}
+ ${_common_include_directories}
)
target_link_libraries(cadence_kernels PRIVATE xa_nnlib)
diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp
index bf4a2d143fd..d2cf6dd5057 100644
--- a/backends/cadence/hifi/kernels/kernels.cpp
+++ b/backends/cadence/hifi/kernels/kernels.cpp
@@ -21,8 +21,19 @@ memcpy(void* dst, const void* src, size_t num_bytes) {
}
void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
+ ET_LOG(Info, "Attempting to allocate %zu bytes of temp memory", size);
Result temp_mem_res = ctx.allocate_temp(size);
- return temp_mem_res.ok() ? temp_mem_res.get() : nullptr;
+ if (temp_mem_res.ok()) {
+ void* ptr = temp_mem_res.get();
+ ET_LOG(Info, "Successfully allocated temp memory at %p", ptr);
+ return ptr;
+ } else {
+ ET_LOG(
+ Error,
+ "Failed to allocate temp memory, error: 0x%x",
+ static_cast(temp_mem_res.error()));
+ return nullptr;
+ }
}
// Quantize a fp32 value to an int8_t/uint8_t value
diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt
index 806e2e41ff5..6bd63c6d9f6 100644
--- a/backends/cadence/hifi/operators/CMakeLists.txt
+++ b/backends/cadence/hifi/operators/CMakeLists.txt
@@ -72,14 +72,15 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/select_copy_util.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/delinearize_index.cpp"
- )
+)
add_library(aten_ops_cadence ${_aten_ops__srcs})
target_link_libraries(aten_ops_cadence PUBLIC executorch)
target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)
# Let files say "include ".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..
-${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+ ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
target_include_directories(
aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
@@ -88,9 +89,16 @@ target_include_directories(
# Custom ops that are needed to run the test model.
add_library(
- custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp" "op_quantized_matmul_out.cpp"
- "op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp"
- "op_quantized_conv_out.cpp" "op_quantized_fully_connected_out"
+ custom_ops
+ "op_quantized_linear_out.cpp"
+ "op_quantized_layer_norm.cpp"
+ "op_quantized_matmul_out.cpp"
+ "op_quantize_per_tensor.cpp"
+ "op_quantized_relu_out.cpp"
+ "op_dequantize_per_tensor.cpp"
+ "op_quantized_conv_nchw_out.cpp"
+ "op_quantized_conv_nhwc_out.cpp"
+ "op_quantized_fully_connected_out"
)
target_include_directories(
custom_ops PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
diff --git a/backends/cadence/hifi/operators/op_cat.cpp b/backends/cadence/hifi/operators/op_cat.cpp
index 8ad52753de3..d4fd51871ce 100644
--- a/backends/cadence/hifi/operators/op_cat.cpp
+++ b/backends/cadence/hifi/operators/op_cat.cpp
@@ -126,29 +126,25 @@ Tensor& cat_out(
const size_t outer = getLeadingDims(out, dim);
const size_t dim_stride = getTrailingDims(out, dim);
const size_t ninputs = tensors.size();
+ const size_t element_size = out.element_size();
+ char* out_ptr = static_cast(out.mutable_data_ptr());
- const auto out_type = out.scalar_type();
- ET_SWITCH_REALHB_TYPES(out_type, ctx, name, CTYPE_OUT, [&] {
- CTYPE_OUT* out_ptr = out.mutable_data_ptr();
- for (size_t i = 0; i < outer; ++i) {
- for (size_t j = 0; j < ninputs; ++j) {
- const auto in_type = tensors[j].scalar_type();
- ET_SWITCH_REALHB_TYPES(in_type, ctx, name, CTYPE_IN, [&] {
- if (tensors[j].numel() == 0) {
- return;
- }
- size_t inner = tensors[j].size(dim) * dim_stride;
- const CTYPE_IN* const in_ptr =
- tensors[j].const_data_ptr() + i * inner;
-
- for (size_t k = 0; k < inner; ++k) {
- out_ptr[k] = static_cast(in_ptr[k]);
- }
- out_ptr += inner;
- });
+ for (size_t i = 0; i < outer; ++i) {
+ for (size_t j = 0; j < ninputs; ++j) {
+ if (tensors[j].numel() == 0) {
+ continue;
}
+ size_t inner_elements = tensors[j].size(dim) * dim_stride;
+ size_t contiguous_bytes = inner_elements * element_size;
+
+ const char* const in_ptr =
+ static_cast(tensors[j].const_data_ptr()) +
+ i * contiguous_bytes;
+
+ std::memcpy(out_ptr, in_ptr, contiguous_bytes);
+ out_ptr += contiguous_bytes;
}
- });
+ }
return out;
}
@@ -156,4 +152,4 @@ Tensor& cat_out(
} // namespace native
} // namespace HiFi
} // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_mm.cpp b/backends/cadence/hifi/operators/op_mm.cpp
index abb53a7ad7c..9cf922cbf56 100644
--- a/backends/cadence/hifi/operators/op_mm.cpp
+++ b/backends/cadence/hifi/operators/op_mm.cpp
@@ -79,6 +79,15 @@ Tensor& mm_out(
(WORD32* __restrict__)kernels::allocate_temp_memory(
ctx, (n * p) * sizeof(WORD32));
+ // Allocate zero-initialized bias for matmul function (it doesn't accept
+ // NULL)
+ FLOAT32* __restrict__ p_bias_zero =
+ (FLOAT32* __restrict__)kernels::allocate_temp_memory(
+ ctx, m * sizeof(FLOAT32));
+
+ // Initialize bias to zero since mm operation has no bias
+ memset(p_bias_zero, 0, m * sizeof(FLOAT32));
+
WORD32 p_inp_shape[2];
p_inp_shape[0] = n;
p_inp_shape[1] = p;
@@ -109,11 +118,13 @@ Tensor& mm_out(
const FLOAT32* __restrict__ p_vec = (const FLOAT32* __restrict__)p_o;
+ // mm will always be converted to addmm and to linear, and move transpose to
+ // graph
WORD32 val = xa_nn_matmul_f32xf32_f32(
p_out,
p_mat1,
p_vec,
- NULL,
+ p_bias_zero,
rows,
cols1,
row_stride1,
@@ -121,7 +132,6 @@ Tensor& mm_out(
vec_offset,
out_offset,
out_stride);
-
return out;
}
diff --git a/backends/cadence/hifi/operators/op_permute_copy.cpp b/backends/cadence/hifi/operators/op_permute_copy.cpp
index 1d56d79dfd5..c5f33435733 100644
--- a/backends/cadence/hifi/operators/op_permute_copy.cpp
+++ b/backends/cadence/hifi/operators/op_permute_copy.cpp
@@ -70,8 +70,6 @@ Tensor& permute_copy_out(
out);
const auto in_type = out.scalar_type();
-
- constexpr auto name = "permute_copy.out";
constexpr int kNnlibMaxDim = 16;
bool optimized = false;
@@ -150,18 +148,22 @@ Tensor& permute_copy_out(
size_t trailing_dims_memo[kTensorDimensionLimit];
executorch::runtime::memoizeTrailingDims(in, trailing_dims_memo);
- // in and out must be the same dtype
- ET_SWITCH_ALL_TYPES(in_type, ctx, name, CTYPE, [&] {
-    const CTYPE* const in_data = in.const_data_ptr<CTYPE>();
-    CTYPE* const out_data = out.mutable_data_ptr<CTYPE>();
+  const char* const in_data = static_cast<const char*>(in.const_data_ptr());
+  char* const out_data = static_cast<char*>(out.mutable_data_ptr());
+ const size_t element_size = out.element_size();
- for (size_t i = 0; i < out.numel(); ++i) {
- out_data[i] =
- in_data[executorch::runtime::coordinateToIndexWithTrailingDimsMemo(
- in, in_coord, trailing_dims_memo)];
- increment_coordinate_permuted(in, in_coord, dims);
- }
- });
+ for (size_t i = 0; i < out.numel(); ++i) {
+ const size_t in_index =
+ executorch::runtime::coordinateToIndexWithTrailingDimsMemo(
+ in, in_coord, trailing_dims_memo);
+
+ std::memcpy(
+ out_data + i * element_size,
+ in_data + in_index * element_size,
+ element_size);
+
+ increment_coordinate_permuted(in, in_coord, dims);
+ }
return out;
}
@@ -169,4 +171,4 @@ Tensor& permute_copy_out(
} // namespace native
} // namespace HiFi
} // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
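Like the cat rewrite, this permute_copy moves elements without knowing their type: each output element is one memcpy of element_size bytes from the source offset computed out of the permuted coordinate. The per-element copy, as a standalone sketch:

#include <cstddef>
#include <cstring>

// Copy element dst_idx <- src_idx where each element is esz bytes wide;
// works for any dtype since only raw bytes move.
inline void copy_element(
    char* dst, size_t dst_idx, const char* src, size_t src_idx, size_t esz) {
  std::memcpy(dst + dst_idx * esz, src + src_idx * esz, esz);
}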
diff --git a/backends/cadence/hifi/operators/op_quantized_add_asym8sxasym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_add_asym8sxasym8s_asym8s_per_tensor_out.cpp
new file mode 100644
index 00000000000..fa84a877c56
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_add_asym8sxasym8s_asym8s_per_tensor_out.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
+
+void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
+ KernelRuntimeContext& ctx,
+ const Tensor& X,
+ double X_scale,
+ int64_t X_zero_point,
+ const Tensor& Y,
+ double Y_scale,
+ int64_t Y_zero_point,
+ double out_scale,
+ int64_t out_zero_point,
+ Tensor& out) {
+  const int8_t* __restrict__ X_data = X.const_data_ptr<int8_t>();
+  const int8_t* __restrict__ Y_data = Y.const_data_ptr<int8_t>();
+  int8_t* __restrict__ out_data = out.mutable_data_ptr<int8_t>();
+
+ ssize_t Y_numel = Y.numel();
+ ssize_t X_numel = X.numel();
+ ssize_t out_numel = out.numel();
+
+  float X_scale_f = static_cast<float>(X_scale);
+  float Y_scale_f = static_cast<float>(Y_scale);
+  float out_scale_f = static_cast<float>(out_scale);
+  int32_t X_zero_point_i32 = static_cast<int32_t>(X_zero_point);
+  int32_t Y_zero_point_i32 = static_cast<int32_t>(Y_zero_point);
+  int32_t out_zero_point_i32 = static_cast<int32_t>(out_zero_point);
+
+ float inv_out_scale = 1.0f / out_scale_f;
+  constexpr float min_val =
+      static_cast<float>(std::numeric_limits<int8_t>::min());
+  constexpr float max_val =
+      static_cast<float>(std::numeric_limits<int8_t>::max());
+
+ /* Tensor X exactly matches Y in shape, no broadcasting */
+ if (X_numel == Y_numel && Y_numel == out_numel) {
+    for (ssize_t i = 0; i < X_numel; ++i) {
+ float x = X_scale_f * (X_data[i] - X_zero_point_i32);
+ float y = Y_scale_f * (Y_data[i] - Y_zero_point_i32);
+ float z = x + y;
+ float tmp = roundf(z * inv_out_scale + out_zero_point_i32);
+      out_data[i] =
+          static_cast<int8_t>(std::max(std::min(tmp, max_val), min_val));
+ }
+ } /* if Y is a scalar Tensor */
+ else if (Y_numel == 1) {
+    float y =
+        kernels::dequantize<float>(Y_data[0], Y_scale_f, Y_zero_point_i32);
+    for (ssize_t i = 0; i < X_numel; ++i) {
+      float x =
+          kernels::dequantize<float>(X_data[i], X_scale_f, X_zero_point_i32);
+      float z = x + y;
+      out_data[i] =
+          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+ }
+ } /* if X is a scalar Tensor */
+ else if (X_numel == 1) {
+    float x =
+        kernels::dequantize<float>(X_data[0], X_scale_f, X_zero_point_i32);
+    for (ssize_t i = 0; i < Y_numel; ++i) {
+      float y =
+          kernels::dequantize<float>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float z = x + y;
+      out_data[i] =
+          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+ }
+ } /* other broadcasting cases */
+ else {
+ /* Broadcasting implementation */
+ ssize_t X_dim = X.dim();
+ ssize_t Y_dim = Y.dim();
+ ssize_t out_dim = out.dim();
+
+ /* Precompute strides for X and Y tensors */
+    constexpr ssize_t max_dim = executorch::runtime::kTensorDimensionLimit;
+ size_t X_strides[max_dim] = {0};
+ size_t Y_strides[max_dim] = {0};
+ size_t X_stride_val = 1;
+ size_t Y_stride_val = 1;
+
+ /* Calculate strides from last dimension to first */
+ for (int d = out_dim - 1; d >= 0 && d >= out_dim - max_dim; --d) {
+ int idx = out_dim - 1 - d; /* Index into the fixed-size array */
+ if (d >= out_dim - X_dim) {
+ size_t X_d = d - (out_dim - X_dim);
+ X_strides[idx] = X_stride_val;
+ X_stride_val *= X.size(X_d);
+ }
+
+ if (d >= out_dim - Y_dim) {
+ size_t Y_d = d - (out_dim - Y_dim);
+ Y_strides[idx] = Y_stride_val;
+ Y_stride_val *= Y.size(Y_d);
+ }
+ }
+
+ /* Iterate over output tensor */
+ for (ssize_t i = 0; i < out_numel; ++i) {
+ size_t out_idx = i;
+ size_t X_idx = 0;
+ size_t Y_idx = 0;
+
+ /* Compute corresponding indices in input tensors */
+ for (int d = out_dim - 1; d >= 0; --d) {
+ size_t out_dim_idx = out_idx % out.size(d);
+ out_idx /= out.size(d);
+
+ /* Compute X index */
+ if (d >= out_dim - X_dim) {
+ size_t X_d = d - (out_dim - X_dim);
+ size_t X_dim_idx = out_dim_idx % X.size(X_d);
+ if (d >= out_dim - max_dim) {
+ int idx = out_dim - 1 - d;
+ X_idx += X_dim_idx * X_strides[idx];
+ } else {
+ size_t X_stride = 1;
+ for (int k = out_dim - 1; k > d; --k) {
+ if (k >= out_dim - X_dim) {
+ size_t X_k = k - (out_dim - X_dim);
+ X_stride *= X.size(X_k);
+ }
+ }
+ X_idx += X_dim_idx * X_stride;
+ }
+ }
+
+ /* Compute Y index */
+ if (d >= out_dim - Y_dim) {
+ size_t Y_d = d - (out_dim - Y_dim);
+ size_t Y_dim_idx = out_dim_idx % Y.size(Y_d);
+ if (d >= out_dim - max_dim) {
+ int idx = out_dim - 1 - d;
+ Y_idx += Y_dim_idx * Y_strides[idx];
+ } else {
+ size_t Y_stride = 1;
+ for (int k = out_dim - 1; k > d; --k) {
+ if (k >= out_dim - Y_dim) {
+ size_t Y_k = k - (out_dim - Y_dim);
+ Y_stride *= Y.size(Y_k);
+ }
+ }
+ Y_idx += Y_dim_idx * Y_stride;
+ }
+ }
+ }
+
+ /* Apply the operation */
+      float x = kernels::dequantize<float>(
+          X_data[X_idx], X_scale_f, X_zero_point_i32);
+      float y = kernels::dequantize<float>(
+          Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
+      float z = x + y;
+      out_data[i] =
+          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+ }
+ }
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
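Every branch above applies the same per-element chain: dequantize both operands, add in float, requantize with clamping to the int8 range. As a self-contained reference for that arithmetic (same math, hypothetical helper name):

#include <algorithm>
#include <cmath>
#include <cstdint>

// One element of a per-tensor quantized add: x_f = s_x * (q_x - z_x),
// q_out = clamp(round((x_f + y_f) / s_out) + z_out, -128, 127).
int8_t qadd_elem(
    int8_t qx, float sx, int32_t zx,
    int8_t qy, float sy, int32_t zy,
    float inv_s_out, int32_t z_out) {
  float x = sx * (qx - zx);
  float y = sy * (qy - zy);
  float t = std::round((x + y) * inv_s_out) + z_out;
  return static_cast<int8_t>(std::min(std::max(t, -128.0f), 127.0f));
}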
diff --git a/backends/cadence/hifi/operators/op_quantized_add_asym8uxasym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_add_asym8uxasym8u_asym8u_per_tensor_out.cpp
new file mode 100644
index 00000000000..b7c453dda2b
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_add_asym8uxasym8u_asym8u_per_tensor_out.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
+
+void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
+ KernelRuntimeContext& ctx,
+ const Tensor& X,
+ double X_scale,
+ int64_t X_zero_point,
+ const Tensor& Y,
+ double Y_scale,
+ int64_t Y_zero_point,
+ double out_scale,
+ int64_t out_zero_point,
+ Tensor& out) {
+  const uint8_t* __restrict__ X_data = X.const_data_ptr<uint8_t>();
+  const uint8_t* __restrict__ Y_data = Y.const_data_ptr<uint8_t>();
+  uint8_t* __restrict__ out_data = out.mutable_data_ptr<uint8_t>();
+
+ ssize_t Y_numel = Y.numel();
+ ssize_t X_numel = X.numel();
+ ssize_t out_numel = out.numel();
+
+  float X_scale_f = static_cast<float>(X_scale);
+  float Y_scale_f = static_cast<float>(Y_scale);
+  float out_scale_f = static_cast<float>(out_scale);
+  int32_t X_zero_point_i32 = static_cast<int32_t>(X_zero_point);
+  int32_t Y_zero_point_i32 = static_cast<int32_t>(Y_zero_point);
+  int32_t out_zero_point_i32 = static_cast<int32_t>(out_zero_point);
+
+ float inv_out_scale = 1.0f / out_scale_f;
+  constexpr float min_val =
+      static_cast<float>(std::numeric_limits<uint8_t>::min());
+  constexpr float max_val =
+      static_cast<float>(std::numeric_limits<uint8_t>::max());
+
+ /* Tensor X exactly matches Y in shape, no broadcasting */
+ if (X_numel == Y_numel && Y_numel == out_numel) {
+    for (ssize_t i = 0; i < X_numel; ++i) {
+ float x = X_scale_f * (X_data[i] - X_zero_point_i32);
+ float y = Y_scale_f * (Y_data[i] - Y_zero_point_i32);
+ float z = x + y;
+ float tmp = roundf(z * inv_out_scale + out_zero_point_i32);
+      out_data[i] =
+          static_cast<uint8_t>(std::max(std::min(tmp, max_val), min_val));
+ }
+ } /* if Y is a scalar Tensor */
+ else if (Y_numel == 1) {
+    float y =
+        kernels::dequantize<float>(Y_data[0], Y_scale_f, Y_zero_point_i32);
+    for (ssize_t i = 0; i < X_numel; ++i) {
+      float x =
+          kernels::dequantize<float>(X_data[i], X_scale_f, X_zero_point_i32);
+      float z = x + y;
+      out_data[i] =
+          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+ }
+ } /* if X is a scalar Tensor */
+ else if (X_numel == 1) {
+    float x =
+        kernels::dequantize<float>(X_data[0], X_scale_f, X_zero_point_i32);
+    for (ssize_t i = 0; i < Y_numel; ++i) {
+      float y =
+          kernels::dequantize<float>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float z = x + y;
+      out_data[i] =
+          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+ }
+ } /* other broadcasting cases */
+ else {
+ /* Broadcasting implementation */
+ ssize_t X_dim = X.dim();
+ ssize_t Y_dim = Y.dim();
+ ssize_t out_dim = out.dim();
+
+ /* Precompute strides for X and Y tensors */
+    constexpr ssize_t max_dim = executorch::runtime::kTensorDimensionLimit;
+ size_t X_strides[max_dim] = {0};
+ size_t Y_strides[max_dim] = {0};
+ size_t X_stride_val = 1;
+ size_t Y_stride_val = 1;
+
+ /* Calculate strides from last dimension to first */
+ for (int d = out_dim - 1; d >= 0 && d >= out_dim - max_dim; --d) {
+ int idx = out_dim - 1 - d; /* Index into the fixed-size array */
+ if (d >= out_dim - X_dim) {
+ size_t X_d = d - (out_dim - X_dim);
+ X_strides[idx] = X_stride_val;
+ X_stride_val *= X.size(X_d);
+ }
+
+ if (d >= out_dim - Y_dim) {
+ size_t Y_d = d - (out_dim - Y_dim);
+ Y_strides[idx] = Y_stride_val;
+ Y_stride_val *= Y.size(Y_d);
+ }
+ }
+
+ /* Iterate over output tensor */
+ for (ssize_t i = 0; i < out_numel; ++i) {
+ size_t out_idx = i;
+ size_t X_idx = 0;
+ size_t Y_idx = 0;
+
+ /* Compute corresponding indices in input tensors */
+ for (int d = out_dim - 1; d >= 0; --d) {
+ size_t out_dim_idx = out_idx % out.size(d);
+ out_idx /= out.size(d);
+
+ /* Compute X index */
+ if (d >= out_dim - X_dim) {
+ size_t X_d = d - (out_dim - X_dim);
+ size_t X_dim_idx = out_dim_idx % X.size(X_d);
+ if (d >= out_dim - max_dim) {
+ int idx = out_dim - 1 - d;
+ X_idx += X_dim_idx * X_strides[idx];
+ } else {
+ size_t X_stride = 1;
+ for (int k = out_dim - 1; k > d; --k) {
+ if (k >= out_dim - X_dim) {
+ size_t X_k = k - (out_dim - X_dim);
+ X_stride *= X.size(X_k);
+ }
+ }
+ X_idx += X_dim_idx * X_stride;
+ }
+ }
+
+ /* Compute Y index */
+ if (d >= out_dim - Y_dim) {
+ size_t Y_d = d - (out_dim - Y_dim);
+ size_t Y_dim_idx = out_dim_idx % Y.size(Y_d);
+ if (d >= out_dim - max_dim) {
+ int idx = out_dim - 1 - d;
+ Y_idx += Y_dim_idx * Y_strides[idx];
+ } else {
+ size_t Y_stride = 1;
+ for (int k = out_dim - 1; k > d; --k) {
+ if (k >= out_dim - Y_dim) {
+ size_t Y_k = k - (out_dim - Y_dim);
+ Y_stride *= Y.size(Y_k);
+ }
+ }
+ Y_idx += Y_dim_idx * Y_stride;
+ }
+ }
+ }
+
+ /* Apply the operation */
+      float x = kernels::dequantize<float>(
+          X_data[X_idx], X_scale_f, X_zero_point_i32);
+      float y = kernels::dequantize<float>(
+          Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
+      float z = x + y;
+      out_data[i] =
+          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+ }
+ }
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
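The fallback branch above maps each flat output index back to flat input indices, letting size-1 dimensions repeat through the modulo against the input size. The index arithmetic in isolation (a sketch; it assumes the input sizes are already right-aligned and padded out to ndim):

#include <cstddef>
#include <cstdint>

// Flat-output -> flat-input index under NumPy-style broadcasting;
// in_sizes[d] == 1 makes `coord % in_sizes[d]` collapse to 0, i.e. that
// dimension is repeated across the output.
size_t broadcast_index(
    size_t out_flat, const int64_t* out_sizes, const int64_t* in_sizes,
    const size_t* in_strides, int ndim) {
  size_t in_flat = 0;
  for (int d = ndim - 1; d >= 0; --d) {
    size_t coord = out_flat % out_sizes[d];
    out_flat /= out_sizes[d];
    in_flat += (coord % in_sizes[d]) * in_strides[d];
  }
  return in_flat;
}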
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp
new file mode 100644
index 00000000000..6e09b995126
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <xa_nnlib_kernels_api.h>
+
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Optimized NCHW convolution for int8 x int8 -> int8
+void xa_opt_quantized_conv_nchw_asym8sxsym8s_asym8s(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ constexpr int kNnlibMaxDim = 4;
+
+  WORD8* __restrict__ p_out =
+      (WORD8* __restrict__)out.mutable_data_ptr<int8_t>();
+  WORD8* __restrict__ p_inp =
+      (WORD8* __restrict__)input.const_data_ptr<int8_t>();
+  WORD8* __restrict__ p_kernel =
+      (WORD8* __restrict__)weight.const_data_ptr<int8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 kernel_channels = weight.size(1);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+ WORD32 dilation_width = dilation[1];
+ WORD32 dilation_height = dilation[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 kernel_zero_bias = -weight_zero_point;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+ WORD32 kernel_precision = 8;
+ pVOID p_scratch = nullptr;
+ WORD32* ptr_scratch;
+
+ WORD32 scratch_size = 0;
+
+ if (groups == 1) {
+ WORD32 out_data_format = 1;
+
+ WORD8* ptr1 = (WORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((batches * input_channels * input_height * input_width) + 8) *
+ sizeof(WORD8));
+
+ WORD8* ptr2 = (WORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((out_channels * kernel_channels * kernel_height * kernel_width) + 8) *
+ sizeof(WORD8));
+
+ WORD8* pin = (WORD8*)ALIGN_PTR(ptr1, 8);
+ WORD8* pkernel = (WORD8*)ALIGN_PTR(ptr2, 8);
+
+ WORD32 p_inp_shape[kNnlibMaxDim];
+ p_inp_shape[0] = input.size(0);
+ p_inp_shape[1] = input_channels;
+ p_inp_shape[2] = input_height;
+ p_inp_shape[3] = input_width;
+
+ WORD32 p_out_shape[kNnlibMaxDim];
+ p_out_shape[0] = input.size(0);
+ p_out_shape[1] = input_height;
+ p_out_shape[2] = input_width;
+ p_out_shape[3] = input_channels;
+
+ WORD32 p_permute_vec[kNnlibMaxDim] = {0, 2, 3, 1};
+
+ xa_nn_transpose_8_8(
+ pin,
+ p_out_shape,
+ p_inp,
+ p_inp_shape,
+ p_permute_vec,
+ kNnlibMaxDim,
+ kNnlibMaxDim);
+
+ WORD32 p_inp_shape1[kNnlibMaxDim];
+ p_inp_shape1[0] = out_channels;
+ p_inp_shape1[1] = kernel_channels;
+ p_inp_shape1[2] = kernel_height;
+ p_inp_shape1[3] = kernel_width;
+
+ WORD32 p_out_shape1[kNnlibMaxDim];
+ p_out_shape1[0] = out_channels;
+ p_out_shape1[1] = kernel_height;
+ p_out_shape1[2] = kernel_width;
+ p_out_shape1[3] = kernel_channels;
+
+ xa_nn_transpose_8_8(
+ pkernel,
+ p_out_shape1,
+ p_kernel,
+ p_inp_shape1,
+ p_permute_vec,
+ kNnlibMaxDim,
+ kNnlibMaxDim);
+
+ scratch_size = xa_nn_conv2d_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ y_stride,
+ y_padding,
+ x_stride,
+ x_padding,
+ out_height,
+ out_width,
+ out_channels,
+ inp_precision,
+ kernel_precision,
+ out_data_format);
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ ptr_scratch = (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+
+ p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ WORD8* in_batch = pin + _n * input_channels * input_height * input_width;
+ WORD8* out_batch = p_out + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_per_chan_sym8sxasym8s(
+ out_batch,
+ in_batch,
+ pkernel,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ out_channels,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ out_data_format,
+ p_scratch);
+ }
+ return;
+ }
+
+ // Depthwise convolutions are now handled by specialized operators
+ ET_CHECK_MSG(groups == 1, "Only groups=1 supported for regular convolution");
+}
+
+void quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nchw_asym8sxsym8s_asym8s(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
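The expression bias_scale * out_scale * 2147483648 above packs the requantization ratio s_bias / s_out into the Q31 multiplier (scaled by 2^31, with the shift fixed at 0) that the per-channel NNLib convolution consumes. The general multiplier/shift encoding of that convention, as a sketch:

#include <cmath>
#include <cstdint>

// Encode a positive float ratio as a Q31 multiplier plus power-of-two shift
// (ratio ~= multiplier / 2^31 * 2^shift); the code above inlines the shift=0
// case by multiplying the ratio by 2^31 directly.
void encode_q31(float ratio, int32_t* multiplier, int32_t* shift) {
  int exp = 0;
  float m = std::frexp(ratio, &exp);  // ratio = m * 2^exp, m in [0.5, 1)
  int64_t q = std::llround(m * 2147483648.0);
  if (q == (INT64_C(1) << 31)) {      // rounding hit 2^31: renormalize
    q >>= 1;
    ++exp;
  }
  *multiplier = static_cast<int32_t>(q);
  *shift = exp;
}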
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp
new file mode 100644
index 00000000000..ccbf70e1d2d
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <xa_nnlib_kernels_api.h>
+
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Optimized NCHW convolution for uint8 x uint8 -> uint8
+void xa_opt_quantized_conv_nchw_asym8uxsym8u_asym8u(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ constexpr int kNnlibMaxDim = 4;
+
+  UWORD8* __restrict__ p_out =
+      (UWORD8* __restrict__)out.mutable_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_inp =
+      (UWORD8* __restrict__)input.const_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_kernel =
+      (UWORD8* __restrict__)weight.const_data_ptr<uint8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 kernel_channels = weight.size(1);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+ WORD32 dilation_width = dilation[1];
+ WORD32 dilation_height = dilation[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 kernel_zero_bias = -weight_zero_point;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+ WORD32 kernel_precision = 8;
+ pVOID p_scratch = nullptr;
+ WORD32* ptr_scratch;
+
+ WORD32 scratch_size = 0;
+
+ if (groups == 1) {
+ WORD32 out_data_format = 1;
+
+ UWORD8* ptr1 = (UWORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((batches * input_channels * input_height * input_width) + 8) *
+ sizeof(UWORD8));
+
+ UWORD8* ptr2 = (UWORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((out_channels * kernel_channels * kernel_height * kernel_width) + 8) *
+ sizeof(UWORD8));
+
+ UWORD8* pin = (UWORD8*)ALIGN_PTR(ptr1, 8);
+ UWORD8* pkernel = (UWORD8*)ALIGN_PTR(ptr2, 8);
+
+ WORD32 p_inp_shape[kNnlibMaxDim];
+ p_inp_shape[0] = input.size(0);
+ p_inp_shape[1] = input_channels;
+ p_inp_shape[2] = input_height;
+ p_inp_shape[3] = input_width;
+
+ WORD32 p_out_shape[kNnlibMaxDim];
+ p_out_shape[0] = input.size(0);
+ p_out_shape[1] = input_height;
+ p_out_shape[2] = input_width;
+ p_out_shape[3] = input_channels;
+
+ WORD32 p_permute_vec[kNnlibMaxDim] = {0, 2, 3, 1};
+
+ xa_nn_transpose_8_8(
+ (WORD8*)pin,
+ p_out_shape,
+ (WORD8*)p_inp,
+ p_inp_shape,
+ p_permute_vec,
+ kNnlibMaxDim,
+ kNnlibMaxDim);
+
+ WORD32 p_inp_shape1[kNnlibMaxDim];
+ p_inp_shape1[0] = out_channels;
+ p_inp_shape1[1] = kernel_channels;
+ p_inp_shape1[2] = kernel_height;
+ p_inp_shape1[3] = kernel_width;
+
+ WORD32 p_out_shape1[kNnlibMaxDim];
+ p_out_shape1[0] = out_channels;
+ p_out_shape1[1] = kernel_height;
+ p_out_shape1[2] = kernel_width;
+ p_out_shape1[3] = kernel_channels;
+
+ xa_nn_transpose_8_8(
+ (WORD8*)pkernel,
+ p_out_shape1,
+ (WORD8*)p_kernel,
+ p_inp_shape1,
+ p_permute_vec,
+ kNnlibMaxDim,
+ kNnlibMaxDim);
+
+ scratch_size = xa_nn_conv2d_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ y_stride,
+ y_padding,
+ x_stride,
+ x_padding,
+ out_height,
+ out_width,
+ out_channels,
+ inp_precision,
+ kernel_precision,
+ out_data_format);
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ ptr_scratch = (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+
+ p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ UWORD8* in_batch = pin + _n * input_channels * input_height * input_width;
+ UWORD8* out_batch = p_out + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_per_chan_sym8sxasym8s(
+ (WORD8*)out_batch,
+ (WORD8*)in_batch,
+ (WORD8*)pkernel,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ out_channels,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ out_data_format,
+ p_scratch);
+ }
+ return;
+ }
+
+ // Depthwise convolutions are now handled by specialized operators
+ ET_CHECK_MSG(groups == 1, "Only groups=1 supported for regular convolution");
+}
+
+void quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nchw_asym8uxsym8u_asym8u(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp
new file mode 100644
index 00000000000..3e2c9c58401
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <xa_nnlib_kernels_api.h>
+
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Specialized depthwise NCHW convolution for int8 x int8 -> int8
+void xa_opt_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ constexpr int kNnlibMaxDim = 4;
+
+  WORD8* __restrict__ p_out =
+      (WORD8* __restrict__)out.mutable_data_ptr<int8_t>();
+  WORD8* __restrict__ p_inp =
+      (WORD8* __restrict__)input.const_data_ptr<int8_t>();
+  WORD8* __restrict__ p_kernel =
+      (WORD8* __restrict__)weight.const_data_ptr<int8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+
+ WORD32 channels_multiplier = out_channels / input_channels;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 scratch_size = xa_nn_conv2d_depthwise_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ inp_precision,
+ 1); // NCHW
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ WORD32* ptr_scratch =
+ (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+ pVOID p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ WORD8* ptr1 = (WORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((batches * out_channels * out_height * out_width) + 8) * sizeof(WORD8));
+
+ WORD8* p_out_temp = (WORD8*)ALIGN_PTR(ptr1, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ WORD8* in_batch = p_inp + _n * input_channels * input_height * input_width;
+ WORD8* out_batch = p_out_temp + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_depthwise_per_chan_sym8sxasym8s(
+ out_batch,
+ p_kernel,
+ in_batch,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ 1, // NCHW
+ 0, // NHWC
+ p_scratch);
+ }
+
+ WORD32 p_inp_shape[kNnlibMaxDim];
+ p_inp_shape[0] = batches;
+ p_inp_shape[1] = out_height;
+ p_inp_shape[2] = out_width;
+ p_inp_shape[3] = out_channels;
+
+ WORD32 p_out_shape[kNnlibMaxDim];
+ p_out_shape[0] = batches;
+ p_out_shape[1] = out_channels;
+ p_out_shape[2] = out_height;
+ p_out_shape[3] = out_width;
+
+ WORD32 p_permute_vec[kNnlibMaxDim] = {0, 3, 1, 2};
+
+ xa_nn_transpose_8_8(
+ p_out,
+ p_out_shape,
+ p_out_temp,
+ p_inp_shape,
+ p_permute_vec,
+ kNnlibMaxDim,
+ kNnlibMaxDim);
+}
+
+void quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
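The depthwise path computes into a temporary NHWC buffer because that is the layout xa_nn_conv2d_depthwise_per_chan_sym8sxasym8s emits, then transposes back to NCHW with the {0, 3, 1, 2} permutation; channels_multiplier = out_channels / input_channels is the depth multiplier. The final transpose is equivalent to this per-batch loop (illustrative sketch only; the kernel above uses xa_nn_transpose_8_8):

#include <cstdint>

// NHWC -> NCHW for one batch; mirrors permute vector {0, 3, 1, 2}.
void nhwc_to_nchw(const int8_t* src, int8_t* dst, int c, int h, int w) {
  for (int ci = 0; ci < c; ++ci)
    for (int hi = 0; hi < h; ++hi)
      for (int wi = 0; wi < w; ++wi)
        dst[(ci * h + hi) * w + wi] = src[(hi * w + wi) * c + ci];
}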
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp
new file mode 100644
index 00000000000..103ce9568c5
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <xa_nnlib_kernels_api.h>
+
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Specialized depthwise NCHW convolution for uint8 x uint8 -> uint8
+void xa_opt_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ constexpr int kNnlibMaxDim = 4;
+
+  UWORD8* __restrict__ p_out =
+      (UWORD8* __restrict__)out.mutable_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_inp =
+      (UWORD8* __restrict__)input.const_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_kernel =
+      (UWORD8* __restrict__)weight.const_data_ptr<uint8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+
+ WORD32 channels_multiplier = out_channels / input_channels;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 scratch_size = xa_nn_conv2d_depthwise_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ inp_precision,
+ 1); // NCHW
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ WORD32* ptr_scratch =
+ (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+ pVOID p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ UWORD8* ptr1 = (UWORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((batches * out_channels * out_height * out_width) + 8) * sizeof(UWORD8));
+
+ UWORD8* p_out_temp = (UWORD8*)ALIGN_PTR(ptr1, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ UWORD8* in_batch = p_inp + _n * input_channels * input_height * input_width;
+ UWORD8* out_batch = p_out_temp + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_depthwise_per_chan_sym8sxasym8s(
+ (WORD8*)out_batch,
+ (WORD8*)p_kernel,
+ (WORD8*)in_batch,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ 1, // NCHW
+ 0, // NHWC
+ p_scratch);
+ }
+
+ WORD32 p_inp_shape[kNnlibMaxDim];
+ p_inp_shape[0] = batches;
+ p_inp_shape[1] = out_height;
+ p_inp_shape[2] = out_width;
+ p_inp_shape[3] = out_channels;
+
+ WORD32 p_out_shape[kNnlibMaxDim];
+ p_out_shape[0] = batches;
+ p_out_shape[1] = out_channels;
+ p_out_shape[2] = out_height;
+ p_out_shape[3] = out_width;
+
+ WORD32 p_permute_vec[kNnlibMaxDim] = {0, 3, 1, 2};
+
+ xa_nn_transpose_8_8(
+ (WORD8*)p_out,
+ p_out_shape,
+ (WORD8*)p_out_temp,
+ p_inp_shape,
+ p_permute_vec,
+ kNnlibMaxDim,
+ kNnlibMaxDim);
+}
+
+void quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp
new file mode 100644
index 00000000000..cdc1ecd8526
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Dilated fallback implementation for int8 x int8 -> int8 quantized 2d conv
+// kernel for NCHW layout. This variant is optimized for asymmetric int8 inputs,
+// weights, and outputs. The input is of shape [n x c x h x w] The weight is of
+// shape [oc x wc x wh x ww], where wc == c The output is of shape [n x oc x oh
+// x ow] The bias is of shape [oc]
+template <bool quantized = true>
+__attribute__((noinline)) void conv2d_nchw_dilated_asym8sxsym8s_asym8s_core(
+ // All the arrays
+ const int8_t* __restrict__ p_in,
+ const int8_t* __restrict__ p_weight,
+ const int32_t* __restrict__ p_bias,
+ int8_t* __restrict__ p_out,
+ // The array sizes
+ int32_t n,
+ int32_t c,
+ int32_t h,
+ int32_t w,
+ int32_t oc,
+ int32_t wc,
+ int32_t wh,
+ int32_t ww,
+ int32_t oh,
+ int32_t ow,
+ // Stride
+ int16_t s0,
+ int16_t s1,
+ // Padding
+ int16_t p0,
+ int16_t p1,
+ // Dilation
+ int16_t d0,
+ int16_t d1,
+ // Group for depthwise conv
+ int16_t groups,
+ // Quantization parameters
+ int8_t in_zero_point = 0,
+ int32_t weight_zero_point = 0,
+ float bias_scale = 1,
+ float out_scale = 1,
+ int8_t out_zero_point = 0) {
+ float inv_out_scale = 1. / out_scale;
+
+ // Compute the number of in and out channels per group
+ const int ocpg = oc / groups;
+ const int icpg = c / groups;
+
+ // Iterate over all the output batches (i.e., n)
+ for (int _n = 0; _n < n; ++_n) {
+ const int8_t* in_batch = p_in + _n * c * h * w;
+ int8_t* out_batch = p_out + _n * oc * oh * ow;
+ // Compute separable convolution for each group
+ for (int _g = 0; _g < groups; ++_g) {
+ // Identify the input and output channels involved in the computation
+ // of this group
+ int sic = _g * icpg;
+ int soc = _g * ocpg;
+ // Populate all the output channels in the group
+ for (int _oc = soc; _oc < soc + ocpg; ++_oc) {
+ int8_t* out_plane = out_batch + _oc * oh * ow;
+ const int8_t* weight_batch = p_weight + _oc * wc * wh * ww;
+ // We compute one output channel at a time. The computation can be
+ // thought of as a stencil computation: we iterate over an input of size
+ // icpg x h x w, with a stencil of size icpg x wh x ww, to compute an
+ // output channel of size 1 x oh x ow.
+ for (int _h = 0, _oh = 0; _oh < oh; _h += s0, ++_oh) {
+ for (int _w = 0, _ow = 0; _ow < ow; _w += s1, ++_ow) {
+ float acc = p_bias[_oc];
+ // Below is the stencil computation that performs the hadamard
+ // product+accumulation of each input channel (contributing to the
+ // output channel being computed) with the corresponding weight
+ // channel.
+ // General path for dilated convolutions with padding support
+ for (int _ic = sic; _ic < sic + icpg; ++_ic) {
+ const int8_t* in_plane = in_batch + _ic * h * w;
+ const int8_t* weight_plane = weight_batch + (_ic - sic) * wh * ww;
+ for (int _wh = 0; _wh < wh; ++_wh) {
+ for (int _ww = 0; _ww < ww; ++_ww) {
+ int input_h = _h + d0 * _wh - p0;
+ int input_w = _w + d1 * _ww - p1;
+ if ((input_h >= 0) && (input_h < h) && (input_w >= 0) &&
+ (input_w < w)) {
+ int ioff = input_h * w + input_w;
+ int woff = _wh * ww + _ww;
+                  float lhs = static_cast<float>(in_plane[ioff]) -
+                      static_cast<float>(in_zero_point);
+                  float rhs = static_cast<float>(weight_plane[woff]) -
+                      static_cast<float>(weight_zero_point);
+ acc += lhs * rhs;
+ }
+ }
+ }
+ }
+ // Quantize the accumulated result
+ float val = bias_scale * acc;
+          out_plane[_oh * ow + _ow] =
+              kernels::quantize<int8_t>(val, inv_out_scale, out_zero_point);
+ }
+ }
+ }
+ }
+ }
+}
+
+void quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ // input = [n, c, h, w]
+ const int n = input.size(0);
+ const int c = input.size(1);
+ const int h = conv1d ? 1 : input.size(2);
+ const int w = conv1d ? input.size(2) : input.size(3);
+ // weight = [oc, wc, wh, ww]
+ const int oc = weight.size(0);
+ const int wc = weight.size(1);
+ const int wh = conv1d ? 1 : weight.size(2);
+ const int ww = conv1d ? weight.size(2) : weight.size(3);
+ // output = [n, oc, oh, ow]
+ const int oh = conv1d ? 1 : out.size(2);
+ const int ow = conv1d ? out.size(2) : out.size(3);
+
+ conv2d_nchw_dilated_asym8sxsym8s_asym8s_core(
+      input.const_data_ptr<int8_t>(),
+      weight.const_data_ptr<int8_t>(),
+      bias.const_data_ptr<int32_t>(),
+      out.mutable_data_ptr<int8_t>(),
+ n,
+ c,
+ h,
+ w,
+ oc,
+ wc,
+ wh,
+ ww,
+ oh,
+ ow,
+ stride[0],
+ stride[1],
+ padding[0],
+ padding[1],
+ dilation[0],
+ dilation[1],
+ groups,
+      static_cast<int8_t>(in_zero_point),
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+      static_cast<int8_t>(output_zero_point));
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
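Each stencil tap above reads the input at (oh*s0 + d0*kh - p0, ow*s1 + d1*kw - p1) and is skipped when that lands in the padding region, which corresponds to the usual dilated-convolution output-size relation. As a helper expressing that relation (a sketch, standard floor-division convention):

#include <cstdint>

// Output extent along one axis of a dilated convolution; consistent with the
// bounds checks in the fallback kernel above.
int32_t conv_out_size(
    int32_t in, int32_t kernel, int32_t stride, int32_t pad, int32_t dil) {
  return (in + 2 * pad - dil * (kernel - 1) - 1) / stride + 1;
}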
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp
new file mode 100644
index 00000000000..9281dcea496
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Dilated fallback implementation for uint8 x uint8 -> uint8 quantized 2d conv
+// kernel for NCHW layout. This variant is optimized for asymmetric uint8
+// inputs, weights, and outputs. The input is of shape [n x c x h x w] The
+// weight is of shape [oc x wc x wh x ww], where wc == c The output is of shape
+// [n x oc x oh x ow] The bias is of shape [oc]
+template <bool quantized = true>
+__attribute__((noinline)) void conv2d_nchw_dilated_asym8uxsym8u_asym8u_core(
+ // All the arrays
+ const uint8_t* __restrict__ p_in,
+ const uint8_t* __restrict__ p_weight,
+ const int32_t* __restrict__ p_bias,
+ uint8_t* __restrict__ p_out,
+ // The array sizes
+ int32_t n,
+ int32_t c,
+ int32_t h,
+ int32_t w,
+ int32_t oc,
+ int32_t wc,
+ int32_t wh,
+ int32_t ww,
+ int32_t oh,
+ int32_t ow,
+ // Stride
+ int16_t s0,
+ int16_t s1,
+ // Padding
+ int16_t p0,
+ int16_t p1,
+ // Dilation
+ int16_t d0,
+ int16_t d1,
+ // Group for depthwise conv
+ int16_t groups,
+ // Quantization parameters
+ uint8_t in_zero_point = 0,
+ int32_t weight_zero_point = 0,
+ float bias_scale = 1,
+ float out_scale = 1,
+ uint8_t out_zero_point = 0) {
+ float inv_out_scale = 1. / out_scale;
+
+ // Compute the number of in and out channels per group
+ const int ocpg = oc / groups;
+ const int icpg = c / groups;
+
+ // Iterate over all the output batches (i.e., n)
+ for (int _n = 0; _n < n; ++_n) {
+ const uint8_t* in_batch = p_in + _n * c * h * w;
+ uint8_t* out_batch = p_out + _n * oc * oh * ow;
+ // Compute separable convolution for each group
+ for (int _g = 0; _g < groups; ++_g) {
+ // Identify the input and output channels involved in the computation
+ // of this group
+ int sic = _g * icpg;
+ int soc = _g * ocpg;
+ // Populate all the output channels in the group
+ for (int _oc = soc; _oc < soc + ocpg; ++_oc) {
+ uint8_t* out_plane = out_batch + _oc * oh * ow;
+ const uint8_t* weight_batch = p_weight + _oc * wc * wh * ww;
+ // We compute one output channel at a time. The computation can be
+ // thought of as a stencil computation: we iterate over an input of size
+ // icpg x h x w, with a stencil of size icpg x wh x ww, to compute an
+ // output channel of size 1 x oh x ow.
+ for (int _h = 0, _oh = 0; _oh < oh; _h += s0, ++_oh) {
+ for (int _w = 0, _ow = 0; _ow < ow; _w += s1, ++_ow) {
+ float acc = p_bias[_oc];
+ // Below is the stencil computation that performs the hadamard
+ // product+accumulation of each input channel (contributing to the
+ // output channel being computed) with the corresponding weight
+ // channel.
+ // General path for dilated convolutions with padding support
+ for (int _ic = sic; _ic < sic + icpg; ++_ic) {
+ const uint8_t* in_plane = in_batch + _ic * h * w;
+ const uint8_t* weight_plane =
+ weight_batch + (_ic - sic) * wh * ww;
+ for (int _wh = 0; _wh < wh; ++_wh) {
+ for (int _ww = 0; _ww < ww; ++_ww) {
+ int input_h = _h + d0 * _wh - p0;
+ int input_w = _w + d1 * _ww - p1;
+ if ((input_h >= 0) && (input_h < h) && (input_w >= 0) &&
+ (input_w < w)) {
+ int ioff = input_h * w + input_w;
+ int woff = _wh * ww + _ww;
+                  float lhs = static_cast<float>(in_plane[ioff]) -
+                      static_cast<float>(in_zero_point);
+                  float rhs = static_cast<float>(weight_plane[woff]) -
+                      static_cast<float>(weight_zero_point);
+ acc += lhs * rhs;
+ }
+ }
+ }
+ }
+ // Quantize the accumulated result
+ float val = bias_scale * acc;
+          out_plane[_oh * ow + _ow] =
+              kernels::quantize<uint8_t>(val, inv_out_scale, out_zero_point);
+ }
+ }
+ }
+ }
+ }
+}
+
+void quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ // input = [n, c, h, w]
+ const int n = input.size(0);
+ const int c = input.size(1);
+ const int h = conv1d ? 1 : input.size(2);
+ const int w = conv1d ? input.size(2) : input.size(3);
+ // weight = [oc, wc, wh, ww]
+ const int oc = weight.size(0);
+ const int wc = weight.size(1);
+ const int wh = conv1d ? 1 : weight.size(2);
+ const int ww = conv1d ? weight.size(2) : weight.size(3);
+ // output = [n, oc, oh, ow]
+ const int oh = conv1d ? 1 : out.size(2);
+ const int ow = conv1d ? out.size(2) : out.size(3);
+
+ conv2d_nchw_dilated_asym8uxsym8u_asym8u_core(
+      input.const_data_ptr<uint8_t>(),
+      weight.const_data_ptr<uint8_t>(),
+      bias.const_data_ptr<int32_t>(),
+      out.mutable_data_ptr<uint8_t>(),
+ n,
+ c,
+ h,
+ w,
+ oc,
+ wc,
+ wh,
+ ww,
+ oh,
+ ow,
+ stride[0],
+ stride[1],
+ padding[0],
+ padding[1],
+ dilation[0],
+ dilation[1],
+ groups,
+      static_cast<uint8_t>(in_zero_point),
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+      static_cast<uint8_t>(output_zero_point));
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/reference/operators/quantized_conv_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp
similarity index 56%
rename from backends/cadence/reference/operators/quantized_conv_out.cpp
rename to backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp
index 87ff264a258..297fd30e446 100644
--- a/backends/cadence/reference/operators/quantized_conv_out.cpp
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp
@@ -6,17 +6,21 @@
* LICENSE file in the root directory of this source tree.
*/
-#include <executorch/backends/cadence/reference/kernels/kernels.h>
-#include <executorch/backends/cadence/reference/operators/operators.h>
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <xa_nnlib_kernels_api.h>
-namespace impl {
-namespace reference {
-namespace native {
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
using ::executorch::aten::IntArrayRef;
-using ::executorch::aten::ScalarType;
-using ::executorch::aten::Tensor;
-using ::executorch::runtime::KernelRuntimeContext;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
// This implements a generic 2d conv kernel that operates on raw pointers.
// The version handles both quantized and fp32 convolutions.
@@ -141,8 +145,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
if (quantized) {
float val = bias_scale * acc;
out_plane[_oh * ow + _ow] =
-            ::impl::reference::kernels::quantize<OT>(
-                val, inv_out_scale, out_zero_point);
+            kernels::quantize<OT>(val, inv_out_scale, out_zero_point);
} else {
out_plane[_oh * ow + _ow] = acc;
}
@@ -153,128 +156,286 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
}
}
-template <
- typename IT = float,
- typename WT = IT,
- typename BT = IT,
- typename OT = IT,
- bool quantized = false>
-__attribute__((noinline)) void conv2d_nhwc_core_generic(
- // All the arrays
- const IT* __restrict__ p_in,
- const WT* __restrict__ p_weight,
- const BT* __restrict__ p_bias,
- OT* __restrict__ p_out,
- // The array sizes
- int32_t n,
- int32_t h,
- int32_t w,
- int32_t c,
- int32_t oc,
- int32_t wh,
- int32_t ww,
- int32_t wc,
- int32_t oh,
- int32_t ow,
- // Stride
- int16_t s0,
- int16_t s1,
- // Padding
- int16_t p0,
- int16_t p1,
- // Dilation
- int16_t d0,
- int16_t d1,
- // Group for depthwise conv
+void xa_opt_quantized_conv_nchw(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
int16_t groups,
- // Optional args that are only relevant for quantized convolution
- // input zero point
- IT in_zero_point = 0,
- // weight zero point
- int32_t weight_zero_point = 0,
- float bias_scale = 1,
- float out_scale = 1,
- OT out_zero_point = 0) {
- float inv_out_scale = 1. / out_scale;
- bool zero_pad_unit_dilation = d0 == 1 && d1 == 1 && p0 == 0 && p1 == 0;
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ constexpr int kNnlibMaxDim = 4;
- // Compute the number of in and out channels per group
- const int ocpg = oc / groups;
- const int icpg = c / groups;
+ if (input.scalar_type() == ScalarType::Char) {
+    WORD8* __restrict__ p_out =
+        (WORD8* __restrict__)out.mutable_data_ptr<int8_t>();
+    WORD8* __restrict__ p_inp =
+        (WORD8* __restrict__)input.const_data_ptr<int8_t>();
+    WORD8* __restrict__ p_kernel =
+        (WORD8* __restrict__)weight.const_data_ptr<int8_t>();
+    WORD32* __restrict__ p_bias =
+        (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
- // Iterate over all the output batches (i.e., n)
- for (int _n = 0; _n < n; ++_n) {
- const IT* in_batch = p_in + _n * h * w * c;
- OT* out_batch = p_out + _n * oh * ow * oc;
- for (int _h = 0, _oh = 0; _oh < oh; _h += s0, ++_oh) {
- for (int _w = 0, _ow = 0; _ow < ow; _w += s1, ++_ow) {
- OT* out_line = out_batch + (_oh * ow + _ow) * oc;
- // Compute separable convolution for each group
- for (int _g = 0; _g < groups; ++_g) {
- // Identify the input and output channels involved in the computation
- // of this group
- int sic = _g * icpg;
- int soc = _g * ocpg;
- // Populate all the output channels in the group
- for (int _oc = soc; _oc < soc + ocpg; ++_oc) {
- const WT* weight_batch = p_weight + _oc * wh * ww * wc;
- // We compute one output channel at a time. The computation can be
- // thought of as a stencil computation: we iterate over an input of
- // size h x w x icpg, with a stencil of size wh x ww x icpg, to
- // compute an output channel of size oh x ow x 1.
- float acc = p_bias[_oc];
- // Below is the stencil computation that performs the hadamard
- // product+accumulation of each input channel (contributing to
- // the output channel being computed) with the corresponding
- // weight channel. If the padding is 0, and dilation is 1, then
- // we can remove the unnecessary checks, and simplify the code
-          // so that it can be vectorized by the Tensilica compiler.
- if (zero_pad_unit_dilation) {
- for (int _wh = 0; _wh < wh; ++_wh) {
- for (int _ww = 0; _ww < ww; ++_ww) {
- const IT* in_line =
- in_batch + (_h + _wh) * w * c + (_w + _ww) * c;
- const WT* weight_line =
- weight_batch + _wh * ww * wc + _ww * wc;
- for (int _ic = sic; _ic < sic + icpg; ++_ic) {
- float lhs = in_line[_ic] - in_zero_point;
- float rhs = weight_line[_ic - sic] -
- (quantized ? weight_zero_point : 0);
- acc += lhs * rhs;
- }
- }
- }
- } else {
- for (int _wh = 0; _wh < wh; ++_wh) {
- for (int _ww = 0; _ww < ww; ++_ww) {
- if (((_h + d0 * _wh - p0) >= 0) &&
- ((_h + d0 * _wh - p0) < h) &&
- ((_w + d1 * _ww - p1) >= 0) &&
- ((_w + d1 * _ww - p1 < w))) {
- const IT* in_line = in_batch +
- (_h + d0 * _wh - p0) * w * c + (_w + d1 * _ww - p1) * c;
- const WT* weight_line =
- weight_batch + _wh * ww * wc + _ww * wc;
- for (int _ic = sic; _ic < sic + icpg; ++_ic) {
- float lhs = in_line[_ic] - in_zero_point;
- float rhs = weight_line[_ic - sic] -
- (quantized ? weight_zero_point : 0);
- acc += lhs * rhs;
- }
- }
- }
- }
- }
- if (quantized) {
- float val = bias_scale * acc;
-          out_line[_oc] = ::impl::reference::kernels::quantize<OT>(
-              val, inv_out_scale, out_zero_point);
- } else {
- out_line[_oc] = acc;
- }
- }
- }
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 kernel_channels = weight.size(1);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+ WORD32 dilation_width = dilation[1];
+ WORD32 dilation_height = dilation[0];
+
+ // WORD32* kernel_bias_ptr =
+ // (WORD32*)weight_zero_point.const_data_ptr();
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 kernel_zero_bias = -weight_zero_point;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+ WORD32 kernel_precision = 8;
+ pVOID p_scratch = nullptr;
+ WORD32* ptr_scratch;
+
+ WORD32 scratch_size = 0;
+
+ if (groups == 1) {
+ WORD32 out_data_format = 1;
+
+ WORD8* ptr1 = (WORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((batches * input_channels * input_height * input_width) + 8) *
+ sizeof(WORD8));
+
+ WORD8* ptr2 = (WORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((out_channels * kernel_channels * kernel_height * kernel_width) +
+ 8) *
+ sizeof(WORD8));
+
+ WORD8* pin = (WORD8*)ALIGN_PTR(ptr1, 8);
+ WORD8* pkernel = (WORD8*)ALIGN_PTR(ptr2, 8);
+
+ WORD32 p_inp_shape[kNnlibMaxDim];
+ p_inp_shape[0] = input.size(0);
+ p_inp_shape[1] = input_channels;
+ p_inp_shape[2] = input_height;
+ p_inp_shape[3] = input_width;
+
+ WORD32 p_out_shape[kNnlibMaxDim];
+ p_out_shape[0] = input.size(0);
+ p_out_shape[1] = input_height;
+ p_out_shape[2] = input_width;
+ p_out_shape[3] = input_channels;
+
+ WORD32 p_permute_vec[kNnlibMaxDim] = {0, 2, 3, 1};
+
+ xa_nn_transpose_8_8(
+ pin,
+ p_out_shape,
+ p_inp,
+ p_inp_shape,
+ p_permute_vec,
+ kNnlibMaxDim, // input dimensions
+ kNnlibMaxDim); // output dimensions
+
+ WORD32 p_inp_shape1[kNnlibMaxDim];
+ p_inp_shape1[0] = out_channels;
+ p_inp_shape1[1] = kernel_channels;
+ p_inp_shape1[2] = kernel_height;
+ p_inp_shape1[3] = kernel_width;
+
+ WORD32 p_out_shape1[kNnlibMaxDim];
+ p_out_shape1[0] = out_channels;
+ p_out_shape1[1] = kernel_height;
+ p_out_shape1[2] = kernel_width;
+ p_out_shape1[3] = kernel_channels;
+
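+    // Likewise permute the kernel from [oc, c, kh, kw] to [oc, kh, kw, c].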
+ xa_nn_transpose_8_8(
+ pkernel,
+ p_out_shape1,
+ p_kernel,
+ p_inp_shape1,
+ p_permute_vec,
+        kNnlibMaxDim, // output dimensions
+        kNnlibMaxDim); // input dimensions
+
+ scratch_size = xa_nn_conv2d_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ y_stride,
+ y_padding,
+ x_stride,
+ x_padding,
+ out_height,
+ out_width,
+ out_channels,
+ inp_precision,
+ kernel_precision,
+ out_data_format);
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ ptr_scratch = (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+
+ p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
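+    // Run the NNLib conv kernel once per batch over the transposed buffers.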
+ for (int _n = 0; _n < batches; _n++) {
+ WORD8* in_batch =
+ pin + _n * input_channels * input_height * input_width;
+ WORD8* out_batch = p_out + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_per_chan_sym8sxasym8s(
+ out_batch,
+ in_batch,
+ pkernel,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ out_channels,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ out_data_format,
+ p_scratch);
+ }
+ return;
+ }
+
+ if (groups == input_channels) {
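+    // Depthwise case: each of the input_channels groups convolves one input
+    // channel with channels_multiplier filters, so
+    // out_channels == input_channels * channels_multiplier.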
+ WORD32 channels_multiplier = out_channels / input_channels;
+
+ scratch_size = xa_nn_conv2d_depthwise_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ inp_precision,
+ 1); // NCHW
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ ptr_scratch = (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+
+ p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ WORD8* ptr1 = (WORD8*)kernels::allocate_temp_memory(
+ ctx,
+ ((batches * out_channels * out_height * out_width) + 8) *
+ sizeof(WORD8));
+
+ WORD8* p_out_temp = (WORD8*)ALIGN_PTR(ptr1, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ WORD8* in_batch =
+ p_inp + _n * input_channels * input_height * input_width;
+ WORD8* out_batch =
+ p_out_temp + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_depthwise_per_chan_sym8sxasym8s(
+ out_batch,
+ p_kernel,
+ in_batch,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ 1, // NCHW
+ 0, // NHWC
+ p_scratch);
}
+
+ WORD32 p_inp_shape[kNnlibMaxDim];
+ p_inp_shape[0] = batches;
+ p_inp_shape[1] = out_height;
+ p_inp_shape[2] = out_width;
+ p_inp_shape[3] = out_channels;
+
+ WORD32 p_out_shape[kNnlibMaxDim];
+ p_out_shape[0] = batches;
+ p_out_shape[1] = out_channels;
+ p_out_shape[2] = out_height;
+ p_out_shape[3] = out_width;
+
+ WORD32 p_permute_vec[kNnlibMaxDim] = {0, 3, 1, 2};
+
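+    // Transpose the depthwise result back from NHWC to NCHW: permuting
+    // [n, oh, ow, oc] with {0, 3, 1, 2} restores [n, oc, oh, ow].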
+ xa_nn_transpose_8_8(
+ p_out,
+ p_out_shape,
+ p_out_temp,
+ p_inp_shape,
+ p_permute_vec,
+        kNnlibMaxDim, // output dimensions
+        kNnlibMaxDim); // input dimensions
+
+ return;
}
}
}
@@ -354,78 +515,7 @@ void quantized_conv_nchw(
#undef typed_quantized_conv2d_nchw
}
-void quantized_conv_nhwc(
- const Tensor& input,
- const Tensor& weight,
- const Tensor& bias,
- IntArrayRef stride,
- IntArrayRef padding,
- IntArrayRef dilation,
- int16_t groups,
- int32_t in_zero_point,
- int32_t weight_zero_point,
- float bias_scale,
- float output_scale,
- int32_t output_zero_point,
- Tensor& out) {
- bool conv1d = input.dim() == 3;
- // input = [n, h, w, c]
- const int n = input.size(0);
- const int h = conv1d ? 1 : input.size(1);
- const int w = conv1d ? input.size(1) : input.size(2);
- const int c = conv1d ? input.size(2) : input.size(3);
- // weight = [oc, wh, ww, wc]
- const int oc = weight.size(0);
- const int wh = conv1d ? 1 : weight.size(1);
- const int ww = conv1d ? weight.size(1) : weight.size(2);
- const int wc = conv1d ? weight.size(2) : weight.size(3);
- // output = [n, oh, ow, oc]
- const int oh = conv1d ? 1 : out.size(1);
- const int ow = conv1d ? out.size(1) : out.size(2);
-
-#define typed_quantized_conv2d_nhwc(ctype, dtype) \
- case ScalarType::dtype: { \
- conv2d_nhwc_core_generic( \
- input.const_data_ptr(), \
- weight.const_data_ptr(), \
- bias.const_data_ptr(), \
- out.mutable_data_ptr(), \
- n, \
- h, \
- w, \
- c, \
- oc, \
- wh, \
- ww, \
- wc, \
- oh, \
- ow, \
- stride[0], \
- stride[1], \
- padding[0], \
- padding[1], \
- dilation[0], \
- dilation[1], \
- groups, \
- in_zero_point, \
- weight_zero_point, \
- bias_scale, \
- output_scale, \
- (ctype)output_zero_point); \
- break; \
- }
- ScalarType dtype = out.scalar_type();
- switch (dtype) {
- ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_conv2d_nhwc);
- default:
- ET_DCHECK_MSG(
- false, "Unhandled dtype %s", torch::executor::toString(dtype));
- }
-
-#undef typed_quantized_conv2d_nhwc
-}
-
-void quantized_conv_out(
+void quantized_conv_nchw_out(
__ET_UNUSED KernelRuntimeContext& ctx,
const Tensor& input,
const Tensor& weight,
@@ -441,13 +531,23 @@ void quantized_conv_out(
int64_t output_zero_point,
__ET_UNUSED const Tensor& out_multiplier,
__ET_UNUSED const Tensor& out_shift,
- bool channel_last,
Tensor& out) {
const float bias_scale_float = bias_scale.const_data_ptr<float>()[0];
const int32_t weight_zero_point_int =
weight_zero_point.const_data_ptr<int32_t>()[0];
- if (channel_last) {
- quantized_conv_nhwc(
+
+  bool optimized = (input.scalar_type() == ScalarType::Char) ||
+      (input.scalar_type() == ScalarType::Byte);
+
+  if ((dilation[0] != 1) || (dilation[1] != 1)) {
+    optimized = false;
+  }
+
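+  // Dispatch rule: 8-bit activations (Char/Byte) with unit dilation take the
+  // NNLib fast path; everything else falls back to the generic NCHW kernel.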
+ if (optimized) {
+ xa_opt_quantized_conv_nchw(
+ ctx,
input,
weight,
bias,
@@ -479,7 +579,7 @@ void quantized_conv_out(
}
}
-void quantized_conv_per_tensor_out(
+void quantized_conv_nchw_per_tensor_out(
__ET_UNUSED KernelRuntimeContext& ctx,
const Tensor& input,
const Tensor& weight,
@@ -495,10 +595,19 @@ void quantized_conv_per_tensor_out(
int64_t output_zero_point,
__ET_UNUSED int64_t out_multiplier,
__ET_UNUSED int64_t out_shift,
- bool channel_last,
Tensor& out) {
- if (channel_last) {
- quantized_conv_nhwc(
+  bool optimized = (input.scalar_type() == ScalarType::Char) ||
+      (input.scalar_type() == ScalarType::Byte);
+
+  if ((dilation[0] != 1) || (dilation[1] != 1)) {
+    optimized = false;
+  }
+
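+  // Same dispatch rule as the tensor-variant above: the NNLib path requires
+  // 8-bit activations and unit dilation.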
+ if (optimized) {
+ xa_opt_quantized_conv_nchw(
+ ctx,
input,
weight,
bias,
@@ -531,5 +640,6 @@ void quantized_conv_per_tensor_out(
}
} // namespace native
-} // namespace reference
+} // namespace HiFi
} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp
new file mode 100644
index 00000000000..9416b8b7fd2
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
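+// Rounds a pointer value up to the next `bytes`-aligned address; `bytes`
+// must be a power of two.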
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Optimized NHWC convolution for int8 x int8 -> int8
+void xa_opt_quantized_conv_nhwc_asym8sxsym8s_asym8s(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ constexpr int kNnlibMaxDim = 4;
+
+  WORD8* __restrict__ p_out =
+      (WORD8* __restrict__)out.mutable_data_ptr<int8_t>();
+  WORD8* __restrict__ p_inp =
+      (WORD8* __restrict__)input.const_data_ptr<int8_t>();
+  WORD8* __restrict__ p_kernel =
+      (WORD8* __restrict__)weight.const_data_ptr<int8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 kernel_channels = weight.size(1);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+ WORD32 dilation_width = dilation[1];
+ WORD32 dilation_height = dilation[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 kernel_zero_bias = -weight_zero_point;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+ WORD32 kernel_precision = 8;
+ pVOID p_scratch = nullptr;
+ WORD32* ptr_scratch;
+
+ WORD32 scratch_size = 0;
+
+ if (groups == 1) {
+ WORD32 out_data_format = 1;
+
+ scratch_size = xa_nn_conv2d_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ y_stride,
+ y_padding,
+ x_stride,
+ x_padding,
+ out_height,
+ out_width,
+ out_channels,
+ inp_precision,
+ kernel_precision,
+ out_data_format);
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ ptr_scratch = (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+
+ p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
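+    // NHWC activations already match the kernel's expected layout, so the
+    // conv runs directly on the input, once per batch, with no transpose.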
+ for (int _n = 0; _n < batches; _n++) {
+ WORD8* in_batch =
+ p_inp + _n * input_channels * input_height * input_width;
+ WORD8* out_batch = p_out + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_per_chan_sym8sxasym8s(
+ out_batch,
+ in_batch,
+ p_kernel,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ out_channels,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ out_data_format,
+ p_scratch);
+ }
+ return;
+ }
+
+ // Depthwise convolutions are now handled by specialized operators
+ ET_CHECK_MSG(groups == 1, "Only groups=1 supported for regular convolution");
+}
+
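+// Wrapper matching the per-tensor operator signature: it narrows the
+// int64/double quantization parameters to the int32/float types the core
+// expects. out_multiplier and out_shift are unused here because the
+// multiplier is recomputed from bias_scale and output_scale above.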
+void quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nhwc_asym8sxsym8s_asym8s(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp
new file mode 100644
index 00000000000..97f7967a2ba
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Optimized NHWC convolution for uint8 x uint8 -> uint8
+void xa_opt_quantized_conv_nhwc_asym8uxsym8u_asym8u(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+ constexpr int kNnlibMaxDim = 4;
+
+  UWORD8* __restrict__ p_out =
+      (UWORD8* __restrict__)out.mutable_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_inp =
+      (UWORD8* __restrict__)input.const_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_kernel =
+      (UWORD8* __restrict__)weight.const_data_ptr<uint8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 kernel_channels = weight.size(1);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+ WORD32 dilation_width = dilation[1];
+ WORD32 dilation_height = dilation[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 kernel_zero_bias = -weight_zero_point;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+ WORD32 kernel_precision = 8;
+ pVOID p_scratch = nullptr;
+ WORD32* ptr_scratch;
+
+ WORD32 scratch_size = 0;
+
+ if (groups == 1) {
+ WORD32 out_data_format = 1;
+
+ scratch_size = xa_nn_conv2d_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ y_stride,
+ y_padding,
+ x_stride,
+ x_padding,
+ out_height,
+ out_width,
+ out_channels,
+ inp_precision,
+ kernel_precision,
+ out_data_format);
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ ptr_scratch = (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+
+ p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ UWORD8* in_batch =
+ p_inp + _n * input_channels * input_height * input_width;
+ UWORD8* out_batch = p_out + _n * out_channels * out_height * out_width;
+
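+      // The unsigned buffers are reinterpreted as signed WORD8 to match the
+      // sym8sxasym8s NNLib entry point used below.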
+ xa_nn_conv2d_per_chan_sym8sxasym8s(
+ (WORD8*)out_batch,
+ (WORD8*)in_batch,
+ (WORD8*)p_kernel,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ kernel_channels,
+ dilation_height,
+ dilation_width,
+ out_channels,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ out_data_format,
+ p_scratch);
+ }
+ return;
+ }
+
+ // Depthwise convolutions are now handled by specialized operators
+ ET_CHECK_MSG(groups == 1, "Only groups=1 supported for regular convolution");
+}
+
+void quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nhwc_asym8uxsym8u_asym8u(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp
new file mode 100644
index 00000000000..6512622f221
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Specialized depthwise NHWC convolution for int8 x int8 -> int8
+void xa_opt_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+
+  WORD8* __restrict__ p_out =
+      (WORD8* __restrict__)out.mutable_data_ptr<int8_t>();
+  WORD8* __restrict__ p_inp =
+      (WORD8* __restrict__)input.const_data_ptr<int8_t>();
+  WORD8* __restrict__ p_kernel =
+      (WORD8* __restrict__)weight.const_data_ptr<int8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+
+ WORD32 channels_multiplier = out_channels / input_channels;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
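+  // Query NNLib for the scratch size this depthwise shape requires; the last
+  // argument selects NHWC (0) rather than NCHW (1) input layout.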
+ WORD32 scratch_size = xa_nn_conv2d_depthwise_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ inp_precision,
+ 0); // NHWC
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ WORD32* ptr_scratch =
+ (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+ pVOID p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ WORD8* in_batch = p_inp + _n * input_channels * input_height * input_width;
+ WORD8* out_batch = p_out + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_depthwise_per_chan_sym8sxasym8s(
+ out_batch,
+ p_kernel,
+ in_batch,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ 0, // NHWC
+ 0, // NHWC
+ p_scratch);
+ }
+}
+
+void quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp
new file mode 100644
index 00000000000..d41a9c8d4b7
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Specialized depthwise NHWC convolution for uint8 x uint8 -> uint8
+void xa_opt_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u(
+ KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int16_t groups,
+ int32_t in_zero_point,
+ int32_t weight_zero_point,
+ float bias_scale,
+ float output_scale,
+ int32_t output_zero_point,
+ Tensor& out) {
+ bool conv1d = input.dim() == 3;
+
+  UWORD8* __restrict__ p_out =
+      (UWORD8* __restrict__)out.mutable_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_inp =
+      (UWORD8* __restrict__)input.const_data_ptr<uint8_t>();
+  UWORD8* __restrict__ p_kernel =
+      (UWORD8* __restrict__)weight.const_data_ptr<uint8_t>();
+  WORD32* __restrict__ p_bias =
+      (WORD32* __restrict__)bias.const_data_ptr<int32_t>();
+
+ WORD32 input_height = conv1d ? 1 : input.size(2);
+ WORD32 input_width = conv1d ? input.size(2) : input.size(3);
+ WORD32 input_channels = input.size(1);
+ WORD32 kernel_height = conv1d ? 1 : weight.size(2);
+ WORD32 kernel_width = conv1d ? weight.size(2) : weight.size(3);
+ WORD32 out_channels = weight.size(0);
+ WORD32 out_height = conv1d ? 1 : out.size(2);
+ WORD32 out_width = conv1d ? out.size(2) : out.size(3);
+ WORD32 batches = input.size(0);
+
+ WORD32 x_stride = stride[1];
+ WORD32 y_stride = stride[0];
+ WORD32 x_padding = padding[1];
+ WORD32 y_padding = padding[0];
+
+ WORD32 input_zero_bias = -in_zero_point;
+ WORD32 out_zero_bias = output_zero_point;
+ WORD32 inp_precision = 8;
+
+ WORD32 channels_multiplier = out_channels / input_channels;
+
+ WORD32 out_multiplier32[out_channels];
+ WORD32 out_shift32[out_channels];
+
+ float out_scale = 1. / output_scale;
+
+ for (int i = 0; i < out_channels; i++) {
+ out_multiplier32[i] = bias_scale * out_scale * 2147483648;
+ out_shift32[i] = 0;
+ }
+
+ WORD32 scratch_size = xa_nn_conv2d_depthwise_getsize(
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ inp_precision,
+ 0); // NHWC
+
+ scratch_size = scratch_size < 0 ? 0 : scratch_size;
+
+ WORD32* ptr_scratch =
+ (WORD32*)kernels::allocate_temp_memory(ctx, scratch_size);
+ pVOID p_scratch = (pVOID)ALIGN_PTR(ptr_scratch, 8);
+
+ for (int _n = 0; _n < batches; _n++) {
+ UWORD8* in_batch = p_inp + _n * input_channels * input_height * input_width;
+ UWORD8* out_batch = p_out + _n * out_channels * out_height * out_width;
+
+ xa_nn_conv2d_depthwise_per_chan_sym8sxasym8s(
+ (WORD8*)out_batch,
+ (WORD8*)p_kernel,
+ (WORD8*)in_batch,
+ p_bias,
+ input_height,
+ input_width,
+ input_channels,
+ kernel_height,
+ kernel_width,
+ channels_multiplier,
+ x_stride,
+ y_stride,
+ x_padding,
+ y_padding,
+ out_height,
+ out_width,
+ input_zero_bias,
+ out_multiplier32,
+ out_shift32,
+ out_zero_bias,
+ 0, // NHWC
+ 0, // NHWC
+ p_scratch);
+ }
+}
+
+void quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ Tensor& out) {
+ xa_opt_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u(
+ ctx,
+ input,
+ weight,
+ bias,
+ stride,
+ padding,
+ dilation,
+ groups,
+ in_zero_point,
+ weight_zero_point,
+ bias_scale,
+ output_scale,
+ output_zero_point,
+ out);
+}
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp
new file mode 100644
index 00000000000..be661334acf
--- /dev/null
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+using Tensor = executorch::aten::Tensor;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+using ScalarType = executorch::aten::ScalarType;
+using ::executorch::aten::IntArrayRef;
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Dilated fallback implementation of the int8 x int8 -> int8 quantized 2d
+// conv kernel for NHWC layout. This variant handles asymmetric int8 inputs
+// and outputs with symmetric int8 weights. The input is of shape
+// [n x h x w x c], the weight is of shape [oc x wh x ww x wc], the output is
+// of shape [n x oh x ow x oc], and the bias is of shape [oc].
+template <bool quantized = true>
+__attribute__((noinline)) void conv2d_nhwc_dilated_asym8sxsym8s_asym8s_core(
+ // All the arrays
+ const int8_t* __restrict__ p_in,
+ const int8_t* __restrict__ p_weight,
+ const int32_t* __restrict__ p_bias,
+ int8_t* __restrict__ p_out,
+ // The array sizes
+ int32_t n,
+ int32_t h,
+ int32_t w,
+ int32_t c,
+ int32_t oc,
+ int32_t wh,
+ int32_t ww,
+ int32_t wc,
+ int32_t oh,
+ int32_t ow,
+ // Stride
+ int16_t s0,
+ int16_t s1,
+ // Padding
+ int16_t p0,
+ int16_t p1,
+ // Dilation
+ int16_t d0,
+ int16_t d1,
+ // Group for depthwise conv
+ int16_t groups,
+ // Quantization parameters
+ int8_t in_zero_point = 0,
+ int32_t weight_zero_point = 0,
+ float bias_scale = 1,
+ float out_scale = 1,
+ int8_t out_zero_point = 0) {
+ float inv_out_scale = 1. / out_scale;
+
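+  // The caller is expected to have sized `out` with the standard dilated-conv
+  // relation: oh = (h + 2 * p0 - d0 * (wh - 1) - 1) / s0 + 1 (and similarly
+  // for ow with p1, d1, ww, s1).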
+ // Compute the number of in and out channels per group
+ const int ocpg = oc / groups;
+ const int icpg = c / groups;
+
+ // Iterate over all the output batches (i.e., n)
+ for (int _n = 0; _n < n; ++_n) {
+ const int8_t* in_batch = p_in + _n * h * w * c;
+ int8_t* out_batch = p_out + _n * oh * ow * oc;
+ for (int _h = 0, _oh = 0; _oh < oh; _h += s0, ++_oh) {
+ for (int _w = 0, _ow = 0; _ow < ow; _w += s1, ++_ow) {
+ int8_t* out_line = out_batch + (_oh * ow + _ow) * oc;
+ // Compute separable convolution for each group
+ for (int _g = 0; _g < groups; ++_g) {
+ // Identify the input and output channels involved in the computation
+ // of this group
+ int sic = _g * icpg;
+ int soc = _g * ocpg;
+ // Populate all the output channels in the group
+ for (int _oc = soc; _oc < soc + ocpg; ++_oc) {
+ const int8_t* weight_batch = p_weight + _oc * wh * ww * wc;
+ // We compute one output channel at a time. The computation can be
+ // thought of as a stencil computation: we iterate over an input of
+ // size h x w x icpg, with a stencil of size wh x ww x icpg, to
+ // compute an output channel of size oh x ow x 1.
+ float acc = p_bias[_oc];
+ // Below is the stencil computation that performs the hadamard
+ // product+accumulation of each input channel (contributing to
+ // the output channel being computed) with the corresponding
+ // weight channel.
+ // General path for dilated convolutions with padding support
+ for (int _wh = 0; _wh < wh; ++_wh) {
+ for (int _ww = 0; _ww < ww; ++_ww) {
+ int input_h = _h + d0 * _wh - p0;
+ int input_w = _w + d1 * _ww - p1;
+ if ((input_h >= 0) && (input_h < h) && (input_w >= 0) &&
+ (input_w < w)) {
+ const int8_t* in_line =
+ in_batch + input_h * w * c + input_w * c;
+ const int8_t* weight_line =
+ weight_batch + _wh * ww * wc + _ww * wc;
+ for (int _ic = sic; _ic < sic + icpg; ++_ic) {
+                    float lhs = static_cast<float>(in_line[_ic]) -
+                        static_cast<float>(in_zero_point);
+                    float rhs = static_cast<float>(weight_line[_ic - sic]) -
+ static_cast