
Commit 208989b

Update on "[llm] Add a generic text only LLM runner"
Introducing `text_llm_runner`. This can be used to run all text-only, decoder-only LLM models supported by ExecuTorch.

* Metadata is read from the .pte file and used to construct the runner object.
* examples/models/llama/runner.h[.cpp] now contains only a simple wrapper around `text_llm_runner.h[.cpp]`.

In subsequent PRs I will move examples/models/phi-3-mini/runner to use the generic runner, and will look into the QNN and MediaTek runners as well.

Differential Revision: [D75910889](https://our.internmc.facebook.com/intern/diff/D75910889/)

[ghstack-poisoned]
2 parents fca9d38 + d0e7faa commit 208989b
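For context on the metadata-driven construction described above, here is a conceptual Python sketch. It is illustration only: the actual runner is C++, and the metadata key names below are hypothetical, not the real .pte keys.

from dataclasses import dataclass


# Conceptual sketch only: the real runner is C++, and the metadata key
# names below are hypothetical illustrations, not the actual .pte keys.
@dataclass
class RunnerConfig:
    vocab_size: int
    max_seq_len: int
    bos_id: int
    eos_id: int


def runner_config_from_metadata(metadata: dict) -> RunnerConfig:
    # Per-model constants come from the exported program itself, so one
    # generic runner can serve any text-only, decoder-only model.
    return RunnerConfig(
        vocab_size=metadata["vocab_size"],
        max_seq_len=metadata["max_seq_len"],
        bos_id=metadata["bos_id"],
        eos_id=metadata["eos_id"],
    )


print(runner_config_from_metadata(
    {"vocab_size": 32000, "max_seq_len": 2048, "bos_id": 1, "eos_id": 2}
))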

46 files changed: +937 -281 lines changed
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-01f1cc44cbbfdf6307aa01b803a4ee22f9ade946
+5616fa4a68718ead203314a3467f7dd9547153ae

backends/arm/quantizer/quantization_annotator.py

Lines changed: 35 additions & 10 deletions
@@ -95,7 +95,10 @@ def _is_ok_for_quantization(
             continue
 
         for n_arg in _as_list(node.args[quant_property.index]):
-            assert isinstance(n_arg, Node)
+            if not isinstance(n_arg, Node):
+                raise TypeError(
+                    f"n_arg must be a Node instance, got {type(n_arg).__name__!r}"
+                )
             if not is_ok_for_quantization(n_arg, gm):  # type: ignore[attr-defined]
                 logger.debug(
                     f'could not quantize node due to input "{node}": '
@@ -108,7 +111,10 @@ def _is_ok_for_quantization(
 
 
 def _annotate_input(node: Node, quant_property: _QuantProperty):
-    assert not is_annotated(node)
+    if is_annotated(node):
+        raise RuntimeError(
+            f"Cannot annotate input: node '{node.name}' is already annotated"
+        )
     if quant_property.optional and (
         quant_property.index >= len(node.args)
         or node.args[quant_property.index] is None
@@ -120,17 +126,28 @@ def _annotate_input(node: Node, quant_property: _QuantProperty):
         _as_list(quant_property.qspec),
         strict=True,
     ):
-        assert isinstance(n_arg, Node)
+        if not isinstance(n_arg, Node):
+            raise TypeError(
+                f"n_arg must be a Node instance, got {type(n_arg).__name__!r}"
+            )
         annotate_input_qspec_map(node, n_arg, qspec)
         if quant_property.mark_annotated:
             mark_node_as_annotated(n_arg)  # type: ignore[attr-defined]
 
 
 def _annotate_output(node: Node, quant_property: _QuantProperty):
-    assert not is_annotated(node)
-    assert not quant_property.mark_annotated
-    assert not quant_property.optional
-    assert quant_property.index == 0, "Only one output annotation supported currently"
+    if is_annotated(node):
+        raise RuntimeError(
+            f"Cannot annotate output: node '{node.name}' is already annotated"
+        )
+    if quant_property.mark_annotated:
+        raise ValueError(
+            "quant_property.mark_annotated must be False for output annotation"
+        )
+    if quant_property.optional:
+        raise ValueError("quant_property.optional must be False for output annotation")
+    if quant_property.index != 0:
+        raise ValueError("Only one output annotation supported currently")
 
     annotate_output_qspec(node, quant_property.qspec)
 
@@ -145,7 +162,9 @@ def _match_pattern(
 
     Each 'pattern' element is composed of a list of disjunctive nodes types.
     """
-    assert len(pattern) > 0, "No pattern provided"
+    if len(pattern) < 1:
+        raise ValueError("No pattern provided")
+
     if filter_fn is not None:
         if not filter_fn(node):
             return False
@@ -417,8 +436,14 @@ def any_or_hardtanh_min_zero(n: Node):
         torch.ops.aten.concatenate.default,
         torch.ops.aten.stack.default,
     ):
-        assert isinstance(node.args[0], list)
-        assert len(node.args[0]) != 0
+        # first argument should be a non-empty list of nodes
+        if not isinstance(node.args[0], list):
+            raise TypeError(
+                "Expected node.args[0] to be a list, got "
+                f"{type(node.args[0]).__name__!r}"
+            )
+        if len(node.args[0]) == 0:
+            raise ValueError("Expected non-empty list for node.args[0]")
 
         shared_qspec = SharedQuantizationSpec((node.args[0][0], node))
         quant_properties.quant_inputs = [
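A note on the pattern in this file: unlike `assert`, the explicit exceptions survive `python -O` (which strips assert statements) and carry messages that name the offending value. A minimal standalone sketch of the same check (the `Node` stub below is hypothetical, standing in for `torch.fx.Node`):

# Minimal sketch of the assert-to-exception pattern; the Node stub here
# is hypothetical, standing in for torch.fx.Node.
class Node:
    def __init__(self, name: str):
        self.name = name


def require_node(n_arg: object) -> Node:
    # Unlike `assert isinstance(n_arg, Node)`, this check still runs
    # under `python -O` and reports the offending type by name.
    if not isinstance(n_arg, Node):
        raise TypeError(
            f"n_arg must be a Node instance, got {type(n_arg).__name__!r}"
        )
    return n_arg


require_node(Node("conv1"))  # passes

try:
    require_node("conv1")  # a plain str is rejected
except TypeError as e:
    print(e)  # n_arg must be a Node instance, got 'str'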

backends/arm/scripts/build_executorch.sh

Lines changed: 1 addition & 40 deletions
@@ -54,47 +54,9 @@ source ${setup_path_script}
 
 et_build_dir="${et_build_root}/cmake-out"
 
-# Used for flatcc host excutable if Devtools is used
-et_build_host_dir=${et_build_root}/cmake-out-host-tools
-
 set -x
 cd "${et_root_dir}"
 
-if [ "$build_with_etdump" = true ] ; then
-    ( set +x ;
-      echo "--------------------------------------------------------------------------------" ;
-      echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_build_host_dir}/bin/flatcc" ;
-      echo "--------------------------------------------------------------------------------" )
-
-    # Build host flatcc bin
-    # This is a way to work around that the flatcc executable get build for target (e.g. Arm) later
-    # and get replaced. flatcc is a tool used on the host for etdump and BundleIO handling.
-    # The way to solve this is to generate it once for the host, then copy it to ${et_build_host_dir}/bin
-    # and later point that out with -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc later.
-
-    cmake \
-        -DCMAKE_INSTALL_PREFIX=${et_build_host_dir} \
-        -DCMAKE_BUILD_TYPE=${build_type} \
-        -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
-        -DEXECUTORCH_ENABLE_LOGGING=ON \
-        -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-        -DEXECUTORCH_BUILD_DEVTOOLS=ON \
-        -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-        -DFLATCC_ALLOW_WERROR=OFF \
-        -B"${et_build_host_dir}" \
-        "${et_root_dir}"
-
-    # third-party/flatcc/bin/flatcc gets build already in the in the cmake config step above
-    # so there is no cmake building step done
-
-    # Copy host flatcc excutable so it's saved when we build for target (Arm) later
-    et_build_host_dir=$(realpath ${et_build_host_dir})
-    mkdir -p ${et_build_host_dir}/bin
-    cp third-party/flatcc/bin/flatcc ${et_build_host_dir}/bin
-fi
-
 ( set +x ;
   echo "--------------------------------------------------------------------------------" ;
   echo "Build ExecuTorch target libs ${build_type} into '${et_build_dir}'" ;
@@ -111,8 +73,7 @@ if [ "$build_with_etdump" = true ] ; then
    build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \
                             -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
                             -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \
-                            -DFLATCC_ALLOW_WERROR=OFF \
-                            -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc "
+                            -DFLATCC_ALLOW_WERROR=OFF "
 fi
 
 echo "Building with Devtools: ${build_devtools_flags} ${build_with_etdump_flags}"

backends/mediatek/partitioner.py

Lines changed: 1 addition & 0 deletions
@@ -81,6 +81,7 @@ def ops_to_not_decompose(
         torch.ops.aten.upsample_bilinear2d.vec,
         torch.ops.aten.upsample_nearest2d.default,
         torch.ops.aten.upsample_nearest2d.vec,
+        torch.ops.aten._safe_softmax.default,
     ]
     return (ops_not_decompose, None)

backends/mediatek/scripts/mtk_build.sh

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ rm -rf cmake-android-out && mkdir cmake-android-out && cd cmake-android-out
 cmake -DBUCK2="$BUCK_PATH" \
       -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
       -DANDROID_ABI=arm64-v8a \
+      -DANDROID_PLATFORM=android-26 \
       -DEXECUTORCH_BUILD_NEURON=ON \
       -DNEURON_BUFFER_ALLOCATOR_LIB="$NEURON_BUFFER_ALLOCATOR_LIB" \
       ..

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 2 additions & 1 deletion
@@ -14,6 +14,7 @@
 
 using executorch::extension::llm::GenerationConfig;
 using executorch::extension::llm::Image;
+using executorch::extension::llm::TextLLMRunner;
 using executorch::runtime::Error;
 
 NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
@@ -23,7 +24,7 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
 @end
 
 @implementation LLaMARunner {
-  std::unique_ptr<example::Runner> _runner;
+  std::unique_ptr<TextLLMRunner> _runner;
 }
 
 - (instancetype)initWithModelPath:(NSString*)modelPath

examples/mediatek/aot_utils/oss_utils/utils.py

Lines changed: 5 additions & 5 deletions
@@ -24,6 +24,8 @@ def build_executorch_binary(
     file_name,
     dataset,
     quant_dtype: Optional[Precision] = None,
+    skip_op_name: Optional[set] = None,
+    skip_op_type: Optional[set] = None,
 ):
     if quant_dtype is not None:
         quantizer = NeuropilotQuantizer()
@@ -47,14 +49,12 @@ def build_executorch_binary(
     from executorch.exir.program._program import to_edge_transform_and_lower
 
     edge_compile_config = exir.EdgeCompileConfig(_check_ir_validity=False)
-    # skipped op names are used for deeplabV3 model
     neuro_partitioner = NeuropilotPartitioner(
         [CompileSpec("platform-config", b"mt6989")],
-        op_names_to_skip={
-            "aten_convolution_default_106",
-            "aten_convolution_default_107",
-        },
+        op_types_to_skip=skip_op_type,
+        op_names_to_skip=skip_op_name,
     )
+
     edge_prog = to_edge_transform_and_lower(
         aten_dialect,
         compile_config=edge_compile_config,
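With this change the hard-coded deeplabV3 skips move to the call site: callers now pass per-model `skip_op_name` / `skip_op_type` sets. A hedged sketch of the new calling convention (the stub below merely mirrors the signature; the real helper is `build_executorch_binary` above):

from typing import Optional


# Hypothetical stub mirroring the new signature, just to show how the
# skip sets now travel with each caller instead of living in the helper.
def build_executorch_binary_stub(
    model,
    inputs,
    file_name,
    dataset,
    quant_dtype=None,
    skip_op_name: Optional[set] = None,
    skip_op_type: Optional[set] = None,
):
    print("op names to skip:", sorted(skip_op_name or set()))
    print("op types to skip:", sorted(skip_op_type or set()))


# The deeplabV3-specific names that used to be hard-coded in the helper:
build_executorch_binary_stub(
    model=None,
    inputs=(),
    file_name="deeplab_v3_mtk",
    dataset=[],
    skip_op_name={"aten_convolution_default_106", "aten_convolution_default_107"},
)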
Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+# Copyright (c) MediaTek Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import os
+import sys
+
+if os.getcwd() not in sys.path:
+    sys.path.append(os.getcwd())
+import argparse
+import os
+
+import dcgan_main
+
+import torch
+from aot_utils.oss_utils.utils import build_executorch_binary
+from executorch.backends.mediatek import Precision
+
+
+class NhwcWrappedModel(torch.nn.Module):
+    def __init__(self, is_gen=True):
+        super(NhwcWrappedModel, self).__init__()
+        if is_gen:
+            self.dcgan = dcgan_main.Generator()
+        else:
+            self.dcgan = dcgan_main.Discriminator()
+
+    def forward(self, input1):
+        nchw_input1 = input1.permute(0, 3, 1, 2)
+        output = self.dcgan(nchw_input1)
+        return output
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "-a",
+        "--artifact",
+        help="path for storing generated artifacts by this example. " "Default ./dcgan",
+        default="./dcgan",
+        type=str,
+    )
+
+    args = parser.parse_args()
+
+    # ensure the working directory exist.
+    os.makedirs(args.artifact, exist_ok=True)
+
+    # prepare dummy data
+    inputG = torch.randn(1, 1, 1, 100)
+    inputD = torch.randn(1, 64, 64, 3)
+
+    # build Generator
+    netG_instance = NhwcWrappedModel(True)
+    netG_pte_filename = "dcgan_netG_mtk"
+    build_executorch_binary(
+        netG_instance.eval(),
+        (torch.randn(1, 1, 1, 100),),
+        f"{args.artifact}/{netG_pte_filename}",
+        [(inputG,)],
+        quant_dtype=Precision.A8W8,
+    )
+
+    # build Discriminator
+    netD_instance = NhwcWrappedModel(False)
+    netD_pte_filename = "dcgan_netD_mtk"
+    build_executorch_binary(
+        netD_instance.eval(),
+        (torch.randn(1, 64, 64, 3),),
+        f"{args.artifact}/{netD_pte_filename}",
+        [(inputD,)],
+        quant_dtype=Precision.A8W8,
+    )
+
+    # save data to inference on device
+    input_list_file = f"{args.artifact}/input_list_G.txt"
+    with open(input_list_file, "w") as f:
+        f.write("inputG_0_0.bin")
+        f.flush()
+    file_name = f"{args.artifact}/inputG_0_0.bin"
+    inputG.detach().numpy().tofile(file_name)
+    file_name = f"{args.artifact}/goldenG_0_0.bin"
+    goldenG = netG_instance(inputG)
+    goldenG.detach().numpy().tofile(file_name)
+
+    input_list_file = f"{args.artifact}/input_list_D.txt"
+    with open(input_list_file, "w") as f:
+        f.write("inputD_0_0.bin")
+        f.flush()
+    file_name = f"{args.artifact}/inputD_0_0.bin"
+    inputD.detach().numpy().tofile(file_name)
+    file_name = f"{args.artifact}/goldenD_0_0.bin"
+    goldenD = netD_instance(inputD)
+    goldenD.detach().numpy().tofile(file_name)
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
+"""Ref https://github.com/pytorch/examples/blob/main/dcgan/main.py"""
+
+import torch.nn as nn
+
+
+class Generator(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.main = nn.Sequential(
+            # input is Z, going into a convolution
+            nn.ConvTranspose2d(100, 64 * 8, 4, 1, 0, bias=False),
+            nn.BatchNorm2d(64 * 8),
+            nn.ReLU(True),
+            # state size. (64*8) x 4 x 4
+            nn.ConvTranspose2d(64 * 8, 64 * 4, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(64 * 4),
+            nn.ReLU(True),
+            # state size. (64*4) x 8 x 8
+            nn.ConvTranspose2d(64 * 4, 64 * 2, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(64 * 2),
+            nn.ReLU(True),
+            # state size. (64*2) x 16 x 16
+            nn.ConvTranspose2d(64 * 2, 64, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(64),
+            nn.ReLU(True),
+            # state size. (64) x 32 x 32
+            nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False),
+            nn.Tanh(),
+            # state size. (3) x 64 x 64
+        )
+
+    def forward(self, input):
+        output = self.main(input)
+        return output
+
+
+# main_netG_input_shape = [1, 100, 1, 1]
+# model = Generator()
+
+
+class Discriminator(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.main = nn.Sequential(
+            # input is (3) x 64 x 64
+            nn.Conv2d(3, 64, 4, 2, 1, bias=False),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (64) x 32 x 32
+            nn.Conv2d(64, 64 * 2, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(64 * 2),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (64*2) x 16 x 16
+            nn.Conv2d(64 * 2, 64 * 4, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(64 * 4),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (64*4) x 8 x 8
+            nn.Conv2d(64 * 4, 64 * 8, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(64 * 8),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (64*8) x 4 x 4
+            nn.Conv2d(64 * 8, 1, 4, 1, 0, bias=False),
+            nn.Sigmoid(),
+        )
+
+    def forward(self, input):
+        output = self.main(input)
+
+        return output.view(-1, 1).squeeze(1)
+
+
+# main_netD_input_shape = [1, 3, 64, 64]
+# model = Discriminator()
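As a quick sanity check, the commented "state size" annotations can be verified with dummy forward passes. A sketch assuming this file is importable as `dcgan_main`, as the export script above assumes:

import torch

from dcgan_main import Discriminator, Generator

# Verify the commented shapes with dummy NCHW inputs.
netG = Generator().eval()
z = torch.randn(1, 100, 1, 1)  # main_netG_input_shape
print(netG(z).shape)  # torch.Size([1, 3, 64, 64])

netD = Discriminator().eval()
img = torch.randn(1, 3, 64, 64)  # main_netD_input_shape
print(netD(img).shape)  # torch.Size([1])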
