@@ -59,6 +59,7 @@ class TensorOpInfo:

SKIP_LIFT_OPS = {
aten.full_like.default,
aten.full.default,
aten.arange.start_step,
aten.arange.default,
aten.scalar_tensor.default,
14 changes: 9 additions & 5 deletions backends/qualcomm/builders/op_index_put.py
@@ -88,11 +88,15 @@ def define_node(

# Need to reconstruct the index tensor.
# E.g., based on ScatterND Op Def in QNN Docs.
# Given that
# shape of input: [1, 12, 1024, 64]
# indicies_node: [None, None, aten__to_copy_default_1]
# shape of aten__to_copy_default_1: [1]
# The shape of index tensor should be [1, 12, 1, 3]
# Torch:
# Given that
# shape of input: [1, 12, 1024, 64]
# indicies_node: [None, None, aten__to_copy_default_1]
# shape of aten__to_copy_default_1: [1]
# QNN:
# Index tensor:
# Shape: [1, 12, 1, 3]
# Value: [[[0,0,x]],[[0,1,x]],...,[[0,11,x]]]
# The index tensor is treated as a 4-dimensional tensor of 3-tuples,
# where each 3-tuple is a partial index into the input
# Reference code for QNN ScatterNd:
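To make the comment above concrete, here is a minimal sketch in plain PyTorch of how an index tensor of that shape and value layout could be assembled. This is not the builder's actual code; `build_scatter_nd_indices` is a hypothetical helper written only to illustrate the [1, 12, 1, 3] tensor of 3-tuples the comment describes.

```python
import torch

def build_scatter_nd_indices(input_shape, pos):
    # input_shape: e.g. (1, 12, 1024, 64); pos: 1-D tensor of target positions.
    b, h = input_shape[0], input_shape[1]
    n = pos.numel()
    batch_idx = torch.arange(b).view(b, 1, 1).expand(b, h, n)
    head_idx = torch.arange(h).view(1, h, 1).expand(b, h, n)
    pos_idx = pos.view(1, 1, n).expand(b, h, n)
    # Stack into 3-tuples: each entry (i, j, pos) is a partial index into input.
    return torch.stack([batch_idx, head_idx, pos_idx], dim=-1)

idx = build_scatter_nd_indices((1, 12, 1024, 64), torch.tensor([5]))
print(idx.shape)      # torch.Size([1, 12, 1, 3])
print(idx[0, :3, 0])  # tensor([[0, 0, 5], [0, 1, 5], [0, 2, 5]])
```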
8 changes: 0 additions & 8 deletions backends/qualcomm/builders/op_linear.py
@@ -4,7 +4,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import warnings
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -70,13 +69,6 @@ def define_node(
if len(node.args) >= 3:
bias_node = self.get_node(node.args[2])

# TODO remove this when qnn sdk support
if QCOM_SCALES in bias_node.meta.get(QCOM_QUANT_ATTRS, {}):
warnings.warn(
f"[QNN Delegate Op Builder]: Fallback linear bias, {bias_node}. per channel bias quantization is not support yet.",
stacklevel=1,
)

bias_tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC
bias_tensor = get_parameter(bias_node, self.edge_program)
# if bias_node is getitem
5 changes: 3 additions & 2 deletions backends/qualcomm/tests/models.py
@@ -910,9 +910,10 @@ def forward(self, x):


class IndexCopy(torch.nn.Module):
def __init__(self, skip_mutable_buffer=False):
def __init__(self, copy_dim=1, skip_mutable_buffer=False):
super().__init__()
self.skip_mutable_buffer = skip_mutable_buffer
self.copy_dim = copy_dim
self.register_buffer(
"k_cache",
torch.zeros((1, 1024, 12, 64), dtype=torch.float32),
@@ -921,7 +922,7 @@ def __init__(self, skip_mutable_buffer=False):

def forward(self, input_pos, k_val):
k_out = self.k_cache
k_out.index_copy_(1, input_pos, k_val)
k_out.index_copy_(self.copy_dim, input_pos, k_val)
return k_out + 0


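For context, a standalone sketch of what the updated IndexCopy module computes, using plain PyTorch outside the test harness. It shows why the test cases below pair each `copy_dim` with a differently shaped `k_val`: `index_copy_(dim, index, source)` requires `source` to match the cache shape everywhere except `dim`, where its size must equal `len(index)`.

```python
import torch

# copy_dim=1, one position: source shape (1, 1, 12, 64).
k_cache = torch.zeros((1, 1024, 12, 64))
input_pos = torch.tensor([2], dtype=torch.int64)
k_val = torch.randn(1, 1, 12, 64)
k_cache.index_copy_(1, input_pos, k_val)   # writes k_cache[:, 2, :, :]

# copy_dim=2, two positions: source shape (1, 1024, 2, 64).
k_cache2 = torch.zeros((1, 1024, 12, 64))
pos2 = torch.tensor([2, 5], dtype=torch.int64)
k_val2 = torch.randn(1, 1024, 2, 64)
k_cache2.index_copy_(2, pos2, k_val2)      # writes k_cache2[:, :, [2, 5], :]
```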
121 changes: 117 additions & 4 deletions backends/qualcomm/tests/test_qnn_delegate.py
@@ -622,19 +622,59 @@ def test_qnn_backend_index(self):
def test_qnn_backend_index_copy(self):
test_comb = [
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=False), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=True), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
]
for i, test in enumerate(test_comb):
with self.subTest(i=i):
@@ -1907,19 +1947,59 @@ def test_qnn_backend_index(self):
def test_qnn_backend_index_copy(self):
test_comb = [
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=False), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=True), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
]
for i, test in enumerate(test_comb):
with self.subTest(i=i):
@@ -4909,6 +4989,39 @@ def test_swin_transformer(self):
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_whisper(self):
if not self.required_envs():
self.skipTest("missing required envs")

cmds = [
"python",
f"{self.executorch_root}/examples/qualcomm/oss_scripts/whisper/whisper.py",
"--artifact",
self.artifact_dir,
"--build_folder",
self.build_folder,
"--device",
self.device,
"--model",
self.model,
"--ip",
self.ip,
"--port",
str(self.port),
]
if self.host:
cmds.extend(["--host", self.host])

p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
with Listener((self.ip, self.port)) as listener:
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertLessEqual(msg["wer"], 0.25)


class TestExampleQaihubScript(TestQNN):
def test_utils_export(self):
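The new `test_whisper` drives `whisper.py` as a subprocess and blocks on a `multiprocessing.connection.Listener` for a JSON result. A hypothetical sketch of the reporting side of that handshake (the `report_result` helper is an assumption; only the `"wer"`/`"Error"` payload keys come from the test above):

```python
import json
from multiprocessing.connection import Client

def report_result(ip, port, wer=None, error=None):
    # Connect back to the test's Listener and send a JSON string,
    # matching the json.loads(conn.recv()) on the receiving side.
    payload = {"Error": error} if error else {"wer": wer}
    with Client((ip, port)) as conn:
        conn.send(json.dumps(payload))

# e.g. report_result("127.0.0.1", 8080, wer=0.18)
```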
3 changes: 3 additions & 0 deletions examples/qualcomm/CMakeLists.txt
@@ -90,6 +90,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/llama)
# build qnn_mimi_decoder_runner
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/moshi)

# build qnn_whisper_runner for whisper
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/whisper)

# build qaihub_llama2_7b_runner and qaihub_llama3_8b_runner
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/qaihub_scripts/llama)

46 changes: 46 additions & 0 deletions examples/qualcomm/oss_scripts/whisper/CMakeLists.txt
@@ -0,0 +1,46 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


# preprocess qnn runner src files for whisper
set(_qnn_whisper_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/qnn_whisper_runner.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/decoder.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/decoder.h
${CMAKE_CURRENT_LIST_DIR}/runner/encoder.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/encoder.h
${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/runner.h
${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp
)

# build qnn whisper runner
add_executable(qnn_whisper_runner ${_qnn_whisper_runner__srcs})
target_include_directories(
qnn_whisper_runner PUBLIC ${_common_include_directories}
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)


target_link_libraries(
qnn_whisper_runner
qnn_executorch_backend
executorch_core
extension_data_loader
extension_flat_tensor
extension_module
extension_tensor
full_portable_ops_lib
gflags
tokenizers
)

target_compile_options(
qnn_whisper_runner PUBLIC ${_common_compile_options}
)
set_target_properties(
qnn_whisper_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)