@@ -59,6 +59,7 @@ class TensorOpInfo:

SKIP_LIFT_OPS = {
aten.full_like.default,
aten.full.default,
aten.arange.start_step,
aten.arange.default,
aten.scalar_tensor.default,
14 changes: 9 additions & 5 deletions backends/qualcomm/builders/op_index_put.py
@@ -88,11 +88,15 @@ def define_node(

# Need to reconstruct the index tensor.
# E.g., based on ScatterND Op Def in QNN Docs.
# Given that
# shape of input: [1, 12, 1024, 64]
# indicies_node: [None, None, aten__to_copy_default_1]
# shape of aten__to_copy_default_1: [1]
# The shape of index tensor should be [1, 12, 1, 3]
# Torch:
# Given that
# shape of input: [1, 12, 1024, 64]
# indicies_node: [None, None, aten__to_copy_default_1]
# shape of aten__to_copy_default_1: [1]
# QNN:
# Index tensor:
# Shape: [1, 12, 1, 3]
# Value: [[[0,0,x]],[[0,1,x]],...,[[0,11,x]]]
# The index tensor is treated as a 4-dimensional tensor of 3-tuples,
# where each 3-tuple is a partial index into the input
# Reference code for QNN ScatterNd:
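To make the comment above concrete, here is a minimal sketch in plain PyTorch of how an index tensor of that shape and value layout could be assembled. This is not the builder's actual code; `build_scatter_nd_indices` is a hypothetical helper written only to illustrate the [1, 12, 1, 3] tensor of 3-tuples the comment describes.

```python
import torch

def build_scatter_nd_indices(input_shape, pos):
    # input_shape: e.g. (1, 12, 1024, 64); pos: 1-D tensor of target positions.
    b, h = input_shape[0], input_shape[1]
    n = pos.numel()
    batch_idx = torch.arange(b).view(b, 1, 1).expand(b, h, n)
    head_idx = torch.arange(h).view(1, h, 1).expand(b, h, n)
    pos_idx = pos.view(1, 1, n).expand(b, h, n)
    # Stack into 3-tuples: each entry (i, j, pos) is a partial index into input.
    return torch.stack([batch_idx, head_idx, pos_idx], dim=-1)

idx = build_scatter_nd_indices((1, 12, 1024, 64), torch.tensor([5]))
print(idx.shape)      # torch.Size([1, 12, 1, 3])
print(idx[0, :3, 0])  # tensor([[0, 0, 5], [0, 1, 5], [0, 2, 5]])
```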
8 changes: 0 additions & 8 deletions backends/qualcomm/builders/op_linear.py
@@ -4,7 +4,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import warnings
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -70,13 +69,6 @@ def define_node(
if len(node.args) >= 3:
bias_node = self.get_node(node.args[2])

# TODO remove this when qnn sdk support
if QCOM_SCALES in bias_node.meta.get(QCOM_QUANT_ATTRS, {}):
warnings.warn(
f"[QNN Delegate Op Builder]: Fallback linear bias, {bias_node}. per channel bias quantization is not support yet.",
stacklevel=1,
)

bias_tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC
bias_tensor = get_parameter(bias_node, self.edge_program)
# if bias_node is getitem
5 changes: 3 additions & 2 deletions backends/qualcomm/tests/models.py
@@ -910,9 +910,10 @@ def forward(self, x):


class IndexCopy(torch.nn.Module):
def __init__(self, skip_mutable_buffer=False):
def __init__(self, copy_dim=1, skip_mutable_buffer=False):
super().__init__()
self.skip_mutable_buffer = skip_mutable_buffer
self.copy_dim = copy_dim
self.register_buffer(
"k_cache",
torch.zeros((1, 1024, 12, 64), dtype=torch.float32),
@@ -921,7 +922,7 @@ def __init__(self, skip_mutable_buffer=False):

def forward(self, input_pos, k_val):
k_out = self.k_cache
k_out.index_copy_(1, input_pos, k_val)
k_out.index_copy_(self.copy_dim, input_pos, k_val)
return k_out + 0


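For context, a standalone sketch of what the updated IndexCopy module computes, using plain PyTorch outside the test harness. It shows why the test cases below pair each `copy_dim` with a differently shaped `k_val`: `index_copy_(dim, index, source)` requires `source` to match the cache shape everywhere except `dim`, where its size must equal `len(index)`.

```python
import torch

# copy_dim=1, one position: source shape (1, 1, 12, 64).
k_cache = torch.zeros((1, 1024, 12, 64))
input_pos = torch.tensor([2], dtype=torch.int64)
k_val = torch.randn(1, 1, 12, 64)
k_cache.index_copy_(1, input_pos, k_val)   # writes k_cache[:, 2, :, :]

# copy_dim=2, two positions: source shape (1, 1024, 2, 64).
k_cache2 = torch.zeros((1, 1024, 12, 64))
pos2 = torch.tensor([2, 5], dtype=torch.int64)
k_val2 = torch.randn(1, 1024, 2, 64)
k_cache2.index_copy_(2, pos2, k_val2)      # writes k_cache2[:, :, [2, 5], :]
```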
121 changes: 117 additions & 4 deletions backends/qualcomm/tests/test_qnn_delegate.py
@@ -622,19 +622,59 @@ def test_qnn_backend_index(self):
def test_qnn_backend_index_copy(self):
test_comb = [
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=False), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=True), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
]
for i, test in enumerate(test_comb):
with self.subTest(i=i):
@@ -1907,19 +1947,59 @@ def test_qnn_backend_index(self):
def test_qnn_backend_index_copy(self):
test_comb = [
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=False), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy(skip_mutable_buffer=True), # noqa: F405
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=False
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=1, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1, 12, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2], dtype=torch.int64),
torch.randn([1, 1024, 1, 64]),
),
},
{
QCOM_MODULE: IndexCopy( # noqa: F405
copy_dim=2, skip_mutable_buffer=True
),
QCOM_SAMPLE_INPUTS: (
torch.tensor([2, 5], dtype=torch.int64),
torch.randn([1, 1024, 2, 64]),
),
},
]
for i, test in enumerate(test_comb):
with self.subTest(i=i):
@@ -4909,6 +4989,39 @@ def test_swin_transformer(self):
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_whisper(self):
if not self.required_envs():
self.skipTest("missing required envs")

cmds = [
"python",
f"{self.executorch_root}/examples/qualcomm/oss_scripts/whisper/whisper.py",
"--artifact",
self.artifact_dir,
"--build_folder",
self.build_folder,
"--device",
self.device,
"--model",
self.model,
"--ip",
self.ip,
"--port",
str(self.port),
]
if self.host:
cmds.extend(["--host", self.host])

p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
with Listener((self.ip, self.port)) as listener:
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertLessEqual(msg["wer"], 0.25)


class TestExampleQaihubScript(TestQNN):
def test_utils_export(self):
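The new `test_whisper` drives `whisper.py` as a subprocess and blocks on a `multiprocessing.connection.Listener` for a JSON result. A hypothetical sketch of the reporting side of that handshake (the `report_result` helper is an assumption; only the `"wer"`/`"Error"` payload keys come from the test above):

```python
import json
from multiprocessing.connection import Client

def report_result(ip, port, wer=None, error=None):
    # Connect back to the test's Listener and send a JSON string,
    # matching the json.loads(conn.recv()) on the receiving side.
    payload = {"Error": error} if error else {"wer": wer}
    with Client((ip, port)) as conn:
        conn.send(json.dumps(payload))

# e.g. report_result("127.0.0.1", 8080, wer=0.18)
```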
3 changes: 3 additions & 0 deletions examples/qualcomm/CMakeLists.txt
@@ -90,6 +90,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/llama)
# build qnn_mimi_decoder_runner
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/moshi)

# build qnn_whisper_runner for whisper
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/whisper)

# build qaihub_llama2_7b_runner and qaihub_llama3_8b_runner
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/qaihub_scripts/llama)

46 changes: 46 additions & 0 deletions examples/qualcomm/oss_scripts/whisper/CMakeLists.txt
@@ -0,0 +1,46 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


# preprocess qnn runner src files for whisper
set(_qnn_whisper_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/qnn_whisper_runner.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/decoder.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/decoder.h
${CMAKE_CURRENT_LIST_DIR}/runner/encoder.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/encoder.h
${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp
${CMAKE_CURRENT_LIST_DIR}/runner/runner.h
${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp
)

# build qnn whisper runner
add_executable(qnn_whisper_runner ${_qnn_whisper_runner__srcs})
target_include_directories(
qnn_whisper_runner PUBLIC ${_common_include_directories}
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)


target_link_libraries(
qnn_whisper_runner
qnn_executorch_backend
executorch_core
extension_data_loader
extension_flat_tensor
extension_module
extension_tensor
full_portable_ops_lib
gflags
tokenizers
)

target_compile_options(
qnn_whisper_runner PUBLIC ${_common_compile_options}
)
set_target_properties(
qnn_whisper_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)