Skip to content

Commit f30a3b1

Browse files
authored
Qualcomm AI Engine Direct - Streaming Mimi Enablement (#10570)
### Summary Stateless (Static) Streaming Mimi Decoder Enablement. #### Scripts to run 1. Run `examples/models/moshi/mimi/install_requirements.sh`, then manually change it to `pip install moshi==0.2.3`, since the static mimi decoder is currently built on top of version `0.2.3`. 2. Run the example script - CPU Encoder with QNN Decoder: `python examples/qualcomm/oss_scripts/moshi/mimi.py -b build-android -s $DEVICE -m SM8650 --use_cpu_encoder` - QNN Encoder/Decoder: `python examples/qualcomm/oss_scripts/moshi/mimi.py -b build-android -s $DEVICE -m SM8650` #### Stats for SM8650 - SQNR score improvement after streaming mode enablement - CPU Encoder with QNN Decoder: 4.7 -> 7.8 - QNN Encoder/Decoder: 0.46 -> 0.88 - Encoder: 3.0 ms/chunk - Decoder: 5.9 ms/chunk #### Follow-ups - Add README - UT and community CI enablement - Inference speed optimization - Accuracy improvement
1 parent e42dafc commit f30a3b1

File tree

14 files changed

+2136
-212
lines changed

14 files changed

+2136
-212
lines changed

backends/qualcomm/_passes/convert_conv1d_to_conv2d.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ def call(self, graph_module: torch.fx.GraphModule):
9999
)
100100

101101
num_args = len(node.args)
102-
bias_node = node.args[2]
102+
103+
bias_node = node.args[2] if num_args > 2 else None
103104
stride = [1] + node.args[3] if num_args > 3 else [1, 1]
104105
padding = [0] + node.args[4] if num_args > 4 else [0, 0]
105106
if node.target == torch.ops.aten.conv1d.default:

backends/qualcomm/_passes/lift_constant_scalar_operands.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,13 @@ class TensorOpInfo:
4040
aten.ne.Scalar: TensorOpInfo(aten.ne.Tensor, False, False),
4141
aten.add.Scalar: TensorOpInfo(aten.add.Tensor, False, False),
4242
aten.add_.Scalar: TensorOpInfo(aten.add_.Tensor, False, False),
43+
# For below cases, refer to LiftAddTensor Model in UT for sample
44+
aten.add.Tensor: TensorOpInfo(aten.add.Tensor, False, False),
4345
aten.div.Scalar: TensorOpInfo(aten.div.Tensor, False, False),
4446
aten.mul.Scalar: TensorOpInfo(aten.mul.Tensor, False, False),
4547
aten.rsub.Scalar: TensorOpInfo(aten.rsub.Tensor, False, False),
4648
aten.sub.Scalar: TensorOpInfo(aten.sub.Tensor, False, False),
49+
aten.sub.Tensor: TensorOpInfo(aten.sub.Tensor, False, False),
4750
aten.pow.Tensor_Scalar: TensorOpInfo(aten.pow.Tensor_Tensor, False, False),
4851
# The scalar number arg[1] is missing when using default. Result in a corner case to deal
4952
aten.leaky_relu.default: TensorOpInfo(aten.prelu.default, True, False),

backends/qualcomm/tests/models.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -998,6 +998,15 @@ def forward(self, x):
998998
return self.constant < x
999999

10001000

1001+
class LiftAddTensor(torch.nn.Module):
    """Minimal module that adds a Python int constant to a tensor.

    Referenced by the lift_constant_scalar_operands pass: tracing
    ``x + offset`` produces an aten.add.Tensor node carrying a scalar
    operand, which is the case the pass must lift to a tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Folded at trace time to the Python int 1, so the add op keeps
        # a scalar argument for the lift pass to handle.
        offset = 2 - 1
        return x + offset
1008+
1009+
10011010
class Linear(torch.nn.Module):
10021011
def __init__(self, use_bias: bool = True):
10031012
super().__init__()

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,12 @@ def test_qnn_backend_einsum_outer_product_relu(self):
10721072
)
10731073
self.lower_module_and_test_output(module, sample_input)
10741074

1075+
# TODO: Create a new UT class for passes specific checks
def test_qnn_backend_lift_add_tensor(self):
    """Lower LiftAddTensor and compare delegate output against eager mode.

    Exercises the constant-scalar lifting path with an int32 input.
    """
    module = LiftAddTensor()  # noqa: F405
    sample_input = (torch.tensor([1, 2, 3, 4], dtype=torch.int32),)
    self.lower_module_and_test_output(module, sample_input)
1080+
10751081
@unittest.skip("Fail because of bad accuracy")
10761082
def test_qnn_backend_moe_feed_forward(self):
10771083
args = ModelArgs()

examples/qualcomm/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/executor_runner)
9393
# build qnn_llama_runner for llama
9494
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/llama)
9595

96+
# build qnn_mimi_decoder_runner
97+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/moshi)
98+
9699
# build qaihub_llama2_7b_runner and qaihub_llama3_8b_runner
97100
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/qaihub_scripts/llama)
98101

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Sources for the standalone streaming mimi decoder runner.
set(_qnn_mimi_decoder_runner__srcs
    ${CMAKE_CURRENT_LIST_DIR}/qnn_mimi_decoder_runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/runner.h
)

# build mimi decoder runner
add_executable(qnn_mimi_decoder_runner ${_qnn_mimi_decoder_runner__srcs})
target_include_directories(
  qnn_mimi_decoder_runner PUBLIC ${_common_include_directories}
)
target_link_libraries(
  qnn_mimi_decoder_runner
  qnn_executorch_backend
  executorch_core
  extension_module
  extension_data_loader
  extension_flat_tensor
  gflags
)

# Fix: was `qnn_llama_runner` (copy/paste from the llama runner's CMake),
# which applied the common compile options to the wrong target; they must
# apply to qnn_mimi_decoder_runner built above.
target_compile_options(
  qnn_mimi_decoder_runner PUBLIC ${_common_compile_options}
)

# Let the runner resolve its shared libraries next to the binary on device.
set_target_properties(
  qnn_mimi_decoder_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)

0 commit comments

Comments
 (0)