Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5022,6 +5022,40 @@ def test_swin_transformer(self):
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_t5(self):
    """End-to-end T5 QA test on a QNN device.

    Launches the t5 example script as a subprocess, receives the scored
    result over a socket Listener, and asserts the F1 metric.
    """
    if not self.required_envs([self.qa_dataset]):
        self.skipTest("missing required envs")
    cmds = [
        "python",
        f"{self.executorch_root}/examples/qualcomm/oss_scripts/t5/t5.py",
        "--dataset",
        # The guard above requires qa_dataset and the assertion below is a
        # QA metric (F1), so the QA dataset is what must be passed here
        # (was previously self.sentence_dataset by mistake).
        self.qa_dataset,
        "--artifact",
        self.artifact_dir,
        "--build_folder",
        self.build_folder,
        "--device",
        self.device,
        "--model",
        self.model,
        "--ip",
        self.ip,
        "--port",
        str(self.port),
    ]
    if self.host:
        cmds.extend(["--host", self.host])

    p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
    with Listener((self.ip, self.port)) as listener:
        conn = listener.accept()
        p.communicate()
        msg = json.loads(conn.recv())
        if "Error" in msg:
            self.fail(msg["Error"])
        else:
            self.assertGreaterEqual(msg["f1"], 0.7)

def test_whisper(self):
if not self.required_envs():
self.skipTest("missing required envs")
Expand Down
1 change: 1 addition & 0 deletions backends/qualcomm/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class TestQNN(unittest.TestCase):
executorch_root: str = ""
artifact_dir: str = ""
image_dataset: str = ""
qa_dataset: str = ""
sentence_dataset: str = ""
pretrained_weight: str = ""
enable_profile: bool = False
Expand Down
3 changes: 3 additions & 0 deletions examples/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/llama)
# build qnn_mimi_decoder_runner
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/moshi)

# build qnn_t5_runner for t5
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/t5)

# build qnn_whisper_runner for whisper
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/whisper)

Expand Down
45 changes: 45 additions & 0 deletions examples/qualcomm/oss_scripts/t5/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


# preprocess qnn runner src files for t5
# Headers are listed alongside sources so IDE generators surface them;
# the sampler from extension/llm is compiled in for token selection.
set(_qnn_t5_runner__srcs
    ${CMAKE_CURRENT_LIST_DIR}/qnn_t5_runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/decoder.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/decoder.h
    ${CMAKE_CURRENT_LIST_DIR}/runner/encoder.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/encoder.h
    ${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/runner.h
    ${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp
)

# build qnn t5 runner
add_executable(qnn_t5_runner ${_qnn_t5_runner__srcs})
# _common_include_directories is provided by the parent
# examples/qualcomm/CMakeLists.txt; tokenizers headers are needed for the
# tokenizer model loaded at runtime.
target_include_directories(
    qnn_t5_runner PUBLIC ${_common_include_directories}
    ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)


target_link_libraries(
    qnn_t5_runner
    qnn_executorch_backend
    executorch_core
    extension_data_loader
    extension_flat_tensor
    extension_module
    extension_tensor
    gflags
    tokenizers
)

target_compile_options(
    qnn_t5_runner PUBLIC ${_common_compile_options}
)
# $ORIGIN rpath lets the runner find shared libs pushed next to the binary
# on device.
set_target_properties(
    qnn_t5_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)
137 changes: 137 additions & 0 deletions examples/qualcomm/oss_scripts/t5/qnn_t5_runner.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/**
* @file
*
* This tool can run t5 with Qualcomm AI Engine Direct.
*
*/

#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/examples/qualcomm/oss_scripts/t5/runner/runner.h>
#include <executorch/runtime/platform/log.h>
#include <gflags/gflags.h>
#include <fstream>
#include <vector>

// Command-line flags (gflags) controlling model/tokenizer paths, the input
// list of pre-tokenized prompts, generation length, and the output folder.
DEFINE_string(
    model_path,
    "t5_qnn.pte",
    "t5 model serialized in flatbuffer format.");

DEFINE_string(
    tokenizer_model_path,
    "tokenizer.model",
    "The tokenizer is saved from T5Tokenize.save_pretrained for tokenizer.");
DEFINE_string(
    input_list_path,
    "input_list.txt",
    "Input list storing file name of encoded results.");
DEFINE_int32(
    seq_len,
    128,
    "Maximum sequence length for the generated output. Defaults to use the model's `max_cache_size` attribute. Will be truncated to maximal cache size if larger than `max_cache_size`.");

DEFINE_string(
    output_folder_path,
    "outputs",
    "Executorch inference data output path.");

std::vector<std::vector<std::vector<int64_t>>> parse_input_list_file(
const std::string& input_list_path) {
std::vector<std::vector<std::vector<int64_t>>> bufs;
std::ifstream input_list(input_list_path);

auto split = [](std::string s, std::string delimiter) {
size_t pos_start = 0, pos_end, delim_len = delimiter.length();
std::string token;
std::vector<std::string> res;

while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) {
token = s.substr(pos_start, pos_end - pos_start);
pos_start = pos_end + delim_len;
res.push_back(token);
}
res.push_back(s.substr(pos_start));
return res;
};

if (!input_list.is_open()) {
ET_LOG(Error, "Unable to open file");
return bufs;
}

std::string file_path;
while (std::getline(input_list, file_path)) {
auto input_files = split(file_path, " ");
int num_inputs = input_files.size();
if (num_inputs == 0) {
break;
}

bufs.emplace_back();
bufs.back().resize(num_inputs);
for (int input_index = 0; input_index < num_inputs; ++input_index) {
std::ifstream fin(input_files[input_index], std::ios::binary);
if (!fin.is_open()) {
ET_LOG(
Error, "Could not open file %s", input_files[input_index].c_str());
continue;
}

fin.seekg(0, std::ios::end);
size_t file_size = fin.tellg();
fin.seekg(0, std::ios::beg);

size_t num_tokens = file_size / sizeof(int64_t);
bufs.back()[input_index].resize(num_tokens);

if (!fin.read(
reinterpret_cast<char*>(bufs.back()[input_index].data()),
file_size)) {
ET_LOG(
Error, "Could not read file %s", input_files[input_index].c_str());
continue;
}

fin.close();
}
}

input_list.close();
return bufs;
}

int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);

std::vector<std::vector<std::vector<int64_t>>> multi_turns_input_buffers =
parse_input_list_file(FLAGS_input_list_path);

for (int iter = 0; iter < multi_turns_input_buffers.size(); ++iter) {
std::vector<char> bufs;
bufs.reserve(5 * FLAGS_seq_len); // assume each token is around 5 char
auto callback = [&](const std::string& piece) {
for (const char c : piece) {
bufs.push_back(c);
}
};

example::Runner runner(FLAGS_model_path, FLAGS_tokenizer_model_path);
// generate tokens
runner.generate(FLAGS_seq_len, multi_turns_input_buffers[iter], callback);
auto output_file_name =
FLAGS_output_folder_path + "/output_" + std::to_string(iter) + ".txt";
std::ofstream fout(output_file_name);
fout.write(bufs.data(), bufs.size());
fout.close();
}

return 0;
}
58 changes: 58 additions & 0 deletions examples/qualcomm/oss_scripts/t5/runner/decoder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/examples/qualcomm/oss_scripts/t5/runner/decoder.h>

using executorch::aten::Tensor;
using executorch::extension::Module;
using executorch::extension::TensorPtr;
using executorch::runtime::Error;
using executorch::runtime::Result;

namespace example {
// Constructs the decoder around a Module loaded lazily from `model_path`,
// mmap'ed with mlock attempted but failures ignored.
T5Decoder::T5Decoder(const std::string& model_path)
    : module_(std::make_unique<Module>(
          model_path,
          Module::LoadMode::MmapUseMlockIgnoreErrors)) {
  ET_LOG(Info, "creating decoder module: model_path=%s", model_path.c_str());
}

// Returns true once the exported "decoder" method has been loaded into the
// underlying Module.
bool T5Decoder::is_method_loaded() const {
  return module_->is_method_loaded(kDecoderForwardName);
}

// Loads the exported "decoder" method; a no-op returning Ok when it is
// already loaded.
Error T5Decoder::load() {
  return is_method_loaded() ? Error::Ok
                            : module_->load_method(kDecoderForwardName);
}
// Executes one decoder forward pass over the exported "decoder" method and
// returns its single output, the logits tensor. Propagates execution errors;
// aborts (ET_CHECK) if the method's output arity or type is not as exported.
Result<Tensor> T5Decoder::step(
    TensorPtr& input_ids,
    TensorPtr& attention_mask,
    TensorPtr& encoder_hidden_states,
    TensorPtr& encoder_attention_mask,
    TensorPtr& cache_position) {
  auto outputs_res = module_->execute(
      kDecoderForwardName,
      {input_ids,
       attention_mask,
       encoder_hidden_states,
       encoder_attention_mask,
       cache_position});
  ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
  // Fixed message: was "More then one output" — a typo, and inaccurate when
  // zero outputs are returned; report the actual count instead.
  ET_CHECK_MSG(
      outputs_res.get().size() == 1,
      "Expected exactly one output from executing decoder, got %zu.",
      outputs_res.get().size());
  ET_CHECK_MSG(
      outputs_res.get()[0].isTensor(),
      "Non Tensor Output returned from executing decoder");

  // Return the logits tensor
  return outputs_res.get()[0].toTensor();
}
} // namespace example
52 changes: 52 additions & 0 deletions examples/qualcomm/oss_scripts/t5/runner/decoder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once
#include <executorch/extension/module/module.h>
#include <executorch/extension/tensor/tensor.h>
#include <executorch/extension/tensor/tensor_ptr.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

namespace example {

/// Thin wrapper over an ExecuTorch Module holding a T5 decoder exported
/// under the method name "decoder". Owns the Module exclusively.
class T5Decoder {
 public:
  /// @param model_path Path to the serialized .pte holding the decoder.
  explicit T5Decoder(const std::string& model_path);

  /// True once the "decoder" method is loaded in the underlying Module.
  bool is_method_loaded() const;
  /// Loads the "decoder" method; Ok if already loaded.
  executorch::runtime::Error load();
  /// Runs one decoder forward pass and returns the resulting tensor
  /// (the logits).
  executorch::runtime::Result<executorch::aten::Tensor> step(
      executorch::extension::TensorPtr& input_ids,
      executorch::extension::TensorPtr& attention_mask,
      executorch::extension::TensorPtr& encoder_hidden_states,
      executorch::extension::TensorPtr& encoder_attention_mask,
      executorch::extension::TensorPtr& cache_position);
  /// Forwards to Module::method_names() on the wrapped module.
  executorch::runtime::Result<std::unordered_set<std::string>> method_names() {
    return module_->method_names();
  }
  /// Forwards to Module::get() for the named method.
  executorch::runtime::Result<executorch::runtime::EValue> get(
      const std::string& method_name) {
    return module_->get(method_name);
  }

  /// Executes the named method with no inputs on the wrapped module.
  executorch::runtime::Result<std::vector<executorch::runtime::EValue>> execute(
      const std::string& method_name) {
    return module_->execute(method_name);
  }

 private:
  std::unique_ptr<executorch::extension::Module> module_;
  // Name of the exported decoder forward method inside the .pte.
  static constexpr const char* kDecoderForwardName = "decoder";
};

} // namespace example
Loading
Loading