Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5022,6 +5022,40 @@ def test_swin_transformer(self):
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_t5(self):
    """End-to-end T5 QA test on a QNN device.

    Launches the t5 example script as a subprocess, receives the scored
    result over a socket Listener, and asserts the F1 metric.
    """
    if not self.required_envs([self.qa_dataset]):
        self.skipTest("missing required envs")
    cmds = [
        "python",
        f"{self.executorch_root}/examples/qualcomm/oss_scripts/t5/t5.py",
        "--dataset",
        # The guard above requires qa_dataset and the assertion below is a
        # QA metric (F1), so the QA dataset is what must be passed here
        # (was previously self.sentence_dataset by mistake).
        self.qa_dataset,
        "--artifact",
        self.artifact_dir,
        "--build_folder",
        self.build_folder,
        "--device",
        self.device,
        "--model",
        self.model,
        "--ip",
        self.ip,
        "--port",
        str(self.port),
    ]
    if self.host:
        cmds.extend(["--host", self.host])

    p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
    with Listener((self.ip, self.port)) as listener:
        conn = listener.accept()
        p.communicate()
        msg = json.loads(conn.recv())
        if "Error" in msg:
            self.fail(msg["Error"])
        else:
            self.assertGreaterEqual(msg["f1"], 0.7)

def test_whisper(self):
if not self.required_envs():
self.skipTest("missing required envs")
Expand Down
1 change: 1 addition & 0 deletions backends/qualcomm/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class TestQNN(unittest.TestCase):
executorch_root: str = ""
artifact_dir: str = ""
image_dataset: str = ""
qa_dataset: str = ""
sentence_dataset: str = ""
pretrained_weight: str = ""
enable_profile: bool = False
Expand Down
3 changes: 3 additions & 0 deletions examples/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/llama)
# build qnn_mimi_decoder_runner
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/moshi)

# build qnn_t5_runner for t5
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/t5)

# build qnn_whisper_runner for whisper
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/whisper)

Expand Down
45 changes: 45 additions & 0 deletions examples/qualcomm/oss_scripts/t5/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


# preprocess qnn runner src files for t5
# Headers are listed alongside sources so IDE generators surface them;
# the sampler from extension/llm is compiled in for token selection.
set(_qnn_t5_runner__srcs
    ${CMAKE_CURRENT_LIST_DIR}/qnn_t5_runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/decoder.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/decoder.h
    ${CMAKE_CURRENT_LIST_DIR}/runner/encoder.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/encoder.h
    ${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/runner/runner.h
    ${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp
)

# build qnn t5 runner
add_executable(qnn_t5_runner ${_qnn_t5_runner__srcs})
# _common_include_directories is provided by the parent
# examples/qualcomm/CMakeLists.txt; tokenizers headers are needed for the
# tokenizer model loaded at runtime.
target_include_directories(
    qnn_t5_runner PUBLIC ${_common_include_directories}
    ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)


target_link_libraries(
    qnn_t5_runner
    qnn_executorch_backend
    executorch_core
    extension_data_loader
    extension_flat_tensor
    extension_module
    extension_tensor
    gflags
    tokenizers
)

target_compile_options(
    qnn_t5_runner PUBLIC ${_common_compile_options}
)
# $ORIGIN rpath lets the runner find shared libs pushed next to the binary
# on device.
set_target_properties(
    qnn_t5_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)
137 changes: 137 additions & 0 deletions examples/qualcomm/oss_scripts/t5/qnn_t5_runner.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/**
* @file
*
* This tool can run t5 with Qualcomm AI Engine Direct.
*
*/

#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/examples/qualcomm/oss_scripts/t5/runner/runner.h>
#include <executorch/runtime/platform/log.h>
#include <gflags/gflags.h>
#include <fstream>
#include <vector>

// Command-line flags (gflags) controlling model/tokenizer paths, the input
// list of pre-tokenized prompts, generation length, and the output folder.
DEFINE_string(
    model_path,
    "t5_qnn.pte",
    "t5 model serialized in flatbuffer format.");

DEFINE_string(
    tokenizer_model_path,
    "tokenizer.model",
    "The tokenizer is saved from T5Tokenize.save_pretrained for tokenizer.");
DEFINE_string(
    input_list_path,
    "input_list.txt",
    "Input list storing file name of encoded results.");
DEFINE_int32(
    seq_len,
    128,
    "Maximum sequence length for the generated output. Defaults to use the model's `max_cache_size` attribute. Will be truncated to maximal cache size if larger than `max_cache_size`.");

DEFINE_string(
    output_folder_path,
    "outputs",
    "Executorch inference data output path.");

std::vector<std::vector<std::vector<int64_t>>> parse_input_list_file(
const std::string& input_list_path) {
std::vector<std::vector<std::vector<int64_t>>> bufs;
std::ifstream input_list(input_list_path);

auto split = [](std::string s, std::string delimiter) {
size_t pos_start = 0, pos_end, delim_len = delimiter.length();
std::string token;
std::vector<std::string> res;

while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) {
token = s.substr(pos_start, pos_end - pos_start);
pos_start = pos_end + delim_len;
res.push_back(token);
}
res.push_back(s.substr(pos_start));
return res;
};

if (!input_list.is_open()) {
ET_LOG(Error, "Unable to open file");
return bufs;
}

std::string file_path;
while (std::getline(input_list, file_path)) {
auto input_files = split(file_path, " ");
int num_inputs = input_files.size();
if (num_inputs == 0) {
break;
}

bufs.emplace_back();
bufs.back().resize(num_inputs);
for (int input_index = 0; input_index < num_inputs; ++input_index) {
std::ifstream fin(input_files[input_index], std::ios::binary);
if (!fin.is_open()) {
ET_LOG(
Error, "Could not open file %s", input_files[input_index].c_str());
continue;
}

fin.seekg(0, std::ios::end);
size_t file_size = fin.tellg();
fin.seekg(0, std::ios::beg);

size_t num_tokens = file_size / sizeof(int64_t);
bufs.back()[input_index].resize(num_tokens);

if (!fin.read(
reinterpret_cast<char*>(bufs.back()[input_index].data()),
file_size)) {
ET_LOG(
Error, "Could not read file %s", input_files[input_index].c_str());
continue;
}

fin.close();
}
}

input_list.close();
return bufs;
}

int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);

std::vector<std::vector<std::vector<int64_t>>> multi_turns_input_buffers =
parse_input_list_file(FLAGS_input_list_path);

for (int iter = 0; iter < multi_turns_input_buffers.size(); ++iter) {
std::vector<char> bufs;
bufs.reserve(5 * FLAGS_seq_len); // assume each token is around 5 char
auto callback = [&](const std::string& piece) {
for (const char c : piece) {
bufs.push_back(c);
}
};

example::Runner runner(FLAGS_model_path, FLAGS_tokenizer_model_path);
// generate tokens
runner.generate(FLAGS_seq_len, multi_turns_input_buffers[iter], callback);
auto output_file_name =
FLAGS_output_folder_path + "/output_" + std::to_string(iter) + ".txt";
std::ofstream fout(output_file_name);
fout.write(bufs.data(), bufs.size());
fout.close();
}

return 0;
}
58 changes: 58 additions & 0 deletions examples/qualcomm/oss_scripts/t5/runner/decoder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/examples/qualcomm/oss_scripts/t5/runner/decoder.h>

using executorch::aten::Tensor;
using executorch::extension::Module;
using executorch::extension::TensorPtr;
using executorch::runtime::Error;
using executorch::runtime::Result;

namespace example {
// Constructs the decoder around a Module loaded lazily from `model_path`,
// mmap'ed with mlock attempted but failures ignored.
T5Decoder::T5Decoder(const std::string& model_path)
    : module_(std::make_unique<Module>(
          model_path,
          Module::LoadMode::MmapUseMlockIgnoreErrors)) {
  ET_LOG(Info, "creating decoder module: model_path=%s", model_path.c_str());
}

// Returns true once the exported "decoder" method has been loaded into the
// underlying Module.
bool T5Decoder::is_method_loaded() const {
  return module_->is_method_loaded(kDecoderForwardName);
}

// Loads the exported "decoder" method; a no-op returning Ok when it is
// already loaded.
Error T5Decoder::load() {
  return is_method_loaded() ? Error::Ok
                            : module_->load_method(kDecoderForwardName);
}
// Executes one decoder forward pass over the exported "decoder" method and
// returns its single output, the logits tensor. Propagates execution errors;
// aborts (ET_CHECK) if the method's output arity or type is not as exported.
Result<Tensor> T5Decoder::step(
    TensorPtr& input_ids,
    TensorPtr& attention_mask,
    TensorPtr& encoder_hidden_states,
    TensorPtr& encoder_attention_mask,
    TensorPtr& cache_position) {
  auto outputs_res = module_->execute(
      kDecoderForwardName,
      {input_ids,
       attention_mask,
       encoder_hidden_states,
       encoder_attention_mask,
       cache_position});
  ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
  // Fixed message: was "More then one output" — a typo, and inaccurate when
  // zero outputs are returned; report the actual count instead.
  ET_CHECK_MSG(
      outputs_res.get().size() == 1,
      "Expected exactly one output from executing decoder, got %zu.",
      outputs_res.get().size());
  ET_CHECK_MSG(
      outputs_res.get()[0].isTensor(),
      "Non Tensor Output returned from executing decoder");

  // Return the logits tensor
  return outputs_res.get()[0].toTensor();
}
} // namespace example
52 changes: 52 additions & 0 deletions examples/qualcomm/oss_scripts/t5/runner/decoder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once
#include <executorch/extension/module/module.h>
#include <executorch/extension/tensor/tensor.h>
#include <executorch/extension/tensor/tensor_ptr.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

namespace example {

/// Thin wrapper over an ExecuTorch Module holding a T5 decoder exported
/// under the method name "decoder". Owns the Module exclusively.
class T5Decoder {
 public:
  /// @param model_path Path to the serialized .pte holding the decoder.
  explicit T5Decoder(const std::string& model_path);

  /// True once the "decoder" method is loaded in the underlying Module.
  bool is_method_loaded() const;
  /// Loads the "decoder" method; Ok if already loaded.
  executorch::runtime::Error load();
  /// Runs one decoder forward pass and returns the resulting tensor
  /// (the logits).
  executorch::runtime::Result<executorch::aten::Tensor> step(
      executorch::extension::TensorPtr& input_ids,
      executorch::extension::TensorPtr& attention_mask,
      executorch::extension::TensorPtr& encoder_hidden_states,
      executorch::extension::TensorPtr& encoder_attention_mask,
      executorch::extension::TensorPtr& cache_position);
  /// Forwards to Module::method_names() on the wrapped module.
  executorch::runtime::Result<std::unordered_set<std::string>> method_names() {
    return module_->method_names();
  }
  /// Forwards to Module::get() for the named method.
  executorch::runtime::Result<executorch::runtime::EValue> get(
      const std::string& method_name) {
    return module_->get(method_name);
  }

  /// Executes the named method with no inputs on the wrapped module.
  executorch::runtime::Result<std::vector<executorch::runtime::EValue>> execute(
      const std::string& method_name) {
    return module_->execute(method_name);
  }

 private:
  std::unique_ptr<executorch::extension::Module> module_;
  // Name of the exported decoder forward method inside the .pte.
  static constexpr const char* kDecoderForwardName = "decoder";
};

} // namespace example
Loading
Loading