Whisper JNI first commit

rohansjoshi · rohansjoshi · commit cb2278ac5cf8 · 2025-08-22T14:36:59.000-07:00
Added EXECUTORCH_BUILD_WHISPER_JNI flag
diff --git a/examples/qualcomm/oss_scripts/whisper/qnn_whisper_runner.cpp b/examples/qualcomm/oss_scripts/whisper/qnn_whisper_runner.cpp
@@ -97,7 +97,7 @@ std::vector<std::vector<std::vector<char>>> parse_input_list_file(
 int main(int argc, char** argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);
   // create llama runner
-  example::Runner runner(FLAGS_model_path, FLAGS_tokenizer_json_path);
+  example::WhisperRunner runner(FLAGS_model_path, FLAGS_tokenizer_json_path);
 
   std::vector<std::vector<std::vector<char>>> multi_turns_input_buffers =
       parse_input_list_file(FLAGS_input_list_path);
diff --git a/examples/qualcomm/oss_scripts/whisper/runner/runner.cpp b/examples/qualcomm/oss_scripts/whisper/runner/runner.cpp
@@ -27,20 +27,20 @@ static constexpr auto kDecoderStartTokenId = "decoder_start_token_id";
 static constexpr auto kEosId = "get_eos_id";
 static constexpr auto kMaxContextLen = "get_max_context_len";
 } // namespace
-Runner::Runner(
+WhisperRunner::WhisperRunner(
     const std::string& model_path,
     const std::string& tokenizer_json_path)
     : tokenizer_json_path_(tokenizer_json_path) {
   encoder_ = std::make_unique<WhisperEncoder>(model_path);
   decoder_ = std::make_unique<WhisperDecoder>(model_path);
   tokenizer_ = std::make_unique<tokenizers::HFTokenizer>();
 }
-bool Runner::is_loaded() const {
+bool WhisperRunner::is_loaded() const {
   return encoder_->is_method_loaded() && decoder_->is_method_loaded() &&
       tokenizer_->is_loaded() && sampler_;
 }
 
-Error Runner::load() {
+Error WhisperRunner::load() {
   if (is_loaded()) {
     return Error::Ok;
   }
@@ -108,12 +108,12 @@ Error Runner::load() {
 
   return Error::Ok;
 }
-uint64_t Runner::logits_to_token(
+uint64_t WhisperRunner::logits_to_token(
     const executorch::aten::Tensor& logits_tensor) {
   return sampler_->sample(logits_tensor.data_ptr<float>());
 }
 
-Error Runner::transcribe(
+Error WhisperRunner::transcribe(
     int32_t seq_len,
     std::vector<std::vector<char>>& inputs,
     std::function<void(const std::string&)> token_callback) {
@@ -184,7 +184,7 @@ Error Runner::transcribe(
   return Error::Ok;
 }
 
-Error Runner::print_performance() {
+Error WhisperRunner::print_performance() {
   ET_LOG(Info, "\tTotal Generated token:\t\t\t\t%ld", num_generated_token_);
 
   ET_LOG(
diff --git a/examples/qualcomm/oss_scripts/whisper/runner/runner.h b/examples/qualcomm/oss_scripts/whisper/runner/runner.h
@@ -24,9 +24,9 @@
 
 namespace example {
 
-class Runner {
+class WhisperRunner {
  public:
-  explicit Runner(
+  explicit WhisperRunner(
       const std::string& model_path,
       const std::string& tokenizer_json_path);
 
diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt
@@ -69,10 +69,10 @@ set_target_properties(
 
 executorch_target_link_options_shared_lib(executorch)
 
 add_library(
   executorch_jni SHARED jni/jni_layer.cpp jni/log.cpp jni/jni_layer_runtime.cpp
                         jni/jni_helper.cpp
 )
 
 set(link_libraries)
 list(
@@ -239,6 +239,25 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
   endif()
 endif()
 
+if(EXECUTORCH_BUILD_WHISPER_JNI)
+  target_sources(executorch_jni PRIVATE jni/jni_layer_whisper.cpp jni/log.cpp)
+  target_compile_definitions(executorch_jni PUBLIC EXECUTORCH_BUILD_WHISPER_JNI=1)
+  if(QNN_SDK_ROOT)
+    target_sources(
+      executorch_jni
+      PRIVATE
+        ${EXECUTORCH_ROOT}/examples/qualcomm/oss_scripts/whisper/runner/encoder.cpp
+        ${EXECUTORCH_ROOT}/examples/qualcomm/oss_scripts/whisper/runner/decoder.cpp
+        ${EXECUTORCH_ROOT}/examples/qualcomm/oss_scripts/whisper/runner/runner.cpp
+    )
+
+    target_include_directories(
+      executorch_jni
+      PRIVATE ${EXECUTORCH_ROOT}/examples/qualcomm/oss_scripts/whisper/runner
+    )
+  endif()
+endif()
+
 target_include_directories(
   executorch_jni
   PRIVATE
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/WhisperCallback.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/WhisperCallback.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch.extension.audio;
+
+import com.facebook.jni.annotations.DoNotStrip;
+import org.pytorch.executorch.annotations.Experimental;
+
+/**
+ * Callback interface for the Whisper model. Users can implement this interface to receive the
+ * generated tokens.
+ *
+ * <p>Warning: These APIs are experimental and subject to change without notice
+ */
+@Experimental
+public interface WhisperCallback {
+  /**
+   * Called when a new result is available from JNI. Users will keep getting onResult() invocations
+   * until WhisperModule.transcribe() finishes.
+   *
+   * @param result Last generated token
+   */
+  @DoNotStrip
+  public void onResult(String result);
+
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/WhisperModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/WhisperModule.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch.extension.audio;
+import com.facebook.jni.HybridData;
+import com.facebook.jni.annotations.DoNotStrip;
+import java.io.File;
+import org.pytorch.executorch.ExecuTorchRuntime;
+import org.pytorch.executorch.annotations.Experimental;
+
+/**
+ * WhisperModule is a JNI wrapper around the native ExecuTorch Whisper runner. It provides a simple
+ * interface to transcribe inputs with the model.
+ *
+ * <p>Warning: These APIs are experimental and subject to change without notice
+ */
+@Experimental
+public class WhisperModule {
+
+  @DoNotStrip private final HybridData mHybridData;
+
+  @DoNotStrip
+  private static native HybridData initHybrid(
+      String modulePath, String tokenizerPath);
+
+  public WhisperModule(
+      String modulePath, String tokenizerPath) {
+    ExecuTorchRuntime runtime = ExecuTorchRuntime.getRuntime();
+
+    File modelFile = new File(modulePath);
+    if (!modelFile.canRead() || !modelFile.isFile()) {
+      throw new RuntimeException("Cannot load model path " + modulePath);
+    }
+    File tokenizerFile = new File(tokenizerPath);
+    if (!tokenizerFile.canRead() || !tokenizerFile.isFile()) {
+      throw new RuntimeException("Cannot load tokenizer path " + tokenizerPath);
+    }
+    mHybridData = initHybrid(modulePath, tokenizerPath);
+  }
+
+  public void resetNative() {
+    mHybridData.resetNative();
+  }
+
+  @DoNotStrip
+  public native int transcribe(
+      int seqLen,
+      byte[][] inputs,
+      WhisperCallback callback);
+
+
+  /** Force loading the module. Otherwise the model is expected to load on first transcribe(). */
+  @DoNotStrip
+  public native int load();
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/package-info.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/package-info.java
@@ -0,0 +1,2 @@
+/** Extension for audio (Whisper) related use cases for ExecuTorch Android Java/JNI package. */
+package org.pytorch.executorch.extension.audio;
diff --git a/extension/android/jni/BUCK b/extension/android/jni/BUCK
@@ -132,6 +132,35 @@ non_fbcode_target(_kind = fb_android_cxx_library,
     ],
 )
 
+non_fbcode_target(_kind = fb_android_cxx_library,
+    name = "executorch_whisper_jni",
+    srcs = [
+        "jni_layer.cpp",
+        "jni_layer_whisper.cpp",
+        "jni_layer_runtime.cpp",
+    ],
+    allow_jni_merging = False,
+    compiler_flags = ET_JNI_COMPILER_FLAGS + [
+        "-DEXECUTORCH_BUILD_WHISPER_JNI",
+    ],
+    soname = "libexecutorch.$(ext)",
+    visibility = ["PUBLIC"],
+    deps = [
+        ":jni_headers",
+        ":log_provider_static",
+        "//fbandroid/libraries/fbjni:fbjni",
+        "//fbandroid/native/fb:fb",
+        "//third-party/glog:glog",
+        "//xplat/executorch/backends/xnnpack:xnnpack_backend_static",
+        "//xplat/executorch/examples/oss_scripts/qualcomm/whisper/runner:runner_static",
+        "//xplat/executorch/extension/module:module_static",
+        "//xplat/executorch/extension/runner_util:inputs_static",
+        "//xplat/executorch/extension/tensor:tensor_static",
+        "//xplat/executorch/extension/threadpool:cpuinfo_utils_static",
+        "//xplat/executorch/extension/threadpool:threadpool_static",
+    ],
+)
+
 non_fbcode_target(_kind = runtime.cxx_library,
     name = "log_provider",
     srcs = ["log.cpp"],
diff --git a/extension/android/jni/jni_layer.cpp b/extension/android/jni/jni_layer.cpp
@@ -510,6 +510,12 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
 };
 } // namespace executorch::extension
 
+#ifdef EXECUTORCH_BUILD_WHISPER_JNI
+extern void register_natives_for_whisper();
+#else
+void register_natives_for_whisper() {}
+#endif
+
 #ifdef EXECUTORCH_BUILD_LLAMA_JNI
 extern void register_natives_for_llm();
 #else
@@ -528,6 +534,7 @@ void register_natives_for_training() {}
 JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
   return facebook::jni::initialize(vm, [] {
     executorch::extension::ExecuTorchJni::registerNatives();
+    register_natives_for_whisper();
     register_natives_for_llm();
     register_natives_for_runtime();
     register_natives_for_training();
diff --git a/extension/android/jni/jni_layer_whisper.cpp b/extension/android/jni/jni_layer_whisper.cpp
diff --git a/scripts/build_android_library.sh b/scripts/build_android_library.sh

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+/** Extension for audio (Whisper) related use cases for ExecuTorch Android Java/JNI package. */`
	`2`	`+package org.pytorch.executorch.extension.audio;`