Changed API Whisper -> ASR

rohansjoshi · rohansjoshi · commit d5ed11f1c5bc · 2025-09-12T15:30:36.000-07:00
diff --git a/examples/qualcomm/oss_scripts/whisper/CMakeLists.txt b/examples/qualcomm/oss_scripts/whisper/CMakeLists.txt
@@ -14,6 +14,7 @@ set(_qnn_whisper_runner__srcs
     ${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp
     ${CMAKE_CURRENT_LIST_DIR}/runner/runner.h
     ${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp
+    ${EXECUTORCH_ROOT}/extension/llm/runner/asr_runner.h
 )
 
 # build qnn whisper runner
diff --git a/examples/qualcomm/oss_scripts/whisper/runner/runner.h b/examples/qualcomm/oss_scripts/whisper/runner/runner.h
@@ -13,6 +13,7 @@
 
 #include <executorch/examples/qualcomm/oss_scripts/whisper/runner/decoder.h>
 #include <executorch/examples/qualcomm/oss_scripts/whisper/runner/encoder.h>
+#include <executorch/extension/llm/runner/asr_runner.h>
 #include <executorch/extension/llm/sampler/sampler.h>
 #include <executorch/runtime/core/error.h>
 #include <pytorch/tokenizers/tokenizer.h>
@@ -24,7 +25,7 @@
 
 namespace example {
 
-class WhisperRunner {
+class WhisperRunner : public executorch::extension::llm::ASRRunner {
  public:
   explicit WhisperRunner(
       const std::string& model_path,
diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt
@@ -239,11 +239,10 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
   endif()
 endif()
 
-if(EXECUTORCH_BUILD_WHISPER_JNI)
-  target_sources(executorch_jni PRIVATE jni/jni_layer_whisper.cpp jni/log.cpp)
-  target_compile_definitions(
-    executorch_jni PUBLIC EXECUTORCH_BUILD_WHISPER_JNI=1
-  )
+if(EXECUTORCH_BUILD_ASR_JNI)
+  target_sources(executorch_jni PRIVATE jni/jni_layer_asr.cpp jni/log.cpp)
+  target_compile_definitions(executorch_jni PUBLIC EXECUTORCH_BUILD_ASR_JNI=1)
+
   if(QNN_SDK_ROOT)
     target_sources(
       executorch_jni
@@ -257,6 +256,7 @@ if(EXECUTORCH_BUILD_WHISPER_JNI)
       executorch_jni
       PRIVATE ${EXECUTORCH_ROOT}/examples/qualcomm/oss_scripts/whisper/runner
     )
+    target_compile_definitions(executorch_jni PRIVATE EXECUTORCH_BUILD_QNN=1)
   endif()
 endif()
 
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/ASRCallback.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/ASRCallback.java
@@ -18,7 +18,7 @@
  * <p>Warning: These APIs are experimental and subject to change without notice
  */
 @Experimental
-public interface WhisperCallback {
+public interface ASRCallback {
   /**
    * Called when a new result is available from JNI. Users will keep getting onResult() invocations
    * until generate() finishes.
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/ASRModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/ASRModule.java
@@ -14,21 +14,20 @@
 import org.pytorch.executorch.annotations.Experimental;
 
 /**
- * WhisperModule is a wrapper around the Executorch LLM. It provides a simple interface to generate text
- * from the model.
+ * ASRModule is a wrapper around ExecuTorch ASR runners, such as the Whisper runner.
  *
  * <p>Warning: These APIs are experimental and subject to change without notice
  */
 @Experimental
-public class WhisperModule {
+public class ASRModule {
 
   @DoNotStrip private final HybridData mHybridData;
 
   @DoNotStrip
   private static native HybridData initHybrid(
       String modulePath, String tokenizerPath);
 
-  public WhisperModule(
+  public ASRModule(
       String modulePath, String tokenizerPath) {
     ExecuTorchRuntime runtime = ExecuTorchRuntime.getRuntime();
 
@@ -51,7 +50,7 @@ public void resetNative() {
   public native int transcribe(
       int seqLen,
       byte[][] inputs,
-      WhisperCallback callback);
+      ASRCallback callback);
 
 
   /** Force loading the module. Otherwise the model is loaded during first generate(). */
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/package-info.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/audio/package-info.java
@@ -1,2 +1,2 @@
-/** Extension for LLM related use cases for ExecuTorch Android Java/JNI package. */
+/** Extension for ASR related use cases for ExecuTorch Android Java/JNI package. */
 package org.pytorch.executorch.extension.audio;
diff --git a/extension/android/jni/BUCK b/extension/android/jni/BUCK
@@ -123,15 +123,15 @@ non_fbcode_target(_kind = fb_android_cxx_library,
 )
 
 non_fbcode_target(_kind = fb_android_cxx_library,
-    name = "executorch_whisper_jni",
+    name = "executorch_asr_jni",
     srcs = [
         "jni_layer.cpp",
-        "jni_layer_whisper.cpp",
+        "jni_layer_asr.cpp",
         "jni_layer_runtime.cpp",
     ],
     allow_jni_merging = False,
     compiler_flags = ET_JNI_COMPILER_FLAGS + [
-        "-DEXECUTORCH_BUILD_WHISPER_JNI",
+        "-DEXECUTORCH_BUILD_ASR_JNI",
     ],
     soname = "libexecutorch.$(ext)",
     visibility = ["PUBLIC"],
diff --git a/extension/android/jni/jni_layer.cpp b/extension/android/jni/jni_layer.cpp
@@ -508,10 +508,10 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
 };
 } // namespace executorch::extension
 
-#ifdef EXECUTORCH_BUILD_WHISPER_JNI
-extern void register_natives_for_whisper();
+#ifdef EXECUTORCH_BUILD_ASR_JNI
+extern void register_natives_for_asr();
 #else
-void register_natives_for_whisper() {}
+void register_natives_for_asr() {}
 #endif
 
 #ifdef EXECUTORCH_BUILD_LLAMA_JNI
@@ -532,7 +532,7 @@ void register_natives_for_training() {}
 JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
   return facebook::jni::initialize(vm, [] {
     executorch::extension::ExecuTorchJni::registerNatives();
-    register_natives_for_whisper();
+    register_natives_for_asr();
     register_natives_for_llm();
     register_natives_for_runtime();
     register_natives_for_training();
diff --git a/extension/android/jni/jni_layer_asr.cpp b/extension/android/jni/jni_layer_asr.cpp
@@ -13,7 +13,7 @@
 #include <unordered_map>
 #include <vector>
 
-#include <executorch/examples/qualcomm/oss_scripts/whisper/runner/runner.h>
+#include <executorch/extension/llm/runner/asr_runner.h>
 #include <executorch/runtime/platform/log.h>
 #include <executorch/runtime/platform/platform.h>
 #include <executorch/runtime/platform/runtime.h>
@@ -23,6 +23,10 @@
 #include <executorch/extension/threadpool/threadpool.h>
 #endif
 
+#if defined(EXECUTORCH_BUILD_QNN)
+#include <executorch/examples/qualcomm/oss_scripts/whisper/runner/runner.h>
+#endif
+
 #include <fbjni/ByteBuffer.h>
 #include <fbjni/fbjni.h>
 
@@ -67,14 +71,14 @@ std::string token_buffer;
 
 namespace executorch_jni {
 
-class ExecuTorchWhisperCallbackJni
-    : public facebook::jni::JavaClass<ExecuTorchWhisperCallbackJni> {
+class ExecuTorchASRCallbackJni
+    : public facebook::jni::JavaClass<ExecuTorchASRCallbackJni> {
  public:
   constexpr static const char* kJavaDescriptor =
-      "Lorg/pytorch/executorch/extension/audio/WhisperCallback;";
+      "Lorg/pytorch/executorch/extension/audio/ASRCallback;";
 
   void onResult(std::string result) const {
-    static auto cls = ExecuTorchWhisperCallbackJni::javaClassStatic();
+    static auto cls = ExecuTorchASRCallbackJni::javaClassStatic();
     static const auto method =
         cls->getMethod<void(facebook::jni::local_ref<jstring>)>("onResult");
 
@@ -91,15 +95,14 @@ class ExecuTorchWhisperCallbackJni
   }
 };
 
-class ExecuTorchWhisperJni
-    : public facebook::jni::HybridClass<ExecuTorchWhisperJni> {
+class ExecuTorchASRJni : public facebook::jni::HybridClass<ExecuTorchASRJni> {
  private:
   friend HybridBase;
-  std::unique_ptr<example::WhisperRunner> runner_;
+  std::unique_ptr<::executorch::extension::llm::ASRRunner> runner_;
 
  public:
   constexpr static auto kJavaDescriptor =
-      "Lorg/pytorch/executorch/extension/audio/WhisperModule;";
+      "Lorg/pytorch/executorch/extension/audio/ASRModule;";
 
   static facebook::jni::local_ref<jhybriddata> initHybrid(
       facebook::jni::alias_ref<jclass>,
@@ -108,7 +111,7 @@ class ExecuTorchWhisperJni
     return makeCxxInstance(model_path, tokenizer_path);
   }
 
-  ExecuTorchWhisperJni(
+  ExecuTorchASRJni(
       facebook::jni::alias_ref<jstring> model_path,
       facebook::jni::alias_ref<jstring> tokenizer_path) {
 #if defined(ET_USE_THREADPOOL)
@@ -121,17 +124,18 @@ class ExecuTorchWhisperJni
           ->_unsafe_reset_threadpool(num_performant_cores);
     }
 #endif
-
+#if defined(EXECUTORCH_BUILD_QNN)
     // create runner
     runner_ = std::make_unique<example::WhisperRunner>(
         model_path->toStdString(), tokenizer_path->toStdString());
+#endif
   }
 
   jint transcribe(
       jint seq_len,
       facebook::jni::alias_ref<
           facebook::jni::JArrayClass<jbyteArray>::javaobject> inputs,
-      facebook::jni::alias_ref<ExecuTorchWhisperCallbackJni> callback) {
+      facebook::jni::alias_ref<ExecuTorchASRCallbackJni> callback) {
     // Convert Java byte[][] to C++ vector<vector<char>>
     std::vector<std::vector<char>> cppData;
     auto input_size = inputs->size();
@@ -162,15 +166,15 @@ class ExecuTorchWhisperJni
 
   static void registerNatives() {
     registerHybrid({
-        makeNativeMethod("initHybrid", ExecuTorchWhisperJni::initHybrid),
-        makeNativeMethod("transcribe", ExecuTorchWhisperJni::transcribe),
-        makeNativeMethod("load", ExecuTorchWhisperJni::load),
+        makeNativeMethod("initHybrid", ExecuTorchASRJni::initHybrid),
+        makeNativeMethod("transcribe", ExecuTorchASRJni::transcribe),
+        makeNativeMethod("load", ExecuTorchASRJni::load),
     });
   }
 };
 
 } // namespace executorch_jni
 
-void register_natives_for_whisper() {
-  executorch_jni::ExecuTorchWhisperJni::registerNatives();
+void register_natives_for_asr() {
+  executorch_jni::ExecuTorchASRJni::registerNatives();
 }
diff --git a/extension/llm/runner/asr_runner.h b/extension/llm/runner/asr_runner.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Interface for audio-to-text (ASR) model runners. Currently only used for
+// supporting the QNN Whisper runner.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/runtime/core/error.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+/**
+ * Abstract interface for automatic speech recognition (audio-to-text)
+ * runners. Concrete runners implement all three pure-virtual methods.
+ */
+class ET_EXPERIMENTAL ASRRunner {
+ public:
+  virtual ~ASRRunner() = default;
+
+  /**
+   * Check if the runner is loaded and ready for inference.
+   *
+   * @return true if the runner is loaded, false otherwise
+   */
+  virtual bool is_loaded() const = 0;
+
+  /**
+   * Load the model and prepare for inference.
+   *
+   * @return Error::Ok if successful, an error otherwise
+   */
+  virtual runtime::Error load() = 0;
+
+  /**
+   * Generate text from raw audio.
+   *
+   * @param seq_len Length of input sequence
+   * @param inputs A vector containing one element: a vector of bytes that
+   * encodes a float tensor in little-endian byte order
+   * @param token_callback Callback function called for each generated token;
+   * defaults to an empty std::function
+   * @return Error::Ok if successful, an error otherwise
+   */
+  virtual runtime::Error transcribe(
+      int32_t seq_len,
+      std::vector<std::vector<char>>& inputs,
+      std::function<void(const std::string&)> token_callback = {}) = 0;
+};
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
diff --git a/scripts/build_android_library.sh b/scripts/build_android_library.sh
@@ -43,7 +43,7 @@ build_android_native_library() {
     -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \
     -DEXECUTORCH_BUILD_EXTENSION_TRAINING=ON \
     -DEXECUTORCH_BUILD_LLAMA_JNI="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \
-    -DEXECUTORCH_BUILD_WHISPER_JNI="${EXECUTORCH_BUILD_EXTENSION_AUDIO:-ON}" \
+    -DEXECUTORCH_BUILD_ASR_JNI="${EXECUTORCH_BUILD_EXTENSION_AUDIO:-ON}" \
     -DEXECUTORCH_BUILD_NEURON="${EXECUTORCH_BUILD_NEURON}" \
     -DNEURON_BUFFER_ALLOCATOR_LIB="${NEURON_BUFFER_ALLOCATOR_LIB}" \
     -DEXECUTORCH_BUILD_QNN="${EXECUTORCH_BUILD_QNN}" \

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,7 @@ set(_qnn_whisper_runner__srcs`
`14`	`14`	`${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp`
`15`	`15`	`${CMAKE_CURRENT_LIST_DIR}/runner/runner.h`
`16`	`16`	`${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp`
	`17`	`+ ${EXECUTORCH_ROOT}/extension/llm/runner/asr_runner.h`
`17`	`18`	`)`
`18`	`19`
`19`	`20`	`# build qnn whisper runner`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`		`-/** Extension for LLM related use cases for ExecuTorch Android Java/JNI package. */`
	`1`	`+/** Extension for ASR related use cases for ExecuTorch Android Java/JNI package. */`
`2`	`2`	`package org.pytorch.executorch.extension.audio;`