
Commit 4d18e03

[aoti-et] Add an ASR runner and a Whisper example to showcase how to use it

**Key Changes:**
* Create a new ASR runner extension in `extension/asr/runner/` with reusable runner components (runner.h/cpp)
* Update CMake configuration files to support ASR runner builds (executorch-config.cmake, default.cmake, llm.cmake)
* Add a new Whisper model example in `examples/models/whisper/` with a CMake build, README, and main.cpp runner
* Bump the optimum-executorch commit pin for Whisper support
* Update the CUDA CI workflow for testing

This change enables automatic speech recognition (ASR) in ExecuTorch with Whisper as the first supported model, following a pattern similar to the existing LLM runner infrastructure.
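At a glance, the new ASR runner is driven much like the existing LLM runner. The sketch below is illustrative only, with names taken from the `examples/models/whisper/README.md` and `main.cpp` added in this commit; the authoritative signatures live in `extension/asr/runner/runner.h`.

```cpp
// Illustrative sketch; names follow the Whisper example in this commit and
// may drift as the runner evolves.
#include <executorch/extension/asr/runner/runner.h>
#include <executorch/extension/tensor/tensor_ptr.h>

#include <iostream>
#include <string>

using ::executorch::extension::TensorPtr;
using ::executorch::extension::asr::AsrRunner;
using ::executorch::extension::asr::AsrTranscribeConfig;

// `features` is the mel-spectrogram tensor produced by a preprocessor .pte
// (see examples/models/whisper/main.cpp for the full pipeline).
int run_whisper(TensorPtr features) {
  // model.pte holds the encoder/text_decoder methods, model.ptd the weights.
  AsrRunner runner("model.pte", "model.ptd", "tokenizer.json");
  if (runner.load() != ::executorch::runtime::Error::Ok) {
    return 1;
  }

  AsrTranscribeConfig config;
  config.max_new_tokens = 128;            // cap generation length
  config.decoder_start_token_id = 50257;  // BOS id used by the Whisper example

  // transcribe() streams decoded text pieces through the callback.
  auto result =
      runner.transcribe(features, config, [](const std::string& piece) {
        std::cout << piece;
      });
  return result.ok() ? 0 : 1;
}
```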
1 parent cc72b35 commit 4d18e03

File tree

12 files changed, +977 -257 lines changed

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-467660923a5a25e4718e1d6697b93ff1bab4e807
+4361747abfc55e40e929396ed986efe775d745f9

.github/workflows/cuda.yml

Lines changed: 146 additions & 256 deletions
Large diffs are not rendered by default.

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
@@ -926,6 +926,11 @@ if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
   list(APPEND _executorch_extensions extension_llm_runner)
 endif()

+if(EXECUTORCH_BUILD_EXTENSION_ASR_RUNNER)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/asr/runner)
+  list(APPEND _executorch_extensions extension_asr_runner)
+endif()
+
 if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
 endif()

Lines changed: 89 additions & 0 deletions
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.29)
project(whisper_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

# Let files say "include <executorch/path/to/header.h>"
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# Need this for gflags for some reason
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..)
find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)

set(_link_libraries executorch gflags)
set(_srcs multimodal.cpp)

list(
  APPEND
  _link_libraries
  optimized_native_cpu_ops_lib
  quantized_ops_lib
  custom_ops
  cpublas
  eigen_blas
)

# XNNPACK
if(TARGET xnnpack_backend)
  list(APPEND _link_libraries xnnpack_backend)
endif()

# Add LLM runner and extension module
if(NOT TARGET extension_asr_runner)
  message(
    FATAL_ERROR
      "ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_ASR_RUNNER enabled."
  )
endif()

# Needed for cpuinfo where it uses android specific log lib
if(ANDROID)
  list(APPEND _link_libraries log)
endif()

# Add the required ExecuTorch extensions for multimodal LLM runner
list(
  APPEND
  _link_libraries
  extension_asr_runner
  extension_llm_runner # Needed for load_tokenizer()
  extension_module
  extension_data_loader
  extension_tensor
  extension_flat_tensor
)

# Link CUDA backend
if(EXECUTORCH_BUILD_CUDA)
  find_package(CUDAToolkit REQUIRED)
  list(APPEND _link_libraries aoti_cuda)
  executorch_target_link_options_shared_lib(aoti_cuda)
endif()

if(EXECUTORCH_BUILD_METAL)
  list(APPEND _link_libraries metal_backend)
  executorch_target_link_options_shared_lib(metal_backend)
endif()

# Add tokenizers
list(APPEND _link_libraries tokenizers::tokenizers)

add_executable(whisper_runner main.cpp)

target_include_directories(whisper_runner PUBLIC ${_common_include_directories})

target_link_libraries(whisper_runner PUBLIC ${_link_libraries})
target_compile_options(whisper_runner PUBLIC ${_common_compile_options})

examples/models/whisper/README.md

Lines changed: 75 additions & 0 deletions
# Whisper Runner

This directory hosts a lightweight C++ helper that drives Whisper models
exported to ExecuTorch. The `AsrRunner` owns the `Module` instance that
wraps a bundled `.pte` program and optional `.ptd` weight file, loads the
`encoder` and `text_decoder` methods, and exposes a `transcribe()` loop that
streams decoded text pieces through a callback.

The runner assumes:
- `model.pte` contains both Whisper encoder and decoder entry points named
  `encoder` and `text_decoder`.
- External parameters (for example KV cache blocks) are stored in a companion
  `model.ptd`.
- A tokenizer JSON compatible with the ExecuTorch tokenizers shim is available.

Audio preprocessing is not part of the runner itself. To transform raw audio
into the mel features expected by the encoder, reuse the pattern in
`examples/models/voxtral/multimodal.cpp`, which loads a `preprocessor.pte`
module to generate the spectrogram tensor.

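As a rough sketch of that pattern, assuming (as `examples/models/whisper/main.cpp` does) that the exported preprocessor exposes a `forward` method taking the raw waveform:

```cpp
// Hedged sketch mirroring examples/models/whisper/main.cpp; the preprocessor
// path and its "forward" method are assumptions taken from that example.
#include <executorch/extension/module/module.h>
#include <executorch/extension/tensor/tensor_ptr_maker.h>

#include <memory>
#include <vector>

using ::executorch::extension::Module;
using ::executorch::extension::TensorPtr;
using ::executorch::extension::from_blob;

// Turns raw mono float samples into the mel-spectrogram tensor fed to AsrRunner.
TensorPtr make_features(std::vector<float>& audio_data) {
  Module processor("preprocessor.pte", Module::LoadMode::Mmap);
  if (processor.load() != ::executorch::runtime::Error::Ok) {
    return nullptr;
  }

  // Wrap the samples in a 1-D float tensor without copying them.
  auto audio_tensor = from_blob(
      audio_data.data(),
      {static_cast<::executorch::aten::SizesType>(audio_data.size())},
      ::executorch::aten::ScalarType::Float);

  auto result = processor.execute("forward", audio_tensor);
  if (result.error() != ::executorch::runtime::Error::Ok ||
      result.get().empty() || !result.get()[0].isTensor()) {
    return nullptr;
  }

  // The first output is the spectrogram expected by the Whisper encoder.
  return std::make_shared<::executorch::aten::Tensor>(result.get()[0].toTensor());
}
```

The resulting tensor is what the Usage example below passes to `transcribe()`.
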
## Build

```bash
# Install ExecuTorch libraries:
cmake --preset llm -DEXECUTORCH_BUILD_CUDA=ON -DCMAKE_INSTALL_PREFIX=cmake-out -DCMAKE_BUILD_TYPE=Release . -Bcmake-out
cmake --build cmake-out -j$(nproc) --target install --config Release

# Build the runner:
cmake \
  -B cmake-out/examples/models/whisper \
  -S examples/models/whisper
cmake --build cmake-out/examples/models/whisper -j
```

The first cmake command produces a static library named `extension_asr_runner`. The second cmake command
links it into your application together with the standard ExecuTorch runtime libraries and the
tokenizer target (`tokenizers::tokenizers`).

## Usage

```cpp
#include <executorch/extension/asr/runner/runner.h>
#include <executorch/extension/tensor/tensor_ptr.h>

using executorch::extension::asr::AsrRunner;
using executorch::extension::asr::AsrTranscribeConfig;

AsrRunner runner("model.pte", "model.ptd", "tokenizer.json");
ET_CHECK_OK(runner.load());

// `features` is the mel spectrogram tensor produced by the preprocessor.
executorch::extension::TensorPtr features = load_features_somehow();

AsrTranscribeConfig config;
config.max_new_tokens = 128;           // stop after 128 generated tokens
config.temperature = 0.7f;             // optional: enable stochastic sampling
config.decoder_start_token_id = 50257; // override the BOS token id

auto tokens_result = runner.transcribe(
    features,
    config,
    [](const std::string& piece) {
      std::cout << piece;
    });

if (!tokens_result.ok()) {
  ET_LOG(Error, "Transcription failed: %d", static_cast<int>(tokens_result.error()));
}
```

`transcribe()` returns the full token history (prompt + generated tokens) and
invokes the callback every time a new token is emitted. Provide a non-empty
`decoder_input_ids` vector if you want to seed the decoder with a custom prompt,
and override `AsrTranscribeConfig::eos_token_ids` when the model exposes
custom termination ids.
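
Building on the example above, the streaming callback can just as easily accumulate the pieces instead of printing them (same `runner`, `features`, and `config`; a sketch only):

```cpp
// Sketch: collect the streamed pieces into a single transcript string.
std::string transcript;
auto tokens = runner.transcribe(
    features, config, [&transcript](const std::string& piece) {
      transcript += piece;  // invoked once per emitted token
    });
if (tokens.ok()) {
  std::cout << "\nFull transcript: " << transcript << std::endl;
}
```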

examples/models/whisper/main.cpp

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <cmath>
10+
#include <cstring>
11+
#include <fstream>
12+
#include <iostream>
13+
#include <limits>
14+
#include <memory>
15+
#include <string>
16+
#include <vector>
17+
18+
#include <gflags/gflags.h>
19+
20+
#include <executorch/extension/asr/runner/runner.h>
21+
#include <executorch/extension/llm/runner/util.h>
22+
#include <executorch/extension/llm/runner/wav_loader.h>
23+
#include <executorch/extension/module/module.h>
24+
#include <executorch/extension/tensor/tensor_ptr_maker.h>
25+
#include <executorch/runtime/core/evalue.h>
26+
#include <executorch/runtime/platform/log.h>
27+
28+
DEFINE_string(model_path, "model.pte", "Path to Whisper model (.pte).");
29+
DEFINE_string(data_path, "", "Optional path to Whisper weights (.ptd).");
30+
DEFINE_string(
31+
tokenizer_path,
32+
".",
33+
"Path to tokenizer directory containing tokenizer.json, tokenizer_config.json, and special_tokens_map.json.");
34+
DEFINE_string(
35+
processor_path,
36+
"",
37+
"Path to preprocessor .pte for converting raw audio.");
38+
DEFINE_string(
39+
audio_path,
40+
"",
41+
"Path to input audio file. Accepts .wav or raw float .bin.");
42+
DEFINE_double(
43+
temperature,
44+
0.0,
45+
"Sampling temperature. 0.0 performs greedy decoding.");
46+
DEFINE_int32(max_new_tokens, 128, "Maximum number of tokens to generate.");
47+
48+
using ::executorch::extension::from_blob;
49+
using ::executorch::extension::Module;
50+
51+
int main(int argc, char** argv) {
52+
gflags::ParseCommandLineFlags(&argc, &argv, true);
53+
::executorch::extension::TensorPtr features;
54+
std::vector<float> audio_data;
55+
std::unique_ptr<Module> processor;
56+
57+
if (FLAGS_audio_path.empty()) {
58+
ET_LOG(Error, "audio_path flag must be provided.");
59+
return 1;
60+
}
61+
62+
audio_data =
63+
executorch::extension::llm::load_wav_audio_data(FLAGS_audio_path);
64+
ET_LOG(
65+
Info,
66+
"First 2 values of audio data: %f, %f",
67+
audio_data[0],
68+
audio_data[1]);
69+
70+
processor =
71+
std::make_unique<Module>(FLAGS_processor_path, Module::LoadMode::Mmap);
72+
auto load_error = processor->load();
73+
if (load_error != ::executorch::runtime::Error::Ok) {
74+
ET_LOG(Error, "Failed to load preprocessor module.");
75+
return 1;
76+
}
77+
78+
auto audio_tensor = from_blob(
79+
audio_data.data(),
80+
{static_cast<::executorch::aten::SizesType>(audio_data.size())},
81+
::executorch::aten::ScalarType::Float);
82+
83+
auto processed_result = processor->execute("forward", audio_tensor);
84+
if (processed_result.error() != ::executorch::runtime::Error::Ok) {
85+
ET_LOG(Error, "Audio preprocessing failed.");
86+
return 1;
87+
}
88+
auto outputs = std::move(processed_result.get());
89+
if (outputs.empty() || !outputs[0].isTensor()) {
90+
ET_LOG(Error, "Preprocessor returned unexpected outputs.");
91+
return 1;
92+
}
93+
auto tensor = outputs[0].toTensor();
94+
ET_LOG(
95+
Info,
96+
"Result scalar_type: %s, first value %f",
97+
::executorch::runtime::toString(tensor.scalar_type()),
98+
tensor.mutable_data_ptr<float>()[0]);
99+
features = std::make_shared<::executorch::aten::Tensor>(std::move(tensor));
100+
101+
executorch::extension::asr::AsrRunner runner(
102+
FLAGS_model_path, FLAGS_data_path, FLAGS_tokenizer_path);
103+
auto load_err = runner.load();
104+
if (load_err != ::executorch::runtime::Error::Ok) {
105+
ET_LOG(Error, "Failed to load Whisper model.");
106+
return 1;
107+
}
108+
109+
executorch::extension::asr::AsrTranscribeConfig config;
110+
config.max_new_tokens = FLAGS_max_new_tokens;
111+
config.temperature = static_cast<float>(FLAGS_temperature);
112+
config.decoder_start_token_id = 50257;
113+
114+
auto result =
115+
runner.transcribe(features, config, [&](const std::string& piece) {
116+
::executorch::extension::llm::safe_printf(piece.c_str());
117+
fflush(stdout);
118+
});
119+
120+
if (!result.ok()) {
121+
ET_LOG(Error, "Transcription failed.");
122+
return 1;
123+
}
124+
125+
return 0;
126+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
#
7+
# ASR runner for models like Whisper
8+
#
9+
# ### Editing this file ###
10+
#
11+
# This file should be formatted with
12+
# ~~~
13+
# cmake-format -i CMakeLists.txt
14+
# ~~~
15+
# It should also be cmake-lint clean.
16+
#
17+
18+
if(NOT EXECUTORCH_ROOT)
19+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
20+
endif()
21+
22+
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
23+
24+
set(runner_deps executorch_core extension_module extension_tensor
25+
tokenizers::tokenizers
26+
)
27+
28+
# Define runner library
29+
add_library(extension_asr_runner STATIC runner.cpp)
30+
target_include_directories(
31+
extension_asr_runner INTERFACE ${_common_include_directories}
32+
)
33+
target_link_libraries(extension_asr_runner PUBLIC ${runner_deps})
34+
set_target_properties(
35+
extension_asr_runner PROPERTIES POSITION_INDEPENDENT_CODE ON
36+
)
37+
38+
install(
39+
TARGETS extension_asr_runner
40+
EXPORT ExecuTorchTargets
41+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
42+
INCLUDES
43+
DESTINATION ${_common_include_directories}
44+
)
45+
46+
install(
47+
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/
48+
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/extension/asr/runner
49+
FILES_MATCHING
50+
PATTERN "*.h"
51+
)
