Commit ca27eee

Update
1 parent 7215b88 commit ca27eee

8 files changed, +890 -26 lines changed


backends/cuda/cuda_partitioner.py

Lines changed: 13 additions & 0 deletions
@@ -16,6 +16,7 @@
     PartitionResult,
 )
 from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
+from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
 from torch.export.exported_program import ExportedProgram
 
 
@@ -56,6 +57,18 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
         tag_constant_data(exported_program)
         tag_mutated_buffer(exported_program)
 
+        # Tag constant placeholders that have no users.
+        # tag_constant_data only applies a delegation_tag to constants that
+        # have users, but all constants need to be tagged for this partition.
+        for node in exported_program.graph.nodes:
+            if node.op == "placeholder" and (
+                is_param(exported_program, node)
+                or is_buffer(exported_program, node)
+                or is_lifted_tensor_constant(exported_program, node)
+            ):
+                if "delegation_tag" not in node.meta:
+                    node.meta["delegation_tag"] = tag
+
         return PartitionResult(
             tagged_exported_program=exported_program, partition_tags=partition_tags
         )
examples/models/whisper/CMakeLists.txt

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.24)
project(whisper_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

# Let files say "include <executorch/path/to/header.h>"
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# Need this for gflags for some reason
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..)
find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
executorch_target_link_options_shared_lib(executorch)

set(link_libraries executorch gflags)
set(_srcs multimodal.cpp)

list(
  APPEND
  link_libraries
  optimized_native_cpu_ops_lib
  quantized_ops_lib
  custom_ops
  cpublas
  eigen_blas
)
executorch_target_link_options_shared_lib(optimized_native_cpu_ops_lib)
executorch_target_link_options_shared_lib(quantized_ops_lib)
executorch_target_link_options_shared_lib(custom_ops)

# XNNPACK
if(TARGET xnnpack_backend)
  set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
  if(TARGET kleidiai)
    list(APPEND xnnpack_backend_libs kleidiai)
  endif()
  list(APPEND link_libraries ${xnnpack_backend_libs})
  executorch_target_link_options_shared_lib(xnnpack_backend)
endif()

# Add LLM runner and extension module
if(NOT TARGET extension_llm_runner)
  message(
    FATAL_ERROR
      "ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER enabled."
  )
endif()

# Needed for cpuinfo where it uses android specific log lib
if(ANDROID)
  list(APPEND link_libraries log)
endif()

# Add the required ExecuTorch extensions for multimodal LLM runner
list(
  APPEND
  link_libraries
  extension_llm_runner
  extension_module
  extension_data_loader
  extension_tensor
  extension_flat_tensor
)

# Link CUDA backend
if(EXECUTORCH_BUILD_CUDA)
  find_package(CUDAToolkit REQUIRED)
  list(APPEND link_libraries aoti_cuda)
  executorch_target_link_options_shared_lib(aoti_cuda)
endif()

if(EXECUTORCH_BUILD_METAL)
  list(APPEND link_libraries metal_backend)
  executorch_target_link_options_shared_lib(metal_backend)
endif()

# Add tokenizers
list(APPEND link_libraries tokenizers::tokenizers)

add_executable(whisper_runner runner.cpp main.cpp)

target_include_directories(whisper_runner PUBLIC ${_common_include_directories})

target_link_libraries(
  whisper_runner
  PUBLIC
  ${link_libraries}
)
target_compile_options(whisper_runner PUBLIC ${_common_compile_options})

examples/models/whisper/README.md

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
# Whisper Runner

This directory hosts a lightweight C++ helper that drives Whisper models
exported to ExecuTorch. The `WhisperRunner` owns the `Module` instance that
wraps a bundled `.pte` program and optional `.ptd` weight file, loads the
`encoder` and `text_decoder` methods, and exposes a `transcribe()` loop that
streams decoded text pieces through a callback.

The runner assumes:
- `model.pte` contains both Whisper encoder and decoder entry points named
  `encoder` and `text_decoder`.
- External parameters (for example KV cache blocks) are stored in a companion
  `model.ptd`.
- A tokenizer JSON compatible with the ExecuTorch tokenizers shim is available.

Audio preprocessing is not part of the runner itself. To transform raw audio
into the mel features expected by the encoder, reuse the pattern in
`examples/models/voxtral/multimodal.cpp`, which loads a `preprocessor.pte`
module to generate the spectrogram tensor.
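For orientation, here is a minimal sketch of that preprocessing step, mirroring
what `main.cpp` in this directory does; the `audio.wav` and `preprocessor.pte`
paths are placeholders and error handling is omitted:

```cpp
#include <vector>

#include <executorch/extension/llm/runner/wav_loader.h>
#include <executorch/extension/module/module.h>
#include <executorch/extension/tensor/tensor_ptr_maker.h>

using ::executorch::extension::Module;
using ::executorch::extension::from_blob;

// Load raw samples and run the exported preprocessor to get mel features.
std::vector<float> audio =
    executorch::extension::llm::load_wav_audio_data("audio.wav");
Module preprocessor("preprocessor.pte", Module::LoadMode::Mmap);
ET_CHECK_OK(preprocessor.load());

auto audio_tensor = from_blob(
    audio.data(),
    {static_cast<::executorch::aten::SizesType>(audio.size())},
    ::executorch::aten::ScalarType::Float);
auto outputs = preprocessor.execute("forward", audio_tensor).get();
// outputs[0] is the mel spectrogram tensor (expected shape 1 x 128 x 3000)
// that the runner's `transcribe()` consumes.
```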
## Build

```bash
cmake -G Ninja \
  -B cmake-out/examples/models/whisper \
  -S examples/models/whisper
cmake --build cmake-out/examples/models/whisper -j
```
The build produces the `whisper_runner` binary defined by this directory's
`CMakeLists.txt`, linked against the standard ExecuTorch runtime libraries and
the tokenizer target (`tokenizers::tokenizers`).
## Usage

```cpp
#include <iostream>

#include <executorch/examples/models/whisper/runner.h>
#include <executorch/extension/tensor/tensor_ptr.h>

using example::WhisperRunner;
using example::WhisperTranscribeConfig;

WhisperRunner runner("model.pte", "model.ptd", "tokenizer.json");
ET_CHECK_OK(runner.load());

// `features` is the mel spectrogram tensor produced by the preprocessor.
executorch::aten::Tensor features = load_features_somehow();

WhisperTranscribeConfig config;
config.max_new_tokens = 128;  // stop after 128 generated tokens
config.temperature = 0.7f;    // optional: enable stochastic sampling

auto tokens_result = runner.transcribe(
    features,
    config,
    [](const std::string& piece) {
      std::cout << piece;
    });

if (!tokens_result.ok()) {
  ET_LOG(
      Error,
      "Transcription failed: %d",
      static_cast<int>(tokens_result.error()));
}
```
`transcribe()` returns the full token history (prompt + generated tokens) and
invokes the callback every time a new token is emitted. Provide a non-empty
`decoder_input_ids` vector if you want to seed the decoder with a custom prompt,
and override `WhisperTranscribeConfig::eos_token_ids` when the model exposes
custom termination ids.
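As a hedged illustration of those overrides, assuming both fields live on
`WhisperTranscribeConfig` (check `runner.h` for the exact layout; the token ids
below are illustrative Whisper special-token values, not taken from this repo):

```cpp
WhisperTranscribeConfig config;
// Seed the decoder with a custom prompt (illustrative special-token ids).
config.decoder_input_ids = {50258, 50259, 50359};
// Treat a custom end-of-text id as the termination signal.
config.eos_token_ids = {50257};
config.max_new_tokens = 128;
```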

examples/models/whisper/main.cpp

Lines changed: 194 additions & 0 deletions
@@ -0,0 +1,194 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <fstream>
#include <memory>
#include <string>
#include <vector>

#include <gflags/gflags.h>

#include <executorch/examples/models/whisper/runner.h>
#include <executorch/extension/llm/runner/util.h>
#include <executorch/extension/llm/runner/wav_loader.h>
#include <executorch/extension/module/module.h>
#include <executorch/extension/tensor/tensor_ptr_maker.h>
#include <executorch/runtime/core/evalue.h>
#include <executorch/runtime/platform/log.h>

DEFINE_string(model_path, "model.pte", "Path to Whisper model (.pte).");
DEFINE_string(data_path, "", "Optional path to Whisper weights (.ptd).");
DEFINE_string(
    tokenizer_path,
    ".",
    "Path to tokenizer directory containing tokenizer.json, tokenizer_config.json, and special_tokens_map.json.");
DEFINE_string(
    preprocessor_path,
    "",
    "Path to preprocessor .pte for converting raw audio.");
DEFINE_string(
    audio_path,
    "",
    "Path to input audio file. Accepts .wav or raw float .bin.");
DEFINE_string(
    preprocessed_audio_path,
    "",
    "Path to preprocessed audio features file (.bin). If provided, skips preprocessing.");
DEFINE_double(
    temperature,
    0.0,
    "Sampling temperature. 0.0 performs greedy decoding.");
DEFINE_int32(max_new_tokens, 128, "Maximum number of tokens to generate.");

namespace {

using ::executorch::extension::from_blob;
using ::executorch::extension::Module;

bool ends_with(const std::string& value, const std::string& suffix) {
  return value.size() >= suffix.size() &&
      value.compare(value.size() - suffix.size(), suffix.size(), suffix) == 0;
}

std::vector<float> load_preprocessed_audio(
    const std::string& preprocessed_audio_path) {
  ET_LOG(
      Info,
      "Loading preprocessed audio from: %s",
      preprocessed_audio_path.c_str());

  std::ifstream stream(
      preprocessed_audio_path, std::ios::binary | std::ios::ate);
  if (!stream.is_open()) {
    ET_LOG(
        Error,
        "Failed to open preprocessed audio file: %s",
        preprocessed_audio_path.c_str());
    throw std::runtime_error("Failed to open preprocessed audio file");
  }

  std::size_t byte_size = static_cast<std::size_t>(stream.tellg());
  stream.seekg(0, std::ios::beg);

  const int64_t batch_size = 1;
  const int64_t feature_dim = 128;
  const int64_t time_steps = 3000;
  const int64_t expected_elements = batch_size * feature_dim * time_steps;
  const std::size_t expected_bytes = expected_elements * sizeof(float);

  if (byte_size != expected_bytes) {
    ET_LOG(
        Error,
        "Preprocessed audio file size mismatch. Expected %zu bytes, got %zu bytes",
        expected_bytes,
        byte_size);
    throw std::runtime_error("Preprocessed audio file size mismatch");
  }

  std::vector<float> feature_data(expected_elements);
  stream.read(reinterpret_cast<char*>(feature_data.data()), byte_size);
  stream.close();

  return feature_data;
}

} // namespace

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  ::executorch::extension::TensorPtr features;
  std::vector<float> audio_data;
  std::unique_ptr<Module> processor;

  if (!FLAGS_preprocessed_audio_path.empty()) {
    audio_data = load_preprocessed_audio(FLAGS_preprocessed_audio_path);

    const int64_t batch_size = 1;
    const int64_t feature_dim = 128;
    const int64_t time_steps = 3000;
    features = from_blob(
        audio_data.data(),
        /*sizes=*/{batch_size, feature_dim, time_steps},
        /*strides=*/{feature_dim * time_steps, feature_dim, 1},
        ::executorch::aten::ScalarType::Float);
  } else {
    // Original preprocessing path
    if (FLAGS_audio_path.empty()) {
      ET_LOG(
          Error,
          "Either audio_path or preprocessed_audio_path flag must be provided.");
      return 1;
    }

    audio_data =
        executorch::extension::llm::load_wav_audio_data(FLAGS_audio_path);
    ET_LOG(
        Info,
        "First 2 values of audio data: %f, %f",
        audio_data[0],
        audio_data[1]);
    // Preprocess audio
    processor = std::make_unique<Module>(
        FLAGS_preprocessor_path, Module::LoadMode::Mmap);
    auto load_error = processor->load();
    if (load_error != ::executorch::runtime::Error::Ok) {
      ET_LOG(Error, "Failed to load preprocessor module.");
      return 1;
    }

    auto audio_tensor = from_blob(
        audio_data.data(),
        {static_cast<::executorch::aten::SizesType>(audio_data.size())},
        ::executorch::aten::ScalarType::Float);

    auto processed_result = processor->execute("forward", audio_tensor);
    if (processed_result.error() != ::executorch::runtime::Error::Ok) {
      ET_LOG(Error, "Audio preprocessing failed.");
      return 1;
    }
    auto outputs = std::move(processed_result.get());
    if (outputs.empty() || !outputs[0].isTensor()) {
      ET_LOG(Error, "Preprocessor returned unexpected outputs.");
      return 1;
    }
    auto tensor = outputs[0].toTensor();
    ET_LOG(
        Info,
        "Result scalar_type: %s, first value %f",
        ::executorch::runtime::toString(tensor.scalar_type()),
        tensor.mutable_data_ptr<float>()[0]);
    features = std::make_shared<::executorch::aten::Tensor>(std::move(tensor));
  }

  example::WhisperRunner runner(
      FLAGS_model_path, FLAGS_data_path, FLAGS_tokenizer_path);
  auto load_err = runner.load();
  if (load_err != ::executorch::runtime::Error::Ok) {
    ET_LOG(Error, "Failed to load Whisper model.");
    return 1;
  }

  example::WhisperTranscribeConfig config;
  config.max_new_tokens = FLAGS_max_new_tokens;
  config.temperature = static_cast<float>(FLAGS_temperature);

  std::string transcript;
  auto result =
      runner.transcribe(features, config, [&](const std::string& piece) {
        ::executorch::extension::llm::safe_printf(piece.c_str());
        fflush(stdout);
      });

  if (!result.ok()) {
    ET_LOG(Error, "Transcription failed.");
    return 1;
  }

  return 0;
}
