Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions extension/llm/runner/multimodal_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,6 @@ Error MultimodalRunner::load() {
ET_LOG(Info, format, __VA_ARGS__); \
}

// Rvalue-reference overload of prefill(): accepts a temporary (or std::move'd)
// vector and delegates to the const-reference overload.
Error MultimodalRunner::prefill(std::vector<MultimodalInput>&& inputs) {
  // A named rvalue reference is itself an lvalue, so binding it to a const
  // reference selects the const-reference overload rather than this one.
  const std::vector<MultimodalInput>& borrowed_inputs = inputs;
  return prefill(borrowed_inputs);
}

Error MultimodalRunner::prefill(const std::vector<MultimodalInput>& inputs) {
if (!is_loaded()) {
ET_CHECK_OK_OR_RETURN_ERROR(load());
Expand All @@ -77,16 +72,6 @@ Error MultimodalRunner::prefill(const std::vector<MultimodalInput>& inputs) {
return Error::Ok;
}

// Rvalue-reference overload of generate(): accepts a temporary (or
// std::move'd) input vector and delegates to the const-reference overload.
// The two callbacks are handed on via std::move; the inputs are only borrowed.
Error MultimodalRunner::generate(
    std::vector<MultimodalInput>&& inputs,
    const GenerationConfig& config,
    std::function<void(const std::string&)> token_callback,
    std::function<void(const Stats&)> stats_callback) {
  // A named rvalue reference is itself an lvalue, so binding it to a const
  // reference selects the const-reference overload rather than this one.
  const std::vector<MultimodalInput>& borrowed_inputs = inputs;
  return generate(
      borrowed_inputs,
      config,
      std::move(token_callback),
      std::move(stats_callback));
}

Error MultimodalRunner::generate(
const std::vector<MultimodalInput>& inputs,
const GenerationConfig& config,
Expand Down
24 changes: 0 additions & 24 deletions extension/llm/runner/multimodal_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,21 +119,6 @@ class ET_EXPERIMENTAL MultimodalRunner {
std::function<void(const std::string&)> token_callback = {},
std::function<void(const Stats&)> stats_callback = {});

/**
* Generate tokens from multimodal inputs supplied as an rvalue vector.
* This overload lets callers pass a temporary vector (or one wrapped in
* std::move) directly.
* NOTE(review): the definition in multimodal_runner.cpp forwards to the
* const-reference overload, so `inputs` does not appear to be consumed —
* confirm before relying on moved-from semantics.
* @param inputs A vector of MultimodalInput objects, bound as an rvalue
* reference.
* @param config Generation configuration parameters.
* @param token_callback Callback function called for each generated token;
* defaults to an empty std::function.
* @param stats_callback Callback function for generation statistics;
* defaults to an empty std::function.
* @return The error code. KV cache position is tracked internally in pos_.
*/
virtual ::executorch::runtime::Error generate(
std::vector<MultimodalInput>&& inputs,
const GenerationConfig& config,
std::function<void(const std::string&)> token_callback = {},
std::function<void(const Stats&)> stats_callback = {});

/**
* Prefill multimodal inputs, for example to reload chat history.
* @param inputs A vector of MultimodalInput objects containing images and
Expand All @@ -143,15 +128,6 @@ class ET_EXPERIMENTAL MultimodalRunner {
virtual ::executorch::runtime::Error prefill(
const std::vector<MultimodalInput>& inputs);

/**
* Prefill multimodal inputs supplied as an rvalue vector.
* This overload lets callers pass a temporary vector (or one wrapped in
* std::move) directly.
* NOTE(review): the definition in multimodal_runner.cpp forwards to the
* const-reference overload, so `inputs` does not appear to be consumed —
* confirm before relying on moved-from semantics.
* @param inputs A vector of MultimodalInput objects, bound as an rvalue
* reference.
* @return The error code. KV cache position is tracked internally in pos_.
*/
virtual ::executorch::runtime::Error prefill(
std::vector<MultimodalInput>&& inputs);

// Forwards a stop request to the text token generator.
inline void stop() {
  auto& generator = *text_token_generator_;
  generator.stop();
}
Expand Down
Loading