Merged
4 changes: 2 additions & 2 deletions extension/llm/runner/multimodal_runner.cpp
@@ -65,8 +65,8 @@ Error MultimodalRunner::load() {
Error MultimodalRunner::generate(
const std::vector<MultimodalInput>& inputs,
const GenerationConfig& config,
-    std::function<void(const std::string&)>& token_callback,
-    std::function<void(const Stats&)>& stats_callback) {
+    std::function<void(const std::string&)> token_callback,
+    std::function<void(const Stats&)> stats_callback) {
if (inputs.empty()) {
ET_LOG(Error, "MultimodalInput vector cannot be empty");
return Error::InvalidArgument;
4 changes: 2 additions & 2 deletions extension/llm/runner/multimodal_runner.h
@@ -116,8 +116,8 @@ class ET_EXPERIMENTAL MultimodalRunner {
virtual ::executorch::runtime::Error generate(
const std::vector<MultimodalInput>& inputs,
const GenerationConfig& config,
-      std::function<void(const std::string&)>& token_callback,
-      std::function<void(const Stats&)>& stats_callback);
+      std::function<void(const std::string&)> token_callback = {},
+      std::function<void(const Stats&)> stats_callback = {});

inline void stop() {
text_token_generator_->stop();
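The header change is what makes the callbacks optional: taking the std::function parameters by value with {} defaults lets a caller omit them entirely or pass temporary lambdas, neither of which the old non-const reference parameters allowed. A minimal call-site sketch under that assumption follows; the wrapper function name, the default-constructed GenerationConfig, and the surrounding includes/using-declarations are illustrative, not part of this PR.

// Hypothetical call site. MultimodalRunner, MultimodalInput, GenerationConfig,
// Stats, and Error come from the ExecuTorch LLM runner headers shown in this
// diff; the appropriate includes and using-declarations are assumed.
executorch::runtime::Error run_generation(
    MultimodalRunner& runner,
    const std::vector<MultimodalInput>& inputs) {
  GenerationConfig config; // assumed to be default-constructible

  // With the defaulted by-value parameters, both callbacks can be omitted.
  auto err = runner.generate(inputs, config);
  if (err != executorch::runtime::Error::Ok) {
    return err;
  }

  // Temporary lambdas now bind directly to the by-value std::function
  // parameters; the old non-const reference parameters could not bind to
  // these rvalues.
  return runner.generate(
      inputs,
      config,
      [](const std::string& token) { /* stream the decoded token */ },
      [](const Stats& stats) { /* record timing/throughput stats */ });
}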
6 changes: 3 additions & 3 deletions extension/llm/runner/text_token_generator.h
@@ -36,9 +36,9 @@ class ET_EXPERIMENTAL TextTokenGenerator {

/**
* Token generation loop.
-   * @param tokens prompt tokens as well as the first token generated by
-   * prefill.
-   * @param start_pos the start position of the new tokens, based on how many
+   * @param tokens The first token generated by prefill, if using kv cache. Else
+   * the prompt tokens + the first token generated by prefill.
+   * @param start_pos The start position of the new tokens, based on how many
* prompt tokens is prefilled.
* @param max_new_tokens Maximum number of new tokens to generate.
* @param temperature controls the randomness of predictions by scaling the
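The reworded tokens contract can be read as: with a KV cache the prompt tokens are already cached by prefill, so only the first prefill-generated token is passed in; without one, the full prompt plus that first token must be supplied again. A hedged caller-side sketch of that contract follows; the function name, variable names, and the uint64_t token type are assumptions for illustration, not part of the ExecuTorch API.

#include <cstdint>
#include <vector>

// Illustrative construction of the `tokens` vector described in the updated
// doc comment above.
std::vector<uint64_t> build_generation_tokens(
    const std::vector<uint64_t>& prompt_tokens,
    uint64_t first_generated_token,
    bool use_kv_cache) {
  if (use_kv_cache) {
    // Prompt tokens already live in the KV cache after prefill, so only the
    // first token produced by prefill is passed to the generation loop.
    return {first_generated_token};
  }
  // Without a KV cache the generator must re-see the whole context:
  // the prompt tokens followed by the first prefill-generated token.
  std::vector<uint64_t> tokens = prompt_tokens;
  tokens.push_back(first_generated_token);
  return tokens;
}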