
Commit a77c8df

Remove generate_from_pos since there's no user of it (#14277)
As titled, rely on CI jobs
1 parent 1454743 commit a77c8df

7 files changed: +14 −72 lines

examples/mediatek/executor_runner/mtk_llama_runner.h

Lines changed: 2 additions & 0 deletions
@@ -66,6 +66,8 @@ class MTKLlamaRunner : public executorch::extension::llm::IRunner {
       std::function<void(const std::string&)> token_callback);
   std::unique_ptr<Tokenizer> load_tokenizer();
 
+  void reset() {}
+
  private:
   // model
   const LlamaModelOptions modeloptions_;

examples/models/llama/main.cpp

Lines changed: 2 additions & 0 deletions
@@ -105,6 +105,8 @@ int32_t main(int32_t argc, char** argv) {
       ET_LOG(Error, "Failed to warmup llama runner");
       return 1;
     }
+    // reset kv cache pos to 0
+    runner->reset();
   }
   // generate
   executorch::extension::llm::GenerationConfig config{
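
Since warmup() itself runs a prefill, the new reset() call rewinds the KV cache before the real request. A condensed sketch of that flow follows; the prompts, token budget, and the assumption that the runner is an already-loaded TextLLMRunner are placeholders rather than code taken from main.cpp, and the Stats callback type is assumed to live in executorch::extension::llm as in irunner.h.

#include <executorch/extension/llm/runner/text_llm_runner.h>

// Sketch only: warm up, rewind the KV cache, then run the real generation.
// Prompt strings and max_new_tokens are placeholder values.
int run_once(executorch::extension::llm::TextLLMRunner& runner) {
  if (runner.warmup("warmup prompt", /*max_new_tokens=*/8) !=
      executorch::runtime::Error::Ok) {
    return 1;
  }
  runner.reset();  // drop the warmup tokens; KV-cache position back to 0

  executorch::extension::llm::GenerationConfig config{};
  auto err = runner.generate(
      "the real prompt",
      config,
      [](const std::string& piece) { /* stream decoded text */ },
      [](const executorch::extension::llm::Stats& stats) { /* collect stats */ });
  return err == executorch::runtime::Error::Ok ? 0 : 1;
}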

examples/qualcomm/oss_scripts/llama/runner/runner.cpp

Lines changed: 2 additions & 12 deletions
@@ -354,17 +354,6 @@ Error Runner<T>::generate(
     const llm::GenerationConfig& config,
     std::function<void(const std::string&)> token_callback,
     std::function<void(const Stats&)> stats_callback) {
-  return generate_from_pos(prompt, 0, config, token_callback, stats_callback);
-}
-
-template <typename T>
-Error Runner<T>::generate_from_pos(
-    const std::string& prompt,
-    int64_t start_pos,
-    const llm::GenerationConfig& config,
-    std::function<void(const std::string&)> token_callback,
-    std::function<void(const Stats&)> stats_callback) {
-  // TODO: currently only support start_pos == 0
   return generate_from_prompt_or_file(
       prompt, false, config, token_callback, stats_callback);
 }
@@ -435,7 +424,8 @@ Error Runner<T>::generate_from_prompt_or_file(
   stats_.first_token_ms = time_in_ms();
   stats_.prompt_eval_end_ms = time_in_ms();
 
-  // print the first token from prefill. No prev_token so use cur_token for it.
+  // print the first token from prefill. No prev_token so use cur_token for
+  // it.
   if (token_callback) {
     token_callback(
         ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));

examples/qualcomm/oss_scripts/llama/runner/runner.h

Lines changed: 2 additions & 7 deletions
@@ -72,20 +72,15 @@ class Runner : public executorch::extension::llm::IRunner {
       std::function<void(const std::string&)> token_callback = {},
       std::function<void(const executorch::llm::Stats&)> stats_callback = {})
       override;
-  executorch::runtime::Error generate_from_pos(
-      const std::string& prompt,
-      int64_t start_pos,
-      const executorch::extension::llm::GenerationConfig& config,
-      std::function<void(const std::string&)> token_callback = {},
-      std::function<void(const executorch::llm::Stats&)> stats_callback = {})
-      override;
+
   executorch::runtime::Error generate_from_prompt_or_file(
       const std::string& prompt,
       bool tokenized_prompt,
       const executorch::extension::llm::GenerationConfig& config,
       std::function<void(const std::string&)> token_callback = {},
       std::function<void(const executorch::llm::Stats&)> stats_callback = {});
   void stop() override {};
+  void reset() override {};
   executorch::runtime::Result<DecoderModelVersion> get_decoder_model_version();
 
  private:

extension/llm/runner/irunner.h

Lines changed: 2 additions & 23 deletions
@@ -125,39 +125,18 @@ class ET_EXPERIMENTAL IRunner {
       std::function<void(const std::string&)> token_callback,
       std::function<void(const Stats&)> stats_callback) = 0;
 
-  /**
-   * Generate text based on the provided prompt and generation config, from a
-   * given position in KV cache.
-   *
-   * @param prompt The input prompt to generate from
-   * @param start_pos The starting position in KV cache of the input. Note:
-   * Depending on the actual implementation, a runner may manage the position
-   * internally, and this may not be respected.
-   * @param config Generation configuration parameters
-   * @param token_callback Callback function called for each generated token
-   * @param stats_callback Callback function for generation statistics
-   * @return Error::Ok if successful, an error otherwise
-   */
-  virtual runtime::Error generate_from_pos(
-      const std::string& prompt,
-      int64_t start_pos,
-      const GenerationConfig& config,
-      std::function<void(const std::string&)> token_callback,
-      std::function<void(const Stats&)> stats_callback) = 0;
   /**
    * Stop the generation process.
    */
   virtual void stop() = 0;
+
   /**
    * Force remove prefilled tokens and reset KV cache start position
    *
-   * For some existing runners, overriding this method is not needed because
-   * start_pos is passed as an argument to generate_from_pos.
-   *
   * This method removes the prefilled tokens from the KV cache and resets the
   * start position to 0.
   */
-  virtual void reset() {};
+  virtual void reset() = 0;
 };
 
 } // namespace llm
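
With generate_from_pos() removed and reset() now pure virtual, code that still called the old entry point with start_pos == 0 (the only value the in-tree implementations supported) can switch to an explicit rewind followed by a plain generate(). A minimal sketch, using only the IRunner members visible in this diff; the helper name is hypothetical and not part of the codebase.

#include <functional>
#include <string>
#include <utility>

#include <executorch/extension/llm/runner/irunner.h>

using executorch::extension::llm::GenerationConfig;
using executorch::extension::llm::IRunner;
using executorch::extension::llm::Stats;
using executorch::runtime::Error;

// Hypothetical helper: replaces the removed
//   runner.generate_from_pos(prompt, /*start_pos=*/0, config, on_token, on_stats)
// with a rewind plus the plain generate() overload.
Error generate_from_start(
    IRunner& runner,
    const std::string& prompt,
    const GenerationConfig& config,
    std::function<void(const std::string&)> on_token,
    std::function<void(const Stats&)> on_stats) {
  runner.reset();  // remove prefilled tokens; KV-cache start position back to 0
  return runner.generate(prompt, config, std::move(on_token), std::move(on_stats));
}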

extension/llm/runner/text_llm_runner.cpp

Lines changed: 2 additions & 11 deletions
@@ -43,6 +43,7 @@ TextLLMRunner::TextLLMRunner(
       io_manager_(std::move(io_manager)),
       text_token_generator_(std::move(text_token_generator)),
       stats_(std::move(stats)),
+      pos_(0),
       temperature_(temperature) {
   // Note: This constructor assumes that text_prefiller and text_token_generator
   // already have references to the Module and TextDecoderRunner they need
@@ -70,9 +71,8 @@ Error TextLLMRunner::load() {
     ET_LOG(Info, format, __VA_ARGS__); \
   }
 
-Error TextLLMRunner::generate_from_pos(
+Error TextLLMRunner::generate(
     const std::string& prompt,
-    ET_UNUSED int64_t start_pos,
     const GenerationConfig& config,
     std::function<void(const std::string&)> token_callback,
     std::function<void(const Stats&)> stats_callback) {
@@ -217,15 +217,6 @@ Error TextLLMRunner::generate_from_pos(
   return Error::Ok;
 }
 
-Error TextLLMRunner::generate(
-    const std::string& prompt,
-    const GenerationConfig& config,
-    std::function<void(const std::string&)> token_callback,
-    std::function<void(const Stats&)> stats_callback) {
-  pos_ = 0;
-  return generate_from_pos(prompt, 0, config, token_callback, stats_callback);
-}
-
 Error TextLLMRunner::warmup(const std::string& prompt, int32_t max_new_tokens) {
   // Create a GenerationConfig for warmup
   GenerationConfig config{

extension/llm/runner/text_llm_runner.h

Lines changed: 2 additions & 19 deletions
@@ -101,25 +101,6 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
       std::function<void(const std::string&)> token_callback = {},
       std::function<void(const Stats&)> stats_callback = {}) override;
 
-  /**
-   * Generate text based on the provided prompt and generation config, from a
-   * given position in KV cache.
-   *
-   * @param prompt The input prompt to generate from
-   * @param start_pos [Unused] The starting position in KV cache of the input,
-   * ignored because the runner manages the position internally.
-   * @param config Generation configuration parameters
-   * @param token_callback Callback function called for each generated token
-   * @param stats_callback Callback function for generation statistics
-   * @return Error::Ok if successful, an error otherwise
-   */
-  ET_DEPRECATED runtime::Error generate_from_pos(
-      const std::string& prompt,
-      ET_UNUSED int64_t start_pos,
-      const GenerationConfig& config,
-      std::function<void(const std::string&)> token_callback = {},
-      std::function<void(const Stats&)> stats_callback = {}) override;
-
   /**
    * @brief Warms up the model with a sample prompt
    *
@@ -133,13 +114,15 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
   ::executorch::runtime::Error warmup(
       const std::string& prompt,
       int32_t max_new_tokens);
+
   /**
    * @brief Remove prefilled tokens and reset start position, and stats.
    *
    * This method removes the prefilled tokens from the KV cache and resets the
    * start position to 0. It also clears the stats for previous runs.
    */
   void reset() override;
+
   /**
    * @brief Stops the ongoing text generation process
    *
