Commit f10dfef

Remove generate_from_pos since there's no user of it
As titled
1 parent f1ca55a · commit f10dfef
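For any downstream caller still on generate_from_pos, the equivalent flow after this change is an explicit reset() followed by generate(). Below is a minimal call-site sketch, assuming the usual executorch include path, a runner that has already been constructed and loaded, and a max_new_tokens field on GenerationConfig (all three are assumptions, not taken from this diff):

#include <executorch/extension/llm/runner/text_llm_runner.h>

#include <iostream>
#include <string>

using executorch::extension::llm::GenerationConfig;
using executorch::extension::llm::Stats;
using executorch::extension::llm::TextLLMRunner;

// `runner` is assumed to be constructed and loaded elsewhere.
void generate_fresh(TextLLMRunner& runner, const std::string& prompt) {
  GenerationConfig config;
  config.max_new_tokens = 64;  // assumed field name

  // Before: runner.generate_from_pos(prompt, /*start_pos=*/0, config, ...);
  // After: rewind the KV cache explicitly, then generate.
  runner.reset();
  const auto err = runner.generate(
      prompt,
      config,
      [](const std::string& token) { std::cout << token; },  // per-token
      [](const Stats&) {});                                   // stats
  (void)err;
}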

File tree

examples/mediatek/executor_runner/mtk_llama_runner.h
extension/llm/runner/irunner.h
extension/llm/runner/text_llm_runner.cpp
extension/llm/runner/text_llm_runner.h

4 files changed: +8 −53 lines


examples/mediatek/executor_runner/mtk_llama_runner.h

Lines changed: 2 additions & 0 deletions
@@ -66,6 +66,8 @@ class MTKLlamaRunner : public executorch::extension::llm::IRunner {
       std::function<void(const std::string&)> token_callback);
   std::unique_ptr<Tokenizer> load_tokenizer();
 
+  void reset() {}
+
  private:
   // model
   const LlamaModelOptions modeloptions_;

extension/llm/runner/irunner.h

Lines changed: 2 additions & 23 deletions
@@ -125,39 +125,18 @@ class ET_EXPERIMENTAL IRunner {
       std::function<void(const std::string&)> token_callback,
       std::function<void(const Stats&)> stats_callback) = 0;
 
-  /**
-   * Generate text based on the provided prompt and generation config, from a
-   * given position in KV cache.
-   *
-   * @param prompt The input prompt to generate from
-   * @param start_pos The starting position in KV cache of the input. Note:
-   * Depending on the actual implementation, a runner may manage the position
-   * internally, and this may not be respected.
-   * @param config Generation configuration parameters
-   * @param token_callback Callback function called for each generated token
-   * @param stats_callback Callback function for generation statistics
-   * @return Error::Ok if successful, an error otherwise
-   */
-  virtual runtime::Error generate_from_pos(
-      const std::string& prompt,
-      int64_t start_pos,
-      const GenerationConfig& config,
-      std::function<void(const std::string&)> token_callback,
-      std::function<void(const Stats&)> stats_callback) = 0;
   /**
    * Stop the generation process.
    */
   virtual void stop() = 0;
+
   /**
    * Force remove prefilled tokens and reset KV cache start position
    *
-   * For some existing runners, overriding this method is not needed because
-   * start_pos is passed as an argument to generate_from_pos.
-   *
    * This method removes the prefilled tokens from the KV cache and resets the
    * start position to 0.
    */
-  virtual void reset() {};
+  virtual void reset() = 0;
 };
 
 } // namespace llm
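Since reset() is now pure virtual, every IRunner implementation must provide it, even as a no-op, which is exactly what the MediaTek runner above adds. The self-contained sketch below mirrors only the slice of the interface visible in this diff; the stand-in types and EchoRunner are illustrative, not the real headers (the actual IRunner has further members beyond these):

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Stand-in types mirroring the real headers; illustrative only.
struct GenerationConfig {};
struct Stats {};
enum class Error { Ok };

// The slice of IRunner visible in this diff (the real interface is larger).
class IRunner {
 public:
  virtual ~IRunner() = default;
  virtual Error generate(
      const std::string& prompt,
      const GenerationConfig& config,
      std::function<void(const std::string&)> token_callback,
      std::function<void(const Stats&)> stats_callback) = 0;
  virtual void stop() = 0;
  // Now pure virtual: every runner must state how it rewinds the KV cache,
  // even if the answer is "nothing to do" (cf. the MediaTek no-op above).
  virtual void reset() = 0;
};

// Toy implementation: "generates" by echoing the prompt back.
class EchoRunner : public IRunner {
 public:
  Error generate(
      const std::string& prompt,
      const GenerationConfig& /*config*/,
      std::function<void(const std::string&)> token_callback,
      std::function<void(const Stats&)> stats_callback) override {
    token_callback(prompt);
    stats_callback(Stats{});
    pos_ += static_cast<int64_t>(prompt.size());  // advance the fake cache
    return Error::Ok;
  }
  void stop() override {}
  void reset() override { pos_ = 0; }  // rewind the KV-cache start position

 private:
  int64_t pos_ = 0;
};

int main() {
  EchoRunner runner;
  runner.reset();  // explicit now that the base class no longer defaults it
  runner.generate(
      "hello", GenerationConfig{},
      [](const std::string& t) { std::cout << t << '\n'; },
      [](const Stats&) {});
  return 0;
}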

extension/llm/runner/text_llm_runner.cpp

Lines changed: 2 additions & 11 deletions
@@ -43,6 +43,7 @@ TextLLMRunner::TextLLMRunner(
       io_manager_(std::move(io_manager)),
       text_token_generator_(std::move(text_token_generator)),
       stats_(std::move(stats)),
+      pos_(0),
       temperature_(temperature) {
   // Note: This constructor assumes that text_prefiller and text_token_generator
   // already have references to the Module and TextDecoderRunner they need
@@ -70,9 +71,8 @@ Error TextLLMRunner::load() {
   ET_LOG(Info, format, __VA_ARGS__); \
   }
 
-Error TextLLMRunner::generate_from_pos(
+Error TextLLMRunner::generate(
     const std::string& prompt,
-    ET_UNUSED int64_t start_pos,
     const GenerationConfig& config,
     std::function<void(const std::string&)> token_callback,
     std::function<void(const Stats&)> stats_callback) {
@@ -217,15 +217,6 @@ Error TextLLMRunner::generate_from_pos(
   return Error::Ok;
 }
 
-Error TextLLMRunner::generate(
-    const std::string& prompt,
-    const GenerationConfig& config,
-    std::function<void(const std::string&)> token_callback,
-    std::function<void(const Stats&)> stats_callback) {
-  pos_ = 0;
-  return generate_from_pos(prompt, 0, config, token_callback, stats_callback);
-}
-
 Error TextLLMRunner::warmup(const std::string& prompt, int32_t max_new_tokens) {
   // Create a GenerationConfig for warmup
   GenerationConfig config{

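Note the behavioral shift in this file: the deleted generate() wrapper zeroed pos_ on every call, while the surviving generate() (the renamed generate_from_pos body) leaves pos_ untouched, so consecutive calls continue from the current KV-cache position. Callers wanting the old fresh-start behavior must now opt in via reset(). A fragment sketching both patterns, with runner, config, and callbacks as in the sketch near the top:

// Multi-turn: the second call continues from the KV-cache position
// reached by the first, since generate() no longer resets pos_.
runner.generate(turn1, config, on_token, on_stats);
runner.generate(turn2, config, on_token, on_stats);

// Fresh start: explicitly drop prefilled tokens and rewind to position 0.
runner.reset();
runner.generate(new_prompt, config, on_token, on_stats);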
extension/llm/runner/text_llm_runner.h

Lines changed: 2 additions & 19 deletions
@@ -101,25 +101,6 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
       std::function<void(const std::string&)> token_callback = {},
       std::function<void(const Stats&)> stats_callback = {}) override;
 
-  /**
-   * Generate text based on the provided prompt and generation config, from a
-   * given position in KV cache.
-   *
-   * @param prompt The input prompt to generate from
-   * @param start_pos [Unused] The starting position in KV cache of the input,
-   * ignored because the runner manages the position internally.
-   * @param config Generation configuration parameters
-   * @param token_callback Callback function called for each generated token
-   * @param stats_callback Callback function for generation statistics
-   * @return Error::Ok if successful, an error otherwise
-   */
-  ET_DEPRECATED runtime::Error generate_from_pos(
-      const std::string& prompt,
-      ET_UNUSED int64_t start_pos,
-      const GenerationConfig& config,
-      std::function<void(const std::string&)> token_callback = {},
-      std::function<void(const Stats&)> stats_callback = {}) override;
-
   /**
    * @brief Warms up the model with a sample prompt
    *
@@ -133,13 +114,15 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
   ::executorch::runtime::Error warmup(
       const std::string& prompt,
       int32_t max_new_tokens);
+
   /**
    * @brief Remove prefilled tokens and reset start position, and stats.
    *
    * This method removes the prefilled tokens from the KV cache and resets the
    * start position to 0. It also clears the stats for previous runs.
    */
   void reset() override;
+
   /**
    * @brief Stops the ongoing text generation process
    *