Skip to content

Commit cdeb9b8

Browse files
committed
Try again
1 parent a2d6bfe commit cdeb9b8

File tree

4 files changed

+25
-17
lines changed

4 files changed

+25
-17
lines changed

examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/MainActivity.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -778,8 +778,12 @@ public void run() {
             mCurrentSettingsFields.getModelType(),
             mCurrentSettingsFields.getBackendType())
         == ModelUtils.VISION_MODEL) {
-      mModule.generate(
-          finalPrompt, ModelUtils.VISION_MODEL_SEQ_LEN, MainActivity.this, false);
+      mModule.generateFromPos(
+          finalPrompt,
+          ModelUtils.VISION_MODEL_SEQ_LEN,
+          startPos,
+          MainActivity.this,
+          false);
     } else if (mCurrentSettingsFields.getModelType() == ModelType.LLAMA_GUARD_3) {
       String llamaGuardPromptForClassification =
           PromptFormat.getFormattedLlamaGuardPrompt(rawPrompt);

extension/llm/runner/irunner.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,19 +128,17 @@ class ET_EXPERIMENTAL IRunner {
    * Generate text based on the provided prompt and generation config, from a
    * given position in KV cache.
    *
-   * Deprecated method. Please use generate() instead. It manages the pos
-   * automatically.
-   *
    * @param prompt The input prompt to generate from
-   * @param start_pos The starting position in KV cache of the input
+   * @param start_pos [Unused] The starting position in KV cache of the input,
+   * ignored because the runner manages the position internally.
    * @param config Generation configuration parameters
    * @param token_callback Callback function called for each generated token
    * @param stats_callback Callback function for generation statistics
    * @return Error::Ok if successful, an error otherwise
    */
-  ET_DEPRECATED virtual runtime::Error generate_from_pos(
+  virtual runtime::Error generate_from_pos(
       const std::string& prompt,
-      int64_t start_pos,
+      ET_UNUSED int64_t start_pos,
       const GenerationConfig& config,
       std::function<void(const std::string&)> token_callback,
       std::function<void(const Stats&)> stats_callback) = 0;

extension/llm/runner/text_llm_runner.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ Error TextLLMRunner::load() {
   ET_LOG(Info, format, __VA_ARGS__); \
 }

-Error TextLLMRunner::generate(
+Error TextLLMRunner::generate_from_pos(
     const std::string& prompt,
+    ET_UNUSED int64_t start_pos,
     const GenerationConfig& config,
     std::function<void(const std::string&)> token_callback,
     std::function<void(const Stats&)> stats_callback) {
@@ -216,6 +217,15 @@ Error TextLLMRunner::generate(
   return Error::Ok;
 }

+Error TextLLMRunner::generate(
+    const std::string& prompt,
+    const GenerationConfig& config,
+    std::function<void(const std::string&)> token_callback,
+    std::function<void(const Stats&)> stats_callback) {
+  reset();
+  return generate_from_pos(prompt, 0, config, token_callback, stats_callback);
+}
+
 Error TextLLMRunner::warmup(const std::string& prompt, int32_t max_new_tokens) {
   // Create a GenerationConfig for warmup
   GenerationConfig config{

extension/llm/runner/text_llm_runner.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,24 +105,20 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
    * Generate text based on the provided prompt and generation config, from a
    * given position in KV cache.
    *
-   * Deprecated method. Please use generate() instead. It manages the pos
-   * automatically.
-   *
    * @param prompt The input prompt to generate from
-   * @param start_pos The starting position in KV cache of the input
+   * @param start_pos [Unused] The starting position in KV cache of the input,
+   * ignored because the runner manages the position internally.
    * @param config Generation configuration parameters
    * @param token_callback Callback function called for each generated token
    * @param stats_callback Callback function for generation statistics
    * @return Error::Ok if successful, an error otherwise
    */
-  ET_DEPRECATED virtual runtime::Error generate_from_pos(
+  ET_DEPRECATED runtime::Error generate_from_pos(
       const std::string& prompt,
       ET_UNUSED int64_t start_pos,
       const GenerationConfig& config,
       std::function<void(const std::string&)> token_callback = {},
-      std::function<void(const Stats&)> stats_callback = {}) override {
-    return generate(prompt, config, token_callback, stats_callback);
-  }
+      std::function<void(const Stats&)> stats_callback = {}) override;

   /**
    * @brief Warms up the model with a sample prompt

0 commit comments

Comments (0)