Skip to content

Commit cdeb9b8

Browse files
committed
Try again
1 parent a2d6bfe commit cdeb9b8

File tree

4 files changed

+25
-17
lines changed

4 files changed

+25
-17
lines changed

examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/MainActivity.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -778,8 +778,12 @@ public void run() {
             mCurrentSettingsFields.getModelType(),
             mCurrentSettingsFields.getBackendType())
         == ModelUtils.VISION_MODEL) {
-      mModule.generate(
-          finalPrompt, ModelUtils.VISION_MODEL_SEQ_LEN, MainActivity.this, false);
+      mModule.generateFromPos(
+          finalPrompt,
+          ModelUtils.VISION_MODEL_SEQ_LEN,
+          startPos,
+          MainActivity.this,
+          false);
     } else if (mCurrentSettingsFields.getModelType() == ModelType.LLAMA_GUARD_3) {
       String llamaGuardPromptForClassification =
           PromptFormat.getFormattedLlamaGuardPrompt(rawPrompt);

extension/llm/runner/irunner.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,19 +128,17 @@ class ET_EXPERIMENTAL IRunner {
    * Generate text based on the provided prompt and generation config, from a
    * given position in KV cache.
    *
-   * Deprecated method. Please use generate() instead. It manages the pos
-   * automatically.
-   *
    * @param prompt The input prompt to generate from
-   * @param start_pos The starting position in KV cache of the input
+   * @param start_pos [Unused] The starting position in KV cache of the input,
+   * ignored because the runner manages the position internally.
    * @param config Generation configuration parameters
    * @param token_callback Callback function called for each generated token
    * @param stats_callback Callback function for generation statistics
    * @return Error::Ok if successful, an error otherwise
    */
-  ET_DEPRECATED virtual runtime::Error generate_from_pos(
+  virtual runtime::Error generate_from_pos(
       const std::string& prompt,
-      int64_t start_pos,
+      ET_UNUSED int64_t start_pos,
       const GenerationConfig& config,
       std::function<void(const std::string&)> token_callback,
       std::function<void(const Stats&)> stats_callback) = 0;

extension/llm/runner/text_llm_runner.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ Error TextLLMRunner::load() {
   ET_LOG(Info, format, __VA_ARGS__); \
 }

-Error TextLLMRunner::generate(
+Error TextLLMRunner::generate_from_pos(
     const std::string& prompt,
+    ET_UNUSED int64_t start_pos,
     const GenerationConfig& config,
     std::function<void(const std::string&)> token_callback,
     std::function<void(const Stats&)> stats_callback) {
@@ -216,6 +217,15 @@ Error TextLLMRunner::generate(
   return Error::Ok;
 }

+Error TextLLMRunner::generate(
+    const std::string& prompt,
+    const GenerationConfig& config,
+    std::function<void(const std::string&)> token_callback,
+    std::function<void(const Stats&)> stats_callback) {
+  reset();
+  return generate_from_pos(prompt, 0, config, token_callback, stats_callback);
+}
+
 Error TextLLMRunner::warmup(const std::string& prompt, int32_t max_new_tokens) {
   // Create a GenerationConfig for warmup
   GenerationConfig config{

extension/llm/runner/text_llm_runner.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,24 +105,20 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
    * Generate text based on the provided prompt and generation config, from a
    * given position in KV cache.
    *
-   * Deprecated method. Please use generate() instead. It manages the pos
-   * automatically.
-   *
    * @param prompt The input prompt to generate from
-   * @param start_pos The starting position in KV cache of the input
+   * @param start_pos [Unused] The starting position in KV cache of the input,
+   * ignored because the runner manages the position internally.
    * @param config Generation configuration parameters
    * @param token_callback Callback function called for each generated token
    * @param stats_callback Callback function for generation statistics
    * @return Error::Ok if successful, an error otherwise
    */
-  ET_DEPRECATED virtual runtime::Error generate_from_pos(
+  ET_DEPRECATED runtime::Error generate_from_pos(
       const std::string& prompt,
       ET_UNUSED int64_t start_pos,
       const GenerationConfig& config,
       std::function<void(const std::string&)> token_callback = {},
-      std::function<void(const Stats&)> stats_callback = {}) override {
-    return generate(prompt, config, token_callback, stats_callback);
-  }
+      std::function<void(const Stats&)> stats_callback = {}) override;

   /**
    * @brief Warms up the model with a sample prompt

0 commit comments

Comments (0)