diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/MainActivity.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/MainActivity.java
index b26031d89a6..922f0598f1d 100644
--- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/MainActivity.java
+++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/MainActivity.java
@@ -778,12 +778,8 @@ public void run() {
                     mCurrentSettingsFields.getModelType(), mCurrentSettingsFields.getBackendType())
                 == ModelUtils.VISION_MODEL) {
-              mModule.generateFromPos(
-                  finalPrompt,
-                  ModelUtils.VISION_MODEL_SEQ_LEN,
-                  startPos,
-                  MainActivity.this,
-                  false);
+              mModule.generate(
+                  finalPrompt, ModelUtils.VISION_MODEL_SEQ_LEN, MainActivity.this, false);
             } else if (mCurrentSettingsFields.getModelType() == ModelType.LLAMA_GUARD_3) {
               String llamaGuardPromptForClassification =
                   PromptFormat.getFormattedLlamaGuardPrompt(rawPrompt);
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
index 9494b0fe5cb..6599cb4c15d 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
@@ -218,24 +218,6 @@ public long prefillPrompt(String prompt, long startPos, int bos, int eos) {
   // returns a tuple of (status, updated startPos)
   private native int appendTextInput(String prompt, int bos, int eos);
 
-  /**
-   * Generate tokens from the given prompt, starting from the given position.
-   *
-   * <p>This is a deprecated API. Please use {@link #generate(String, int, LlmCallback, boolean)}
-   *
-   * @param prompt The text prompt to LLaVA.
-   * @param seqLen The total sequence length, including the prompt tokens and new tokens.
-   * @param startPos The starting position in KV cache of the input in the LLM.
-   * @param callback callback object to receive results.
-   * @param echo indicate whether to echo the input prompt or not.
-   * @return The error code.
-   */
-  @Deprecated
-  public int generateFromPos(
-      String prompt, int seqLen, long startPos, LlmCallback callback, boolean echo) {
-    return generate(prompt, seqLen, callback, echo);
-  }
-
   /**
    * Reset the context of the LLM. This will clear the KV cache and reset the state of the LLM.
    *
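
Migration note for other call sites: the removed `generateFromPos()` already ignored its `startPos` argument and forwarded to `generate(String, int, LlmCallback, boolean)`, so updating a caller is a drop-in removal of that one argument. A minimal sketch of the change (the module paths, temperature, and callback body here are illustrative assumptions, not part of this diff; the `onStats` signature in particular has varied across versions):

```java
import org.pytorch.executorch.extension.llm.LlmCallback;
import org.pytorch.executorch.extension.llm.LlmModule;

public class GenerateMigration {
  public static void main(String[] args) {
    // Hypothetical model/tokenizer paths and temperature, for illustration only.
    LlmModule module =
        new LlmModule("/data/local/tmp/llava.pte", "/data/local/tmp/tokenizer.bin", 0.8f);

    LlmCallback callback =
        new LlmCallback() {
          @Override
          public void onResult(String result) {
            System.out.print(result); // each generated token as it arrives
          }

          @Override
          public void onStats(String stats) { // signature assumed; check your version
            System.out.println(stats);
          }
        };

    int seqLen = 768; // total sequence length: prompt tokens + newly generated tokens

    // Before this diff (deprecated; startPos was silently dropped):
    // int err = module.generateFromPos("Describe the image.", seqLen, startPos, callback, false);

    // After this diff: identical call minus startPos.
    int err = module.generate("Describe the image.", seqLen, callback, /* echo */ false);
  }
}
```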