Expose reset() method. (#14589)

pytorchbot · shoumikhin · web-flow · commit 38e68b14a78d · 2025-09-26T14:02:24.000-07:00
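Summary: Expose the underlying runner's reset() on ExecuTorchLLMTextRunner and ExecuTorchLLMMultimodalRunner, and extend the tests to generate again after calling reset().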

Differential Revision: D83220816

Co-authored-by: Anthony Shoumikhin <anthony@shoumikh.in>
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
@@ -184,10 +184,16 @@ withTokenCallback:(nullable void (^)(NSString *))callback
             error:(NSError **)error;
 
 /**
- Stops any ongoing generation and cleans up internal resources.
+ Stop producing new tokens and terminate the current generation process.
 */
 - (void)stop;
 
+/**
+  Removes the prefilled tokens from the KV cache and resets the start position
+  to 0. Also clears the stats from previous runs.
+ */
+- (void)reset;
+
 + (instancetype)new NS_UNAVAILABLE;
 - (instancetype)init NS_UNAVAILABLE;
 
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm
@@ -216,4 +216,10 @@ - (void)stop {
   }
 }
 
+- (void)reset {
+  if (_runner) {
+    _runner->reset();
+  }
+}
+
 @end
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
@@ -64,10 +64,16 @@ withTokenCallback:(nullable void (^)(NSString *))callback
             error:(NSError **)error;
 
 /**
- Stops any ongoing generation and cleans up internal resources.
+ Stop producing new tokens and terminate the current generation process.
 */
 - (void)stop;
 
+/**
+  Removes the prefilled tokens from the KV cache and resets the start position
+  to 0. Also clears the stats from previous runs.
+ */
+- (void)reset;
+
 + (instancetype)new NS_UNAVAILABLE;
 - (instancetype)init NS_UNAVAILABLE;
 
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm
@@ -101,4 +101,10 @@ - (void)stop {
   }
 }
 
+- (void)reset {
+  if (_runner) {
+    _runner->reset();
+  }
+}
+
 @end
diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift
@@ -45,6 +45,11 @@ extension UIImage {
 }
 
 class MultimodalRunnerTest: XCTestCase {
+  let systemPrompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: "
+  let assistantPrompt = "ASSISTANT: "
+  let userPrompt = "What's on the picture?"
+  let sequenceLength = 768
+
   func test() {
     let bundle = Bundle(for: type(of: self))
     guard let modelPath = bundle.path(forResource: "llava", ofType: "pte"),
@@ -59,10 +64,25 @@ class MultimodalRunnerTest: XCTestCase {
 
     do {
       try runner.generate([
-        MultimodalInput("A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: "),
+        MultimodalInput(systemPrompt),
+        MultimodalInput(image.asImage()),
+        MultimodalInput("\(userPrompt) \(assistantPrompt)"),
+      ], sequenceLength: sequenceLength) { token in
+        text += token
+      }
+    } catch {
+      XCTFail("Failed to generate text with error \(error)")
+    }
+    XCTAssertTrue(text.lowercased().contains("waterfall"))
+
+    text = ""
+    runner.reset()
+    do {
+      try runner.generate([
+        MultimodalInput(systemPrompt),
         MultimodalInput(image.asImage()),
-        MultimodalInput("What's on the picture? ASSISTANT: "),
-      ], sequenceLength: 768) { token in
+        MultimodalInput("\(userPrompt) \(assistantPrompt)"),
+      ], sequenceLength: sequenceLength) { token in
         text += token
       }
     } catch {
diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift
@@ -36,6 +36,9 @@ struct SpecialTokens {
 }
 
 class TextRunnerTest: XCTestCase {
+  let userPrompt = "The capital of France is called"
+  let sequenceLength = 128
+
   func test() {
     let bundle = Bundle(for: type(of: self))
     guard let modelPath = bundle.path(forResource: "llama3_2-1B", ofType: "pte"),
@@ -47,12 +50,23 @@ class TextRunnerTest: XCTestCase {
     var text = ""
 
     do {
-      try runner.generate("hello", sequenceLength: 2) { token in
+      try runner.generate(userPrompt, sequenceLength: sequenceLength) { token in
+        text += token
+      }
+    } catch {
+      XCTFail("Failed to generate text with error \(error)")
+    }
+    XCTAssertTrue(text.lowercased().contains("paris"))
+
+    text = ""
+    runner.reset()
+    do {
+      try runner.generate(userPrompt, sequenceLength: sequenceLength) { token in
         text += token
       }
     } catch {
       XCTFail("Failed to generate text with error \(error)")
     }
-    XCTAssertEqual("hello,", text.lowercased())
+    XCTAssertTrue(text.lowercased().contains("paris"))
   }
 }

Original file line number	Diff line number	Diff line change
`@@ -216,4 +216,10 @@ - (void)stop {`
`216`	`216`	`}`
`217`	`217`	`}`
`218`	`218`
	`219`	`+- (void)reset {`
	`220`	`+ if (_runner) {`
	`221`	`+ _runner->reset();`
	`222`	`+ }`
	`223`	`+}`
	`224`	`+`
`219`	`225`	`@end`
Original file line number	Diff line number	Diff line change
`@@ -101,4 +101,10 @@ - (void)stop {`
`101`	`101`	`}`
`102`	`102`	`}`
`103`	`103`
	`104`	`+- (void)reset {`
	`105`	`+ if (_runner) {`
	`106`	`+ _runner->reset();`
	`107`	`+ }`
	`108`	`+}`
	`109`	`+`
`104`	`110`	`@end`