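Make method_name the last, defaulted ("forward") parameter of populate_start_pos_or_cache_position and update call sites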

kirklandsign · kirklandsign · commit 5665e9b29349 · 2025-09-15T14:47:26.000-07:00
diff --git a/extension/llm/runner/multimodal_prefiller.cpp b/extension/llm/runner/multimodal_prefiller.cpp
@@ -103,7 +103,7 @@ Result<uint64_t> MultimodalPrefiller::prefill(
   std::vector<int64_t> cache_positions;
 
   auto cache_position_tensor = ET_UNWRAP(populate_start_pos_or_cache_position(
-      kTextModelMethod, module_, start_pos, cache_positions, seq_len));
+      module_, start_pos, cache_positions, seq_len, kTextModelMethod));
 
   auto prefill_result = module_->execute(
       kTextModelMethod, {encoder_output, cache_position_tensor});
diff --git a/extension/llm/runner/text_decoder_runner.cpp b/extension/llm/runner/text_decoder_runner.cpp
@@ -40,7 +40,7 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
 
   if (use_kv_cache) {
     auto start_pos_tensor = ET_UNWRAP(populate_start_pos_or_cache_position(
-        "forward", module_, start_pos, cache_positions, tokens->numel()));
+        module_, start_pos, cache_positions, tokens->numel(), "forward"));
 
     std::vector<runtime::EValue> inputs;
     auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor);
diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h
@@ -108,11 +108,11 @@ ET_EXPERIMENTAL size_t inline get_rss_bytes() {
 // size 1 because model will populate the cache position tensor underneath), or
 // a populated tensor for cache position, for the given start_pos and seq_len.
 inline runtime::Result<TensorPtr> populate_start_pos_or_cache_position(
-    const char* method_name,
     Module* module,
     int64_t& start_pos,
     std::vector<int64_t>& cache_positions_vec,
-    int seq_len) {
+    int seq_len,
+    const char* method_name = "forward") {
   // Get expected shape of cache position tensor, which should be the second
   // argument
   auto method_meta = ET_UNWRAP(module->method_meta(method_name));