
Commit 05ac4c1

Address comments
1 parent 75bd77d commit 05ac4c1

3 files changed: 12 additions & 7 deletions

extension/llm/runner/multimodal_prefiller.cpp

Lines changed: 2 additions & 2 deletions

@@ -100,8 +100,8 @@ Result<uint64_t> MultimodalPrefiller::prefill(
     ET_LOG(Error, "The encoder returned an empty output.");
     return ::executorch::runtime::Error::InvalidState;
   }
-  auto cache_position_tensor =
-      ET_UNWRAP(populate_start_pos_tensor(module_, start_pos, seq_len));
+  auto cache_position_tensor = ET_UNWRAP(
+      populate_start_pos_or_cache_position(module_, start_pos, seq_len));
 
   auto prefill_result = module_->execute(
       kTextModelMethod, {encoder_output, cache_position_tensor});

extension/llm/runner/text_decoder_runner.cpp

Lines changed: 2 additions & 2 deletions

@@ -53,8 +53,8 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
   auto numel = sizes[0];
   std::vector<::executorch::aten::SizesType> sizes_vec = {numel};
 
-  auto start_pos_tensor = ET_UNWRAP(
-      populate_start_pos_tensor(module_, start_pos, tokens->numel()));
+  auto start_pos_tensor = ET_UNWRAP(populate_start_pos_or_cache_position(
+      module_, start_pos, tokens->numel()));
 
   std::vector<runtime::EValue> inputs;
   auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor);

extension/llm/runner/util.h

Lines changed: 8 additions & 3 deletions

@@ -103,8 +103,14 @@ ET_EXPERIMENTAL size_t inline get_rss_bytes() {
   return 0;
 }
 
-inline runtime::Result<TensorPtr>
-populate_start_pos_tensor(Module* module, int64_t& start_pos, int seq_len) {
+// Returns the cache position tensor, which can be either a single start_pos
+// (when the text_decoder expects a tensor with size 1 because model will
+// populate the cache position tensor underneath), or a populated tensor for
+// cache position, for the given start_pos and seq_len.
+inline runtime::Result<TensorPtr> populate_start_pos_or_cache_position(
+    Module* module,
+    int64_t& start_pos,
+    int seq_len) {
   // Get expected shape of cache position tensor, which should be the second
   // argument
   auto method_meta = ET_UNWRAP(module->method_meta(kTextModelMethod));
@@ -113,7 +119,6 @@ populate_start_pos_tensor(Module* module, int64_t& start_pos, int seq_len) {
   auto numel = second_input_sizes[0];
 
   TensorPtr start_pos_tensor;
-  std::vector<::executorch::aten::SizesType> sizes_vec = {numel};
   if (numel > 1) {
     // `cache_position` goes from start_pos to start_pos +
     // encoder_output.size(1). e.g. if start_pos = 2 and encoder_output.size(1)
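For context, the behavior that the renamed helper's branch on numel covers (as described in the new doc comment and the surrounding context lines) can be sketched as follows. This is an illustrative sketch only, not the actual util.h implementation: the free function start_pos_or_cache_position is hypothetical, plain std::vector<int64_t> stands in for ExecuTorch's TensorPtr, and expected_numel is assumed to come from the method metadata as in the diff above.

#include <cstdint>
#include <vector>

// Sketch of the two cases: either a full cache-position range
// [start_pos, start_pos + seq_len), or a single start_pos when the
// model expects a size-1 tensor and derives cache positions itself.
std::vector<int64_t> start_pos_or_cache_position(
    int64_t start_pos,
    int seq_len,
    int64_t expected_numel) {
  if (expected_numel > 1) {
    // Model expects explicit cache positions for every new token.
    std::vector<int64_t> cache_position(seq_len);
    for (int i = 0; i < seq_len; ++i) {
      cache_position[i] = start_pos + i;
    }
    return cache_position;
  }
  // Model expects only the starting position as a single element.
  return {start_pos};
}

For example, with start_pos = 2 and seq_len = 3, the first branch yields {2, 3, 4}, while the second yields {2}.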

0 commit comments
