Update cache position size for llava

kirklandsign · kirklandsign · commit 9bf819cf0ea9 · 2025-09-11T12:32:14.000-07:00
diff --git a/extension/llm/runner/multimodal_prefiller.cpp b/extension/llm/runner/multimodal_prefiller.cpp
@@ -94,6 +94,11 @@ Result<uint64_t> MultimodalPrefiller::prefill(
   // `cache_position` goes from start_pos to start_pos + encoder_output.size(1).
   // e.g. if start_pos = 2 and encoder_output.size(1) = 5,
   // cache_position_tensor should be [2, 3, 4, 5, 6].
+  auto method_meta = ET_UNWRAP(module_->method_meta(kTextModelMethod));
+  auto first_input_info = ET_UNWRAP(method_meta.input_tensor_meta(0));
+  auto sizes = first_input_info.sizes();
+  auto numel = sizes[0];
+
   int64_t seq_len = encoder_output.toTensor().size(1);
   if (seq_len == 0) {
     ET_LOG(Error, "The encoder returned an empty output.");
@@ -103,10 +108,13 @@ Result<uint64_t> MultimodalPrefiller::prefill(
   for (int64_t i = 0; i < seq_len; ++i) {
     cache_positions[i] = start_pos + i;
   }
-  auto cache_position_tensor = ::executorch::extension::from_blob(
-      cache_positions.data(),
-      {static_cast<int>(seq_len)},
-      executorch::aten::ScalarType::Long);
+  auto cache_position_tensor = (numel > 1)
+      ? ::executorch::extension::from_blob(
+            cache_positions.data(),
+            {static_cast<int>(seq_len)},
+            executorch::aten::ScalarType::Long)
+      : ::executorch::extension::from_blob(
+            &start_pos, {1}, executorch::aten::ScalarType::Long);
   auto prefill_result = module_->execute(
       kTextModelMethod, {cache_position_tensor, encoder_output});
   if (prefill_result.error() != ::executorch::runtime::Error::Ok) {