Update on "[llm] Support different shape of input_pos"

larryliu0820 · larryliu0820 · commit 64aab38770c6 · 2025-06-25T00:16:59.000-07:00
For Hugging Face models, `forward()` takes `tokens` as well as `cache_position`, which is a list of cache indices. This differs from the .pte files that `export_llama` produces, whose `forward()` takes `tokens` and `input_pos`, where `input_pos` is a scalar tensor. This PR adds support inside `text_decoder_runner.cpp` for handling both shapes of `input_pos`/`cache_position`. To keep the logic generic without relying on extra metadata, it inspects the method meta and the input tensor info to decide whether to feed in `input_pos` or `cache_position`. Differential Revision: [D77203700](https://our.internmc.facebook.com/intern/diff/D77203700/) [ghstack-poisoned]
diff --git a/extension/llm/runner/targets.bzl b/extension/llm/runner/targets.bzl
@@ -34,7 +34,7 @@ def define_common_targets():
             ],
             exported_deps = [
                 ":stats",
-                "//executorch/kernels/portable/cpu/util:arange_util",
+                "//executorch/kernels/portable/cpu/util:arange_util" + aten_suffix,
                 "//executorch/extension/llm/sampler:sampler" + aten_suffix,
                 "//executorch/extension/module:module" + aten_suffix,
                 "//executorch/extension/tensor:tensor" + aten_suffix,
diff --git a/kernels/portable/cpu/util/arange_util.cpp b/kernels/portable/cpu/util/arange_util.cpp
@@ -12,20 +12,21 @@ namespace torch::executor::native {
 #define ET_ARANGE_IMPL(ctx, start, numel, step, out, op_name)               \
   ET_SWITCH_REALHBF16_TYPES(out.scalar_type(), ctx, op_name, CTYPE, [&]() { \
     auto out_data = out.mutable_data_ptr<CTYPE>();                          \
-    for (Tensor::SizesType i = 0; i < numel; ++i) {                         \
+    for (executorch::aten::SizesType i = 0; i < numel; ++i) {               \
       out_data[i] = static_cast<CTYPE>(start + i * step);                   \
     }                                                                       \
   })
 
-Tensor::SizesType
+executorch::aten::SizesType
 compute_arange_out_size(double start, double end, double step) {
-  Tensor::SizesType numel =
-      static_cast<Tensor::SizesType>(std::ceil((end - start) / step));
+  executorch::aten::SizesType numel =
+      static_cast<executorch::aten::SizesType>(std::ceil((end - start) / step));
 
   ET_CHECK_MSG(
       numel >= 0,
-      "numel should be non-negative, but got (%d). start (%f), end (%f), step (%f)",
-      numel,
+      "numel should be non-negative, but got (%" PRId64
+      "). start (%f), end (%f), step (%f)",
+      static_cast<int64_t>(numel),
       start,
       end,
       step);
@@ -39,7 +40,7 @@ void arange_out_impl(
     double step,
     Tensor& out) {
   (void)ctx;
-  Tensor::SizesType numel = compute_arange_out_size(start, end, step);
+  executorch::aten::SizesType numel = compute_arange_out_size(start, end, step);
   ET_ARANGE_IMPL(ctx, start, numel, step, out, "arange.start_out");
 }
 
diff --git a/kernels/portable/cpu/util/arange_util.h b/kernels/portable/cpu/util/arange_util.h
@@ -12,10 +12,10 @@
 
 namespace torch::executor::native {
 
-Tensor::SizesType
+executorch::aten::SizesType
 compute_arange_out_size(double start, double end, double step);
 
-inline Tensor::SizesType compute_arange_out_size(double end) {
+inline executorch::aten::SizesType compute_arange_out_size(double end) {
   return compute_arange_out_size(0.0, end, 1.0);
 }
 
diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl
@@ -295,19 +295,6 @@ def define_common_targets():
         visibility = ["//executorch/kernels/portable/cpu/..."],
     )
 
-    runtime.cxx_library(
-        name = "arange_util",
-        srcs = ["arange_util.cpp"],
-        exported_headers = ["arange_util.h"],
-        exported_deps = [
-            "//executorch/runtime/kernel:kernel_includes",
-        ],
-        visibility = [
-            "//executorch/kernels/portable/cpu/...",
-            "//executorch/extension/llm/...",
-        ],
-    )
-
     runtime.cxx_library(
         name = "broadcast_indexes_range",
         exported_headers = ["broadcast_indexes_range.h"],
@@ -343,3 +330,17 @@ def define_common_targets():
                 "@EXECUTORCH_CLIENTS",
             ],
         )
+
+
+        runtime.cxx_library(
+            name = "arange_util{}".format(suffix),
+            srcs = ["arange_util.cpp"],
+            exported_headers = ["arange_util.h"],
+            exported_deps = [
+                "//executorch/runtime/kernel:kernel_includes{}".format(suffix),
+            ],
+            visibility = [
+                "//executorch/kernels/portable/cpu/...",
+                "//executorch/extension/llm/...",
+            ],
+        )

Original file line number	Diff line number	Diff line change
`@@ -12,10 +12,10 @@`
`12`	`12`
`13`	`13`	`namespace torch::executor::native {`
`14`	`14`
`15`		`-Tensor::SizesType`
	`15`	`+executorch::aten::SizesType`
`16`	`16`	`compute_arange_out_size(double start, double end, double step);`
`17`	`17`
`18`		`-inline Tensor::SizesType compute_arange_out_size(double end) {`
	`18`	`+inline executorch::aten::SizesType compute_arange_out_size(double end) {`
`19`	`19`	`return compute_arange_out_size(0.0, end, 1.0);`
`20`	`20`	`}`
`21`	`21`