
Commit 05ac4c1

Address comments
1 parent 75bd77d commit 05ac4c1

3 files changed: 12 additions & 7 deletions

extension/llm/runner/multimodal_prefiller.cpp

Lines changed: 2 additions & 2 deletions

@@ -100,8 +100,8 @@ Result<uint64_t> MultimodalPrefiller::prefill(
     ET_LOG(Error, "The encoder returned an empty output.");
     return ::executorch::runtime::Error::InvalidState;
   }
-  auto cache_position_tensor =
-      ET_UNWRAP(populate_start_pos_tensor(module_, start_pos, seq_len));
+  auto cache_position_tensor = ET_UNWRAP(
+      populate_start_pos_or_cache_position(module_, start_pos, seq_len));
 
   auto prefill_result = module_->execute(
       kTextModelMethod, {encoder_output, cache_position_tensor});

extension/llm/runner/text_decoder_runner.cpp

Lines changed: 2 additions & 2 deletions

@@ -53,8 +53,8 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
   auto numel = sizes[0];
   std::vector<::executorch::aten::SizesType> sizes_vec = {numel};
 
-  auto start_pos_tensor = ET_UNWRAP(
-      populate_start_pos_tensor(module_, start_pos, tokens->numel()));
+  auto start_pos_tensor = ET_UNWRAP(populate_start_pos_or_cache_position(
+      module_, start_pos, tokens->numel()));
 
   std::vector<runtime::EValue> inputs;
   auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor);

extension/llm/runner/util.h

Lines changed: 8 additions & 3 deletions

@@ -103,8 +103,14 @@ ET_EXPERIMENTAL size_t inline get_rss_bytes() {
   return 0;
 }
 
-inline runtime::Result<TensorPtr>
-populate_start_pos_tensor(Module* module, int64_t& start_pos, int seq_len) {
+// Returns the cache position tensor, which can be either a single start_pos
+// (when the text_decoder expects a tensor with size 1 because model will
+// populate the cache position tensor underneath), or a populated tensor for
+// cache position, for the given start_pos and seq_len.
+inline runtime::Result<TensorPtr> populate_start_pos_or_cache_position(
+    Module* module,
+    int64_t& start_pos,
+    int seq_len) {
   // Get expected shape of cache position tensor, which should be the second
   // argument
   auto method_meta = ET_UNWRAP(module->method_meta(kTextModelMethod));
@@ -113,7 +119,6 @@ populate_start_pos_tensor(Module* module, int64_t& start_pos, int seq_len) {
   auto numel = second_input_sizes[0];
 
   TensorPtr start_pos_tensor;
-  std::vector<::executorch::aten::SizesType> sizes_vec = {numel};
   if (numel > 1) {
     // `cache_position` goes from start_pos to start_pos +
     // encoder_output.size(1). e.g. if start_pos = 2 and encoder_output.size(1)
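For context, the behavior that the renamed helper's branch on numel covers (as described in the new doc comment and the surrounding context lines) can be sketched as follows. This is an illustrative sketch only, not the actual util.h implementation: the free function start_pos_or_cache_position is hypothetical, plain std::vector<int64_t> stands in for ExecuTorch's TensorPtr, and expected_numel is assumed to come from the method metadata as in the diff above.

#include <cstdint>
#include <vector>

// Sketch of the two cases: either a full cache-position range
// [start_pos, start_pos + seq_len), or a single start_pos when the
// model expects a size-1 tensor and derives cache positions itself.
std::vector<int64_t> start_pos_or_cache_position(
    int64_t start_pos,
    int seq_len,
    int64_t expected_numel) {
  if (expected_numel > 1) {
    // Model expects explicit cache positions for every new token.
    std::vector<int64_t> cache_position(seq_len);
    for (int i = 0; i < seq_len; ++i) {
      cache_position[i] = start_pos + i;
    }
    return cache_position;
  }
  // Model expects only the starting position as a single element.
  return {start_pos};
}

For example, with start_pos = 2 and seq_len = 3, the first branch yields {2, 3, 4}, while the second yields {2}.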

0 commit comments
