
Commit 9b2610e

Fix
1 parent 0237dfd commit 9b2610e

3 files changed: 12 additions & 3 deletions

extension/llm/runner/multimodal_prefiller.cpp

Lines changed: 3 additions & 1 deletion
@@ -100,8 +100,10 @@ Result<uint64_t> MultimodalPrefiller::prefill(
     ET_LOG(Error, "The encoder returned an empty output.");
     return ::executorch::runtime::Error::InvalidState;
   }
+  std::vector<int64_t> cache_positions;
+
   auto cache_position_tensor = ET_UNWRAP(populate_start_pos_or_cache_position(
-      kTextModelMethod, module_, start_pos, seq_len));
+      kTextModelMethod, module_, start_pos, cache_positions, seq_len));
 
   auto prefill_result = module_->execute(
       kTextModelMethod, {encoder_output, cache_position_tensor});
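
Both this call site and the one in text_decoder_runner.cpp below now declare a `cache_positions` vector in the caller and pass it by reference into `populate_start_pos_or_cache_position`, so the buffer that backs the returned cache-position tensor stays alive while `module_->execute` runs. Below is a minimal, self-contained sketch of that ownership pattern, assuming the tensor wraps the vector's storage without copying it; the `TensorView` type and `make_positions_tensor` helper are illustrative stand-ins, not the ExecuTorch API.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative non-owning tensor view over caller-provided storage.
struct TensorView {
  const int64_t* data;
  size_t numel;
};

// Stand-in for populate_start_pos_or_cache_position after this change:
// the caller supplies the backing vector, so the returned view remains
// valid for as long as the caller keeps that vector alive.
TensorView make_positions_tensor(
    int64_t start_pos, std::vector<int64_t>& backing, int seq_len) {
  backing.resize(seq_len);
  for (int64_t i = 0; i < seq_len; ++i) {
    backing[i] = start_pos + i;
  }
  return TensorView{backing.data(), backing.size()};
}

int main() {
  // Caller-owned buffer, declared before the call and kept alive while the
  // view is consumed (mirroring cache_positions at the prefill call site).
  std::vector<int64_t> cache_positions;
  TensorView cache_position_tensor =
      make_positions_tensor(/*start_pos=*/2, cache_positions, /*seq_len=*/5);
  for (size_t i = 0; i < cache_position_tensor.numel; ++i) {
    std::printf("%lld ", static_cast<long long>(cache_position_tensor.data[i]));
  }
  std::printf("\n"); // prints: 2 3 4 5 6
  return 0;
}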

extension/llm/runner/text_decoder_runner.cpp

Lines changed: 3 additions & 1 deletion
@@ -36,9 +36,11 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
   // If only 1 input, we are not using kv cache
   bool use_kv_cache = method_meta.num_inputs() > 1;
 
+  std::vector<int64_t> cache_positions;
+
   if (use_kv_cache) {
     auto start_pos_tensor = ET_UNWRAP(populate_start_pos_or_cache_position(
-        "forward", module_, start_pos, tokens->numel()));
+        "forward", module_, start_pos, cache_positions, tokens->numel()));
 
     std::vector<runtime::EValue> inputs;
     auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor);

extension/llm/runner/util.h

Lines changed: 6 additions & 1 deletion
@@ -111,6 +111,7 @@ inline runtime::Result<TensorPtr> populate_start_pos_or_cache_position(
     const char* method_name,
     Module* module,
     int64_t& start_pos,
+    std::vector<int64_t>& cache_positions_underlying_vector,
     int seq_len) {
   // Get expected shape of cache position tensor, which should be the second
   // argument
@@ -119,12 +120,16 @@
   auto second_input_sizes = second_input_info.sizes();
   auto numel = second_input_sizes[0];
 
+  for (int i = 0; i < second_input_sizes.size(); ++i) {
+    ET_LOG(Error, "second_input_sizes[%d] = %d", i, second_input_sizes[i]);
+  }
+
   TensorPtr start_pos_tensor;
   if (numel > 1) {
     // `cache_position` goes from start_pos to start_pos +
     // encoder_output.size(1). e.g. if start_pos = 2 and encoder_output.size(1)
     // = 5, cache_position_tensor should be [2, 3, 4, 5, 6].
-    std::vector<int64_t> cache_positions(seq_len);
+    cache_positions_underlying_vector.resize(seq_len);
    for (int64_t i = 0; i < seq_len; ++i) {
      cache_positions[i] = start_pos + i;
    }
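
As a quick sanity check of the fill logic described in the comment above, using the values from that example (start_pos = 2, seq_len = 5), the caller-supplied vector ends up holding [2, 3, 4, 5, 6]. A tiny standalone version of just that fill step, for illustration only:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const int64_t start_pos = 2; // example values from the comment above
  const int seq_len = 5;

  std::vector<int64_t> cache_positions_underlying_vector;
  cache_positions_underlying_vector.resize(seq_len);
  for (int64_t i = 0; i < seq_len; ++i) {
    cache_positions_underlying_vector[i] = start_pos + i;
  }

  // The cache-position tensor built over this buffer reads [2, 3, 4, 5, 6].
  assert((cache_positions_underlying_vector ==
          std::vector<int64_t>{2, 3, 4, 5, 6}));
  return 0;
}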
