Skip to content

Commit b562f36

Browse files
authored
Fix lookahead decoding cache buffer size
Differential Revision: D78759433
Pull Request resolved: #12725
1 parent 56b0074 commit b562f36

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

examples/models/llama/runner/static_attention_io_manager.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -396,7 +396,7 @@ template <typename TokenT>
396396
class SuffixCache {
397397
public:
398398
SuffixCache(size_t n, size_t capacity)
399-
: n_(n), capacity_(capacity), pos_(0), cache_(n_ * capacity_) {}
399+
: n_(n), capacity_(capacity), pos_(0), cache_((n_ - 1) * capacity_) {}
400400

401401
void add(executorch::runtime::Span<TokenT> suffix) {
402402
if (suffix.size() != n_ - 1) {

0 commit comments

Comments
 (0)