diff --git a/examples/models/llama/runner/static_attention_io_manager.h b/examples/models/llama/runner/static_attention_io_manager.h index 14182bd9cb3..74925a777a2 100644 --- a/examples/models/llama/runner/static_attention_io_manager.h +++ b/examples/models/llama/runner/static_attention_io_manager.h @@ -396,7 +396,7 @@ template class SuffixCache { public: SuffixCache(size_t n, size_t capacity) - : n_(n), capacity_(capacity), pos_(0), cache_(n_ * capacity_) {} + : n_(n), capacity_(capacity), pos_(0), cache_((n_ - 1) * capacity_) {} void add(executorch::runtime::Span suffix) { if (suffix.size() != n_ - 1) {