@@ -576,6 +576,10 @@ class StaticAttentionIOManager {
576576 }
577577 }
578578
579+ size_t input_pos () const {
580+ return input_pos_;
581+ }
582+
579583 /* *
580584 * Prefill helper. Run multiple inferences as needed depending on the length
581585 * of the prompt and method's input length. Returns the position in the output
@@ -586,6 +590,7 @@ class StaticAttentionIOManager {
586590 executorch::runtime::Span<TokenT> tokens,
587591 executorch::runtime::Span<TokenT> input_buffer,
588592 executorch::runtime::Method& method) {
593+ ET_LOG (Info, " Prefilling at position %zu" , input_pos_);
589594 size_t input_len = input_buffer.size ();
590595 auto & masks = get_mask (input_buffer.size ());
591596 for (auto & pair : masks) {
@@ -621,6 +626,7 @@ class StaticAttentionIOManager {
621626 executorch::runtime::Method& method,
622627 std::function<TokenT(executorch::runtime::Method&)>& sample,
623628 std::function<bool(TokenT)>& token_callback) {
629+ ET_LOG (Info, " Decoding at position %zu" , input_pos_);
624630 set_input (method, 0 , input_buffer.data ());
625631 auto & masks = get_mask (input_buffer.size ());
626632 for (auto & pair : masks) {
@@ -661,6 +667,10 @@ class StaticAttentionIOManager {
661667 size_t window_size,
662668 size_t n_verifications,
663669 std::unordered_map<TokenT, SuffixCache<TokenT>> suffix_caches) {
670+ ET_LOG (
671+ Info,
672+ " Decoding with lookahead and verification at position %zu" ,
673+ input_pos_);
664674 set_input (method, 0 , input_buffer.data ());
665675 size_t input_len = input_buffer.size ();
666676
0 commit comments