@@ -738,66 +738,70 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
738
738
}
739
739
740
740
llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot (const llama_ubatch & ubatch, bool cont) const {
741
- if (debug > 0 ) {
742
- const auto & cells = v_cells[seq_to_stream[1 ]];
743
-
744
- const uint32_t head_cur = v_heads[1 ];
745
741
746
- LLAMA_LOG_DEBUG (" %s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n " ,
747
- __func__, cells.used_max_p1 (), cells.get_used (), head_cur, get_size (), n_swa);
742
+ if (debug > 0 ) {
743
+ for (uint32_t s = 0 ; s < ubatch.n_seqs_unq ; ++s) {
744
+ const auto seq_id = ubatch.seq_id_unq [s];
745
+ const auto stream_id = seq_to_stream[seq_id];
746
+ const auto & cells = v_cells[stream_id];
747
+ const uint32_t head_cur = v_heads[stream_id];
748
+
749
+ LLAMA_LOG_DEBUG (" %s: stream[%d], n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n " ,
750
+ __func__, stream_id, cells.used_max_p1 (), cells.get_used (), head_cur, get_size (), n_swa);
751
+
752
+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
753
+ std::string ss;
754
+ for (uint32_t i = 0 ; i < cells.size (); ++i) {
755
+ if (cells.is_empty (i)) {
756
+ ss += ' .' ;
757
+ } else {
758
+ assert (cells.seq_count (i) >= 1 );
748
759
749
- if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
750
- std::string ss;
751
- for (uint32_t i = 0 ; i < cells.size (); ++i) {
752
- if (cells.is_empty (i)) {
753
- ss += ' .' ;
754
- } else {
755
- assert (cells.seq_count (i) >= 1 );
760
+ if (cells.seq_count (i) == 1 ) {
761
+ ss += std::to_string (cells.seq_get (i));
762
+ } else {
763
+ ss += ' M' ;
764
+ }
765
+ }
766
+ if (i%256 == 255 ) {
767
+ ss += " *" ;
768
+ ss += ' \n ' ;
769
+ }
770
+ }
771
+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
772
+ }
756
773
757
- if (cells.seq_count (i) == 1 ) {
758
- ss += std::to_string (cells.seq_get (i));
774
+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
775
+ std::string ss;
776
+ for (uint32_t i = 0 ; i < cells.size (); ++i) {
777
+ std::string cur;
778
+ if (cells.is_empty (i)) {
779
+ cur = ' .' ;
759
780
} else {
760
- ss += ' M' ;
781
+ cur = std::to_string (cells.pos_get (i));
782
+ }
783
+ const int n = cur.size ();
784
+ for (int j = 0 ; j < 5 - n; ++j) {
785
+ cur += ' ' ;
786
+ }
787
+ ss += cur;
788
+ if (i%256 == 255 ) {
789
+ ss += " *" ;
790
+ }
791
+ if (i%64 == 63 ) {
792
+ ss += ' \n ' ;
761
793
}
762
794
}
763
- if (i%256 == 255 ) {
764
- ss += " *" ;
765
- ss += ' \n ' ;
766
- }
795
+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
767
796
}
768
- LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
769
- }
770
797
771
- if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
772
- std::string ss;
773
- for (uint32_t i = 0 ; i < cells.size (); ++i) {
774
- std::string cur;
775
- if (cells.is_empty (i)) {
776
- cur = ' .' ;
777
- } else {
778
- cur = std::to_string (cells.pos_get (i));
779
- }
780
- const int n = cur.size ();
781
- for (int j = 0 ; j < 5 - n; ++j) {
782
- cur += ' ' ;
783
- }
784
- ss += cur;
785
- if (i%256 == 255 ) {
786
- ss += " *" ;
787
- }
788
- if (i%64 == 63 ) {
789
- ss += ' \n ' ;
798
+ for (int s = 0 ; s < LLAMA_MAX_SEQ; ++s) {
799
+ if (cells.seq_pos_min (s) < 0 ) {
800
+ continue ;
790
801
}
791
- }
792
- LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
793
- }
794
802
795
- for (int s = 0 ; s < LLAMA_MAX_SEQ; ++s) {
796
- if (cells.seq_pos_min (s) < 0 ) {
797
- continue ;
803
+ LLAMA_LOG_DEBUG (" %s: stream[%d] min[%d] = %5d, max[%d] = %5d\n " , __func__, stream_id, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
798
804
}
799
-
800
- LLAMA_LOG_DEBUG (" %s: min[%d] = %5d, max[%d] = %5d\n " , __func__, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
801
805
}
802
806
}
803
807
0 commit comments