@@ -1144,6 +1144,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
11441144 cparams.flash_attn = params.flash_attn ;
11451145 cparams.no_perf = params.no_perf ;
11461146 cparams.op_offload = !params.no_op_offload ;
1147+ cparams.swa_full = params.swa_full ;
11471148
11481149 if (params.reranking ) {
11491150 cparams.embeddings = true ;
@@ -1336,81 +1337,6 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
13361337 return text;
13371338}
13381339
1339- //
1340- // KV cache utils
1341- //
1342-
1343- void common_kv_cache_dump_view (const llama_kv_cache_view & view, int row_size) {
1344- static const char slot_chars[] = " .123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+" ;
1345-
1346- printf (" === Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d" ,
1347- view.n_cells , view.n_seq_max , view.used_cells , view.token_count , view.max_contiguous , view.max_contiguous_idx );
1348-
1349- llama_kv_cache_view_cell * c_curr = view.cells ;
1350- llama_seq_id * cs_curr = view.cells_sequences ;
1351-
1352- for (int i = 0 ; i < view.n_cells ; i++, c_curr++, cs_curr += view.n_seq_max ) {
1353- if (i % row_size == 0 ) {
1354- printf (" \n %5d: " , i);
1355- }
1356- int seq_count = 0 ;
1357- for (int j = 0 ; j < view.n_seq_max ; j++) {
1358- if (cs_curr[j] >= 0 ) { seq_count++; }
1359- }
1360- putchar (slot_chars[std::min (sizeof (slot_chars) - 2 , size_t (seq_count))]);
1361- }
1362-
1363- printf (" \n === Done dumping\n " );
1364- }
1365-
1366- void common_kv_cache_dump_view_seqs (const llama_kv_cache_view & view, int row_size) {
1367- static const char slot_chars[] = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ;
1368-
1369- printf (" === Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n " ,
1370- view.n_cells , view.n_seq_max , view.used_cells , view.token_count , view.max_contiguous , view.max_contiguous_idx );
1371-
1372- std::unordered_map<llama_seq_id, size_t > seqs;
1373- llama_kv_cache_view_cell * c_curr = view.cells ;
1374- llama_seq_id * cs_curr = view.cells_sequences ;
1375-
1376- for (int i = 0 ; i < view.n_cells ; i++, c_curr++, cs_curr += view.n_seq_max ) {
1377- for (int j = 0 ; j < view.n_seq_max ; j++) {
1378- if (cs_curr[j] < 0 ) { continue ; }
1379- if (seqs.find (cs_curr[j]) == seqs.end ()) {
1380- if (seqs.size () + 1 >= sizeof (slot_chars)) { break ; }
1381- const size_t sz = seqs.size ();
1382- seqs[cs_curr[j]] = sz;
1383- }
1384- }
1385- if (seqs.size () + 1 >= sizeof (slot_chars)) { break ; }
1386- }
1387-
1388- printf (" === Sequence legend: " );
1389- for (const auto & it : seqs) {
1390- printf (" %zu=%d, " , it.second , it.first );
1391- }
1392- printf (" '+'=other sequence ids" );
1393-
1394- c_curr = view.cells ;
1395- cs_curr = view.cells_sequences ;
1396- for (int i = 0 ; i < view.n_cells ; i++, c_curr++, cs_curr += view.n_seq_max ) {
1397- if (i % row_size == 0 ) {
1398- printf (" \n %5d: " , i);
1399- }
1400- for (int j = 0 ; j < view.n_seq_max ; j++) {
1401- if (cs_curr[j] >= 0 ) {
1402- const auto & it = seqs.find (cs_curr[j]);
1403- putchar (it != seqs.end () ? int (slot_chars[it->second ]) : ' +' );
1404- } else {
1405- putchar (' .' );
1406- }
1407- }
1408- putchar (' ' );
1409- }
1410-
1411- printf (" \n === Done dumping\n " );
1412- }
1413-
14141340//
14151341// Embedding utils
14161342//
0 commit comments