@@ -1110,6 +1110,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
         mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
     }
 
+    mparams.progress_callback           = params.load_progress_callback;
+    mparams.progress_callback_user_data = params.load_progress_callback_user_data;
+
     return mparams;
 }
 
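For context, `llama_model_params.progress_callback` is llama.cpp's standard load-progress hook: it receives a value in `[0, 1]` and returning `false` aborts the load. A minimal sketch of how a caller might use the two new `common_params` fields forwarded above (the callback name and the `main` wrapper are made up for illustration):

```cpp
// Minimal sketch, assuming the llama.cpp "common" API as changed above.
#include <cstdio>

#include "common.h"

// Matches llama_progress_callback: called during model load with
// progress in [0, 1]; returning false aborts the load.
static bool on_load_progress(float progress, void * /*user_data*/) {
    fprintf(stderr, "\rloading model: %3d%%", (int) (progress * 100.0f));
    return true;
}

int main() {
    common_params params;
    params.load_progress_callback           = on_load_progress;
    params.load_progress_callback_user_data = nullptr;

    // The hunk above forwards both fields, so any loader that goes through
    // common_model_params_to_llama() now reports progress to the callback.
    llama_model_params mparams = common_model_params_to_llama(params);
    (void) mparams;
    return 0;
}
```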
@@ -1141,6 +1144,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.flash_attn        = params.flash_attn;
     cparams.no_perf           = params.no_perf;
     cparams.op_offload        = !params.no_op_offload;
+    cparams.swa_full          = params.swa_full;
 
     if (params.reranking) {
         cparams.embeddings = true;
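`swa_full` follows the same one-line forwarding pattern as the surrounding fields; in `llama_context_params` it requests a full-size KV cache for sliding-window attention (SWA) models instead of the smaller window-sized one. A hedged sketch of the flow (flag value chosen arbitrarily):

```cpp
// Minimal sketch, assuming the llama.cpp "common" API as changed above.
#include "common.h"

int main() {
    common_params params;
    params.swa_full = true; // keep a full-size KV cache even for SWA models

    // The hunk above copies the flag through, so it takes effect when the
    // resulting cparams are used to create a context.
    llama_context_params cparams = common_context_params_to_llama(params);
    (void) cparams;
    return 0;
}
```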
@@ -1333,81 +1337,6 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
     return text;
 }
 
-//
-// KV cache utils
-//
-
-void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
-    static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
-
-    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
-        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
-
-    llama_kv_cache_view_cell * c_curr = view.cells;
-    llama_seq_id * cs_curr = view.cells_sequences;
-
-    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
-        if (i % row_size == 0) {
-            printf("\n%5d: ", i);
-        }
-        int seq_count = 0;
-        for (int j = 0; j < view.n_seq_max; j++) {
-            if (cs_curr[j] >= 0) { seq_count++; }
-        }
-        putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
-    }
-
-    printf("\n=== Done dumping\n");
-}
-
-void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
-    static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-
-    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
-        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
-
-    std::unordered_map<llama_seq_id, size_t> seqs;
-    llama_kv_cache_view_cell * c_curr = view.cells;
-    llama_seq_id * cs_curr = view.cells_sequences;
-
-    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
-        for (int j = 0; j < view.n_seq_max; j++) {
-            if (cs_curr[j] < 0) { continue; }
-            if (seqs.find(cs_curr[j]) == seqs.end()) {
-                if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
-                const size_t sz = seqs.size();
-                seqs[cs_curr[j]] = sz;
-            }
-        }
-        if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
-    }
-
-    printf("=== Sequence legend: ");
-    for (const auto & it : seqs) {
-        printf("%zu=%d, ", it.second, it.first);
-    }
-    printf("'+'=other sequence ids");
-
-    c_curr = view.cells;
-    cs_curr = view.cells_sequences;
-    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
-        if (i % row_size == 0) {
-            printf("\n%5d: ", i);
-        }
-        for (int j = 0; j < view.n_seq_max; j++) {
-            if (cs_curr[j] >= 0) {
-                const auto & it = seqs.find(cs_curr[j]);
-                putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
-            } else {
-                putchar('.');
-            }
-        }
-        putchar(' ');
-    }
-
-    printf("\n=== Done dumping\n");
-}
-
 //
 // Embedding utils
 //
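The two dump helpers deleted above encoded each KV cell as a single character. The clamping trick in the first one is worth noting: `sizeof(slot_chars) - 2` is the index of the final `'+'` glyph (the `- 2` accounts for the NUL terminator), so any oversized sequence count collapses to `'+'`. A standalone sketch with made-up counts:

```cpp
// Standalone illustration of the removed dump's per-cell encoding.
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
    static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";

    const int seq_counts[] = { 0, 1, 2, 9, 35, 100 }; // hypothetical cells

    for (int seq_count : seq_counts) {
        // 0 -> '.', 1..9 -> '1'..'9', 10..61 -> 'A'..'z', larger clamps to '+'
        putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
    }
    putchar('\n'); // prints ".129Z+"
    return 0;
}
```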