@@ -1583,7 +1583,7 @@ struct llama_sampler_dry {
     ring_buffer<llama_token> last_tokens;
 };
 
-std::vector<llama_token> llama_tokenize(
+static std::vector<llama_token> llama_tokenize(
         const struct llama_model * model,
         const std::string & text,
         bool add_special,
@@ -1602,9 +1602,9 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }
 
-std::string llama_detokenize(const struct llama_model * model, const std::vector<llama_token> & tokens, bool special) {
-    if (model == nullptr) {
-        return "??";
+static std::string llama_detokenize(const struct llama_model * model, const std::vector<llama_token> & tokens, bool special) {
+    if (model == nullptr) { // model is passed as nullptr in test-sampling.cpp
+        return "";
     }
     std::string text;
     text.resize(std::max(text.capacity(), tokens.size()));
@@ -1621,13 +1621,14 @@ std::string llama_detokenize(const struct llama_model * model, const std::vector
     return text;
 }
 
-std::string llama_detokenize_single(const struct llama_model * model, llama_token token, bool special) {
+static std::string llama_detokenize_single(const struct llama_model * model, llama_token token, bool special) {
     std::vector<llama_token> tokens = {token};
     return llama_detokenize(model, tokens, special);
 }
 
+#ifdef DEBUG
 // For DRY debugging
-std::string detokenize_for_display(const struct llama_model * model, llama_token token, bool special) {
+static std::string detokenize_for_display(const struct llama_model * model, llama_token token, bool special) {
     std::string token_text = llama_detokenize_single(model, token, special);
     size_t pos = 0;
     while ((pos = token_text.find('\n', pos)) != std::string::npos) {
@@ -1639,31 +1640,32 @@ std::string detokenize_for_display(const struct llama_model * model, llama_token
 }
 
 // For DRY debugging
-void dry_print_ring_buffer_debug(const llama_sampler_dry * ctx, int max_tokens_per_side = 100) {
+static void dry_print_ring_buffer_debug(const llama_sampler_dry * ctx, int max_tokens_per_side = 100) {
     const size_t total_tokens = ctx->last_tokens.size();
     size_t tokens_to_print = total_tokens;
+    size_t mps = (max_tokens_per_side >= 0) ? static_cast<size_t>(max_tokens_per_side) : 0;
 
-    if (max_tokens_per_side != -1) {
-        tokens_to_print = std::min(total_tokens, static_cast<size_t>(max_tokens_per_side) * 2);
+    if (max_tokens_per_side < 0) {
+        tokens_to_print = total_tokens;
     }
 
     std::vector<std::pair<int, std::string>> token_info;
     token_info.reserve(tokens_to_print);
 
     // Collect token information
-    if (max_tokens_per_side == -1 || total_tokens <= tokens_to_print) {
+    if (max_tokens_per_side < 0 || total_tokens <= tokens_to_print) {
         for (size_t i = 0; i < total_tokens; ++i) {
             llama_token token = ctx->last_tokens.rat(total_tokens - 1 - i);
             std::string token_text = detokenize_for_display(ctx->model, token, true);
             token_info.emplace_back(token, std::move(token_text));
         }
     } else {
-        for (size_t i = 0; i < max_tokens_per_side; ++i) {
+        for (size_t i = 0; i < mps; ++i) {
             llama_token token = ctx->last_tokens.rat(total_tokens - 1 - i);
             std::string token_text = detokenize_for_display(ctx->model, token, true);
             token_info.emplace_back(token, std::move(token_text));
         }
-        for (size_t i = total_tokens - max_tokens_per_side; i < total_tokens; ++i) {
+        for (size_t i = total_tokens - mps; i < total_tokens; ++i) {
             llama_token token = ctx->last_tokens.rat(total_tokens - 1 - i);
             std::string token_text = detokenize_for_display(ctx->model, token, true);
             token_info.emplace_back(token, std::move(token_text));
@@ -1686,14 +1688,14 @@ void dry_print_ring_buffer_debug(const llama_sampler_dry * ctx, int max_tokens_p
 
     // Print tokens
     for (size_t i = 0; i < tokens_to_print; ++i) {
-        size_t true_index = (max_tokens_per_side == -1 || total_tokens <= tokens_to_print) ? i :
-                            (i < max_tokens_per_side) ? i : (total_tokens - tokens_to_print + i);
+        size_t true_index = (max_tokens_per_side < 0 || total_tokens <= tokens_to_print) ? i :
+                            (i < mps) ? i : (total_tokens - tokens_to_print + i);
         LLAMA_LOG_INFO("%-*zu | %-*d | %-*s\n",
                 (int)max_index_width, true_index,
                 (int)max_token_width, token_info[i].first,
                 (int)max_text_width, token_info[i].second.c_str());
         // Add a separator between oldest and newest tokens if applicable
-        if (max_tokens_per_side != -1 && total_tokens > tokens_to_print && i == max_tokens_per_side - 1) {
+        if (max_tokens_per_side > 0 && total_tokens > tokens_to_print && i == mps - 1) {
             LLAMA_LOG_INFO("%s\n", std::string(max_index_width + max_token_width + max_text_width + 6, '.').c_str());
         }
     }
@@ -1707,7 +1709,7 @@ struct CandidateInfo {
 };
 
 // For DRY debugging
-std::vector<CandidateInfo> get_top_n_candidates(const llama_token_data_array * cur_p, size_t n) {
+static std::vector<CandidateInfo> get_top_n_candidates(const llama_token_data_array * cur_p, size_t n) {
     std::vector<CandidateInfo> candidates;
     candidates.reserve(cur_p->size);
 
@@ -1721,6 +1723,7 @@ std::vector<CandidateInfo> get_top_n_candidates(const llama_token_data_array * c
     candidates.resize(std::min(n, candidates.size()));
     return candidates;
 }
+#endif // DEBUG
 
 static void GetOverlappingTokenSequences(const struct llama_model * model, const std::string& str, std::unordered_multimap<llama_token, std::vector<llama_token>>& token_sequences, int max_tail_len = -1) {
     const int n_vocab = llama_n_vocab(model);
@@ -1766,7 +1769,7 @@ static void GetOverlappingTokenSequences(const struct llama_model * model, const
 
 
 
-static const char * llama_sampler_dry_name(const struct llama_sampler * smpl) {
+static const char * llama_sampler_dry_name(const struct llama_sampler * /*smpl*/) {
     return "dry";
 }
 
@@ -1798,7 +1801,7 @@ static void llama_sampler_dry_apply(struct llama_sampler * smpl, llama_token_dat
 
     // Step 1: Look for restart sequences
     int rep_limit = last_n_repeat;
-    for (size_t i = 0; i < last_n_repeat; ++i) {
+    for (int i = 0; i < last_n_repeat; ++i) {
         llama_token token = ctx->last_tokens.rat(i);
         auto its = ctx->dry_processed_breakers.equal_range(token);
         if (its.first == ctx->dry_processed_breakers.end()) {
@@ -1809,7 +1812,7 @@ static void llama_sampler_dry_apply(struct llama_sampler * smpl, llama_token_dat
             int seq_len = (int)it->second.size();
             if (seq_len > longest_match && seq_len <= (int)i) {
                 bool match = true;
-                for (size_t offset = 0; offset < seq_len; ++offset) {
+                for (int offset = 0; offset < seq_len; ++offset) {
                     if (it->second[offset] != ctx->last_tokens.rat(i - offset - 1)) {
                         match = false;
                         break;
@@ -1868,7 +1871,7 @@ static void llama_sampler_dry_apply(struct llama_sampler * smpl, llama_token_dat
     }
 
     // Step 3: Find maximum repeat length for each token
-    for (size_t i = 0; i < last_n_repeat - 1; ++i) {
+    for (int i = 0; i < last_n_repeat - 1; ++i) {
         int repeat_len = ctx->dry_repeat_count[i];
         if (repeat_len >= ctx->dry_allowed_length) {
             llama_token token = ctx->last_tokens.rat(last_n_repeat - 2 - i);
@@ -1891,7 +1894,7 @@ static void llama_sampler_dry_apply(struct llama_sampler * smpl, llama_token_dat
     const size_t top_n = 10;
     std::vector<CandidateInfo> top_n_before = get_top_n_candidates(cur_p, top_n);
 
-#endif
+#endif // DEBUG
 
     for (size_t i = 0; i < cur_p->size; ++i) {
         const auto & af_kvp = ctx->dry_max_token_repeat.find(cur_p->data[i].id);
@@ -1910,7 +1913,7 @@ static void llama_sampler_dry_apply(struct llama_sampler * smpl, llama_token_dat
             std::string token_text = detokenize_for_display(ctx->model, cur_p->data[i].id, true);
             LLAMA_LOG_INFO("Applied penalty %.4f to token %d (%s) (repeat length %d)\n",
                     penalty, cur_p->data[i].id, token_text.c_str(), af_kvp->second);
-#endif
+#endif // DEBUG
         }
     }
 
@@ -1996,16 +1999,16 @@ struct llama_sampler * llama_sampler_init_dry(const struct llama_model * model,
     return new llama_sampler {
         /* .iface = */ &llama_sampler_dry_i,
         /* .ctx   = */ new llama_sampler_dry {
-            /* .model = */ model,
-            /* .total_context_size = */ context_size,
-            /* .dry_multiplier = */ dry_multiplier,
-            /* .dry_base = */ dry_base,
-            /* .dry_allowed_length = */ dry_allowed_length,
-            /* .dry_penalty_last_n = */ dry_penalty_last_n,
+            /* .model                   = */ model,
+            /* .total_context_size     = */ context_size,
+            /* .dry_multiplier         = */ dry_multiplier,
+            /* .dry_base               = */ dry_base,
+            /* .dry_allowed_length     = */ dry_allowed_length,
+            /* .dry_penalty_last_n     = */ dry_penalty_last_n,
             /* .dry_processed_breakers = */ {},
-            /* .dry_repeat_count = */ std::vector<int>(effective_dry_penalty_last_n, 0),
-            /* .dry_max_token_repeat = */ {},
-            /* .last_tokens = */ ring_buffer<llama_token>(effective_dry_penalty_last_n),
+            /* .dry_repeat_count       = */ std::vector<int>(effective_dry_penalty_last_n, 0),
+            /* .dry_max_token_repeat   = */ {},
+            /* .last_tokens            = */ ring_buffer<llama_token>(effective_dry_penalty_last_n),
         },
     };
 }
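
Note on the loop-index changes in llama_sampler_dry_apply (size_t i changed to int i): last_n_repeat and seq_len are signed ints, so comparing an unsigned counter against them triggers -Wsign-compare and relies on implicit conversions. Below is a minimal, self-contained sketch of that pattern and the fix; the function and variable names are illustrative only and are not taken from this diff.

#include <cstdio>

// Sketch only: shows why the loop indices were switched to int.
static void scan_window(int last_n_repeat) {
    // Before: the counter is unsigned while the bound is signed.
    // This triggers -Wsign-compare, and a negative bound would be
    // converted to a huge unsigned value instead of ending the loop.
    //     for (size_t i = 0; i < last_n_repeat; ++i) { ... }

    // After: match the signedness of the bound, as the diff does.
    for (int i = 0; i < last_n_repeat; ++i) {
        printf("index %d\n", i);
    }
}

int main() {
    scan_window(3); // prints indices 0, 1, 2
    return 0;
}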