@@ -65,13 +65,13 @@ constexpr int draft_min_percent_strict[LLAMA_NGRAM_MAX] = {75, 66, 66, 66};
6565static llama_token try_draft (common_ngram_cache & nc_static, const common_ngram ngram_static) {
6666 common_ngram_cache::iterator part_static_it = nc_static.find (ngram_static);
6767 if (part_static_it == nc_static.end ()) {
68- return - 1 ;
68+ return LLAMA_TOKEN_NULL ;
6969 }
7070 const common_ngram_cache_part part_static = part_static_it->second ;
7171
7272 int max_count_static = 0 ;
7373 int sum_count_static = 0 ;
74- llama_token max_token = - 1 ;
74+ llama_token max_token = LLAMA_TOKEN_NULL ;
7575
7676 for (std::pair<llama_token, int > token_count_static : part_static) {
7777 const llama_token token = token_count_static.first ;
@@ -85,10 +85,10 @@ static llama_token try_draft(common_ngram_cache & nc_static, const common_ngram
8585 }
8686
8787 if (sum_count_static < draft_min_sample_size_lax[LLAMA_NGRAM_STATIC-1 ]) {
88- return - 1 ;
88+ return LLAMA_TOKEN_NULL ;
8989 }
9090 if (100 *max_count_static < draft_min_percent_lax[LLAMA_NGRAM_STATIC-1 ]*sum_count_static) {
91- return - 1 ;
91+ return LLAMA_TOKEN_NULL ;
9292 }
9393 return max_token;
9494}
@@ -98,9 +98,9 @@ static llama_token try_draft(
9898 common_ngram_cache & nc_primary, const std::vector<common_ngram> & ngrams_primary, common_ngram_cache_part & part_static,
9999 const int * min_sample_size, const int * min_percent) {
100100
101- llama_token drafted_token = - 1 ;
101+ llama_token drafted_token = LLAMA_TOKEN_NULL ;
102102
103- for (int i = ngrams_primary.size ()-1 ; i >= 0 && drafted_token == - 1 ; --i) {
103+ for (int i = ngrams_primary.size ()-1 ; i >= 0 && drafted_token == LLAMA_TOKEN_NULL ; --i) {
104104 const common_ngram ngram_primary = ngrams_primary[i];
105105
106106 common_ngram_cache::iterator part_primary_it = nc_primary.find (ngram_primary);
@@ -112,7 +112,7 @@ static llama_token try_draft(
112112 int max_count_primary = 0 ;
113113 int max_count_static = 0 ;
114114 int sum_count_primary = 0 ;
115- llama_token max_token = - 1 ;
115+ llama_token max_token = LLAMA_TOKEN_NULL ;
116116
117117 for (std::pair<llama_token, int > token_count_primary : part_primary) {
118118 const llama_token token = token_count_primary.first ;
@@ -154,7 +154,7 @@ void common_ngram_cache_draft(
154154 }
155155
156156 while ((int ) draft.size ()-1 < n_draft) {
157- llama_token drafted_token = - 1 ;
157+ llama_token drafted_token = LLAMA_TOKEN_NULL ;
158158
159159 const int ngram_start_static = inp_size-LLAMA_NGRAM_STATIC + draft.size ()-1 ;
160160 common_ngram ngram_static;
@@ -177,17 +177,17 @@ void common_ngram_cache_draft(
177177 }
178178 ngrams_cd.push_back (ngram_cd);
179179 }
180- if (drafted_token == - 1 ) {
180+ if (drafted_token == LLAMA_TOKEN_NULL ) {
181181 drafted_token = try_draft (nc_context, ngrams_cd, part_static, draft_min_sample_size_lax, draft_min_percent_lax);
182182 }
183- if (drafted_token == - 1 ) {
183+ if (drafted_token == LLAMA_TOKEN_NULL ) {
184184 drafted_token = try_draft (nc_dynamic, ngrams_cd, part_static, draft_min_sample_size_strict, draft_min_percent_strict);
185185 }
186- if (drafted_token == - 1 ) {
186+ if (drafted_token == LLAMA_TOKEN_NULL ) {
187187 drafted_token = try_draft (nc_static, ngram_static);
188188 }
189189
190- if (drafted_token == - 1 ) {
190+ if (drafted_token == LLAMA_TOKEN_NULL ) {
191191 break ;
192192 }
193193
0 commit comments