@@ -835,7 +835,7 @@ struct llm_tokenizer_ugm_session {
         }
 
-        // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+        // initialize score_sum to -DBL_MAX so it will be always lower than sums of token scores
+        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
         // at the beginning tokenization score is zero
         tokenization_results[0] = { vocab.token_unk(), 0, 0 };
 
@@ -867,7 +867,7 @@ struct llm_tokenizer_ugm_session {
                 const double challenger_score = current_best.score_sum + token_score;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { token_id, input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -881,7 +881,7 @@ struct llm_tokenizer_ugm_session {
                 prefix_offset = input_offset + n_utf8_code_units;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -1007,7 +1007,7 @@ struct llm_tokenizer_ugm_session {
     struct best_tokenization {
         llama_token token_id;
         size_t input_offset;
-        float score_sum;
+        double score_sum;
     };
 
     struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
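For context (an illustration, not part of the commit): `best_tokenization` holds the running score of the best tokenization ending at a given input offset, and each step of the dynamic-programming loop adds one token's log-probability to `score_sum`. The value is therefore a long sum of small negative numbers; the old `(float)` cast on every challenger discarded precision at each step, and `-FLT_MAX` is replaced by `-DBL_MAX` so the "always lower" sentinel matches the widened type. A minimal standalone sketch of the precision drift, with made-up scores:

```cpp
#include <cstdio>

// Hypothetical illustration (not llama.cpp code): accumulate many small
// negative log-probabilities, as the tokenization loop above does, in both
// float and double, and compare the running sums.
int main() {
    float  sum_f = 0.0f;
    double sum_d = 0.0;
    const double token_score = -10.000001; // made-up per-token log-prob
    for (int i = 0; i < 100000; ++i) {
        sum_f += (float) token_score; // the per-step cast the old code applied
        sum_d += token_score;
    }
    // The float sum drifts away from the double sum; two candidate
    // tokenizations whose true scores differ by less than this drift can
    // compare in the wrong order when score_sum is a float.
    std::printf("float:  %.6f\ndouble: %.6f\ndrift:  %.6f\n",
                (double) sum_f, sum_d, (double) sum_f - sum_d);
    return 0;
}
```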