9 | 9 | #include <nlohmann/json.hpp> |
10 | 10 |
11 | 11 | #if defined(_WIN32) |
12 | | -# ifndef NOMINMAX |
13 | | -# define NOMINMAX |
14 | | -# endif |
15 | 12 | # include <windows.h> |
16 | 13 | # include <io.h> |
17 | 14 | #else |
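
Note: the NOMINMAX guard dropped above was presumably only needed for the old error check in tokenize_prompt below, which called std::numeric_limits<int32_t>::min(); without NOMINMAX, <windows.h> defines min and max as macros that break that expression. A minimal illustration of the clash, as an assumption about why the guard existed (the commit itself does not say):

    #include <cstdint>
    #include <limits>
    // With <windows.h> included and its min()/max() macros active, the call
    // below macro-expands and fails to compile unless NOMINMAX was defined
    // before the include (or the call is written as (std::numeric_limits<int32_t>::min)()).
    const int32_t sentinel = std::numeric_limits<int32_t>::min();
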
@@ -943,29 +940,16 @@ static int apply_chat_template(const struct common_chat_templates * tmpls, Llama |
943 | 940 | static int tokenize_prompt(const llama_vocab * vocab, const std::string & prompt, |
944 | 941 | std::vector<llama_token> & prompt_tokens, const LlamaData & llama_data) { |
945 | 942 | const bool is_first = llama_memory_seq_pos_max(llama_get_memory(llama_data.context.get()), 0) == -1; |
946 | | - int n_tokens = prompt.size() + 2 * is_first; |
947 | | - prompt_tokens.resize(n_tokens); |
948 | | - n_tokens = llama_tokenize(vocab, prompt.c_str(), prompt.size(), |
949 | | - prompt_tokens.data(), prompt_tokens.size(), |
950 | | - is_first, /*parse_special =*/true); |
951 | | - if (n_tokens == std::numeric_limits<int32_t>::min()) { |
952 | | - printe("tokenization failed: input too large\n"); |
| 943 | + |
| 944 | + const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true); |
| 945 | + prompt_tokens.resize(n_prompt_tokens); |
| 946 | + if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), is_first, |
| 947 | + true) < 0) { |
| 948 | + printe("failed to tokenize the prompt\n"); |
953 | 949 | return -1; |
954 | 950 | } |
955 | | - if (n_tokens < 0) { |
956 | | - prompt_tokens.resize(-n_tokens); |
957 | | - int check = llama_tokenize(vocab, prompt.c_str(), prompt.size(), |
958 | | - prompt_tokens.data(), prompt_tokens.size(), |
959 | | - is_first, /*parse_special =*/true); |
960 | | - if (check != -n_tokens) { |
961 | | - printe("failed to tokenize the prompt (size mismatch)\n"); |
962 | | - return -1; |
963 | | - } |
964 | | - n_tokens = check; |
965 | | - } else { |
966 | | - prompt_tokens.resize(n_tokens); |
967 | | - } |
968 | | - return n_tokens; |
| 951 | + |
| 952 | + return n_prompt_tokens; |
969 | 953 | } |
970 | 954 |
971 | 955 | // Check if we have enough space in the context to evaluate this batch |
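
Note: the rewritten tokenize_prompt relies on llama_tokenize's size-query convention: when the destination buffer is too small (here NULL with capacity 0), the function returns the negated number of tokens required rather than failing, so negating the result gives the exact size to allocate before the second, filling call. A minimal standalone sketch of the same two-pass pattern, assuming an already-loaded llama_vocab and with error handling reduced to clearing the result:

    #include <string>
    #include <vector>
    #include <llama.h>

    static std::vector<llama_token> tokenize(const llama_vocab * vocab, const std::string & text, bool add_special) {
        // Pass 1: NULL buffer -> llama_tokenize returns -(required token count).
        const int n_tokens = -llama_tokenize(vocab, text.c_str(), text.size(), NULL, 0,
                                             add_special, /*parse_special=*/ true);
        std::vector<llama_token> tokens(n_tokens);
        // Pass 2: fill the exactly-sized buffer; a negative result signals failure.
        if (llama_tokenize(vocab, text.c_str(), text.size(), tokens.data(), tokens.size(),
                           add_special, /*parse_special=*/ true) < 0) {
            tokens.clear();
        }
        return tokens;
    }

This trades the old resize-and-retry logic (and its std::numeric_limits sentinel check) for one extra pass over the text, leaving a single error branch.
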