
Commit d087f74

vocab : INT32_MIN from llama_tokenize on overflow
1 parent: f5972a1

3 files changed: +5 -1 lines changed

common/common.cpp

Lines changed: 3 additions & 0 deletions
@@ -1284,6 +1284,9 @@ std::vector<llama_token> common_tokenize(
     int n_tokens = text.length() + 2 * add_special;
     std::vector<llama_token> result(n_tokens);
     n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+    if (n_tokens == std::numeric_limits<int32_t>::min()) {
+        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
+    }
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
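
The ordering of the new check matters: it has to come before the existing n_tokens < 0 branch, because that branch negates the return value (result.resize(-n_tokens)), and negating INT32_MIN is signed-integer overflow, which is undefined behavior in C++. A minimal caller-side sketch of the new behavior, assuming the common_tokenize overload shown above; the helper name tokenize_or_empty is hypothetical and not part of the commit:

// Sketch only: returns an empty vector instead of propagating the
// overflow exception thrown by common_tokenize.
#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>
#include "common.h" // provides common_tokenize, llama_token, llama_vocab

std::vector<llama_token> tokenize_or_empty(const llama_vocab * vocab, const std::string & text) {
    try {
        return common_tokenize(vocab, text, /*add_special=*/true, /*parse_special=*/false);
    } catch (const std::runtime_error & e) {
        // reached when llama_tokenize returned INT32_MIN (overflow)
        fprintf(stderr, "tokenization failed: %s\n", e.what());
        return {};
    }
}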

include/llama.h

Lines changed: 1 addition & 0 deletions
@@ -1087,6 +1087,7 @@ extern "C" {
     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
     /// @return Returns the number of tokens on success, no more than n_tokens_max
     /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
     /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
     /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
     ///                      as plaintext. Does not insert a leading space.
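
With this line, the documented contract has three cases: success (token count), buffer too small (negated required count), and overflow (INT32_MIN). A sketch of a caller honoring all three against the raw API; it mirrors, but is not, the library's own common_tokenize helper, and assumes llama.h plus an initialized vocab:

// Two-pass tokenization following the documented return values.
#include <cstdint> // INT32_MIN
#include <string>
#include <vector>
#include "llama.h"

bool tokenize_text(const llama_vocab * vocab, const std::string & text, std::vector<llama_token> & out) {
    out.resize(text.size() + 2); // rough upper bound, incl. BOS/EOS
    int32_t n = llama_tokenize(vocab, text.data(), (int32_t) text.size(),
                               out.data(), (int32_t) out.size(),
                               /*add_special=*/true, /*parse_special=*/false);
    if (n == INT32_MIN) {
        return false; // overflow: result size does not fit in int32_t
    }
    if (n < 0) {
        out.resize(-n); // buffer too small: -n is the required count
        n = llama_tokenize(vocab, text.data(), (int32_t) text.size(),
                           out.data(), (int32_t) out.size(),
                           /*add_special=*/true, /*parse_special=*/false);
        if (n < 0) {
            return false;
        }
    }
    out.resize(n); // success: n tokens written
    return true;
}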

src/llama-vocab.cpp

Lines changed: 1 addition & 1 deletion
@@ -3062,7 +3062,7 @@ int32_t llama_vocab::tokenize(
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
     if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
         LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
-        return -1;
+        return std::numeric_limits<int32_t>::min();
     }
 
     if (n_tokens_max < (int) res.size()) {
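
The old return value of -1 was ambiguous: under the contract above, a negative return means "buffer too small, the magnitude is the required token count", so -1 read as "retry with a 1-token buffer" rather than as a hard failure. INT32_MIN cannot collide with any legitimate shortfall, because the guard rejects any result size at or above INT32_MAX before it is ever negated. An illustrative check of that invariant (not from the commit; encode_shortfall is a hypothetical name):

// Any legitimate "buffer too small" return is -(res.size()) with
// res.size() < INT32_MAX, so it is always strictly greater than
// INT32_MIN and the sentinel is unambiguous.
#include <cassert>
#include <cstddef>
#include <cstdint>

int32_t encode_shortfall(size_t required) {
    assert(required < (size_t) INT32_MAX); // enforced by the guard above
    return -(int32_t) required;            // always > INT32_MIN
}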
