Commit f5972a1

vocab : return error instead of aborting on oversized token count
1 parent cdb6fb5 commit f5972a1

File tree

1 file changed: 2 additions, 1 deletion


src/llama-vocab.cpp

Lines changed: 2 additions & 1 deletion
@@ -3061,7 +3061,8 @@ int32_t llama_vocab::tokenize(
         bool parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
     if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-        GGML_ABORT("tokenization result size %zu exceeds int32_t limit", res.size());
+        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
+        return -1;
     }

     if (n_tokens_max < (int) res.size()) {
