Commit cdb6fb5

vocab : prevent stack overflow in tokenize
1 parent 600e3e9 commit cdb6fb5

File tree

1 file changed: +4 −0 lines changed

src/llama-vocab.cpp

Lines changed: 4 additions & 0 deletions
```diff
@@ -3060,6 +3060,10 @@ int32_t llama_vocab::tokenize(
         bool add_special,
         bool parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+        GGML_ABORT("tokenization result size %zu exceeds int32_t limit", res.size());
+    }
+
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());
```
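The added check matters because the function reports the token count through a signed 32-bit return value: when the caller's buffer is too small it returns `-((int) res.size())`, so a result of `INT32_MAX` or more tokens would overflow the cast and could hand the caller a wrapped count instead of a usable retry size. Below is a minimal standalone sketch of the same guard pattern, not the llama.cpp implementation: `encode` and `report_size` are hypothetical names, and plain `std::abort()` stands in for `GGML_ABORT`.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <vector>

// Hypothetical stand-in for a tokenizer result; only its size matters here.
static std::vector<int> encode(size_t n) { return std::vector<int>(n); }

// Mirrors the commit's pattern: a size_t result is reported through an
// int32_t return value, and "buffer too small" is encoded as a negative
// count. Without the guard, a size of INT32_MAX or more would overflow the
// int cast below, which is undefined behavior.
int32_t report_size(size_t requested, int32_t n_tokens_max) {
    auto res = encode(requested);
    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
        // The real code calls GGML_ABORT; abort() is the plain-C++ stand-in.
        std::fprintf(stderr, "result size %zu exceeds int32_t limit\n", res.size());
        std::abort();
    }
    if (n_tokens_max < (int32_t) res.size()) {
        return -((int32_t) res.size());   // caller must retry with a larger buffer
    }
    return (int32_t) res.size();
}

int main() {
    std::printf("%d\n", report_size(8, 16));   // 8: result fits in the buffer
    std::printf("%d\n", report_size(8, 4));    // -8: caller needs 8 slots
}
```

Note that the diff checks `>=` rather than `>`, so even a count of exactly `INT32_MAX` aborts instead of relying on the very edge of the representable range.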
