Commit cdb6fb5

vocab : prevent stack overflow in tokenize
1 parent 600e3e9 commit cdb6fb5

File tree

1 file changed: +4 −0 lines changed

src/llama-vocab.cpp

Lines changed: 4 additions & 0 deletions
```diff
@@ -3060,6 +3060,10 @@ int32_t llama_vocab::tokenize(
         bool add_special,
         bool parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+        GGML_ABORT("tokenization result size %zu exceeds int32_t limit", res.size());
+    }
+
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());
```
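The added check matters because the function reports the token count through a signed 32-bit return value: when the caller's buffer is too small it returns `-((int) res.size())`, so a result of `INT32_MAX` or more tokens would overflow the cast and could hand the caller a wrapped count instead of a usable retry size. Below is a minimal standalone sketch of the same guard pattern, not the llama.cpp implementation: `encode` and `report_size` are hypothetical names, and plain `std::abort()` stands in for `GGML_ABORT`.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <vector>

// Hypothetical stand-in for a tokenizer result; only its size matters here.
static std::vector<int> encode(size_t n) { return std::vector<int>(n); }

// Mirrors the commit's pattern: a size_t result is reported through an
// int32_t return value, and "buffer too small" is encoded as a negative
// count. Without the guard, a size of INT32_MAX or more would overflow the
// int cast below, which is undefined behavior.
int32_t report_size(size_t requested, int32_t n_tokens_max) {
    auto res = encode(requested);
    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
        // The real code calls GGML_ABORT; abort() is the plain-C++ stand-in.
        std::fprintf(stderr, "result size %zu exceeds int32_t limit\n", res.size());
        std::abort();
    }
    if (n_tokens_max < (int32_t) res.size()) {
        return -((int32_t) res.size());   // caller must retry with a larger buffer
    }
    return (int32_t) res.size();
}

int main() {
    std::printf("%d\n", report_size(8, 16));   // 8: result fits in the buffer
    std::printf("%d\n", report_size(8, 4));    // -8: caller needs 8 slots
}
```

Note that the diff checks `>=` rather than `>`, so even a count of exactly `INT32_MAX` aborts instead of relying on the very edge of the representable range.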
