Skip to content

Commit c55c1ba

Browse files
authored
set mask token lstrip attribute
1 parent cb5d91b commit c55c1ba

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

src/llama-vocab.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2080,9 +2080,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
20802080

20812081
std::string model_name;
20822082
std::string tokenizer_pre;
2083+
std::string general_arch;
20832084

20842085
ml.get_key(LLM_KV_GENERAL_NAME, model_name, false);
20852086
ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
2087+
ml.get_key(LLM_KV_GENERAL_ARCHITECTURE, general_arch, false);
20862088

20872089
// model name to lowercase
20882090
std::transform(model_name.begin(), model_name.end(), model_name.begin(),
@@ -2091,8 +2093,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
20912093
}
20922094
);
20932095

2094-
// set attributes by model/tokenizer name
2095-
if (_contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})) {
2096+
// set attributes by model/tokenizer/architecture name
2097+
if (false
2098+
|| _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
2099+
|| _contains_any(general_arch, {"nomic-bert-moe"})
2100+
) {
20962101
_set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
20972102
} else if (_contains_any(model_name, {"phi-3", "phi3"})) {
20982103
for (auto id : cache_special_tokens) {

0 commit comments

Comments
 (0)