Skip to content

Commit 279ffff

Browse files
committed
Enable SmolLM 3 template.
- Add a `replace` method for the string type and update unit tests. - Support slice expressions with an omitted start or end index and update unit tests. - Fix the bug where the `in` operator did not work for the string type. - Update the testing script to fetch `chat_template.jinja` when the template is not in `tokenizer_config.json`. Tests passed with the SmolLM3 3B model: https://huggingface.co/HuggingFaceTB/SmolLM3-3B
1 parent f06140f commit 279ffff

File tree

4 files changed

+41
-6
lines changed

4 files changed

+41
-6
lines changed

include/minja/minja.hpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,8 +1355,13 @@ class BinaryOpExpr : public Expression {
13551355
case Op::Gt: return l > r;
13561356
case Op::Le: return l <= r;
13571357
case Op::Ge: return l >= r;
1358-
case Op::In: return (r.is_array() || r.is_object()) && r.contains(l);
1359-
case Op::NotIn: return !(r.is_array() && r.contains(l));
1358+
case Op::In: return (((r.is_array() || r.is_object()) && r.contains(l)) ||
1359+
(l.is_string() && r.is_string() &&
1360+
r.to_str().find(l.to_str()) != std::string::npos));
1361+
case Op::NotIn:
1362+
return !(((r.is_array() || r.is_object()) && r.contains(l)) ||
1363+
(l.is_string() && r.is_string() &&
1364+
r.to_str().find(l.to_str()) != std::string::npos));
13601365
default: break;
13611366
}
13621367
throw std::runtime_error("Unknown binary operator");
@@ -1552,6 +1557,19 @@ class MethodCallExpr : public Expression {
15521557
else res[i] = std::tolower(res[i]);
15531558
}
15541559
return res;
1560+
} else if (method->get_name() == "replace") {
1561+
vargs.expectArgs("replace method", {2, 3}, {0, 0});
1562+
auto before = vargs.args[0].get<std::string>();
1563+
auto after = vargs.args[1].get<std::string>();
1564+
auto count = vargs.args.size() == 3 ? vargs.args[2].get<int64_t>()
1565+
: str.length();
1566+
size_t start_pos = 0;
1567+
while ((start_pos = str.find(before, start_pos)) != std::string::npos &&
1568+
count-- > 0) {
1569+
str.replace(start_pos, before.length(), after);
1570+
start_pos += after.length();
1571+
}
1572+
return str;
15551573
}
15561574
}
15571575
throw std::runtime_error("Unknown method: " + method->get_name());
@@ -2128,7 +2146,7 @@ class Parser {
21282146
}
21292147
}
21302148

2131-
if ((has_first_colon || has_second_colon) && (start || end || step)) {
2149+
if ((has_first_colon || has_second_colon)) {
21322150
index = std::make_shared<SliceExpr>(slice_loc, std::move(start), std::move(end), std::move(step));
21332151
} else {
21342152
index = std::move(start);

scripts/fetch_templates_and_goldens.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,15 @@ async def process_model(output_folder: str, model_id: str, contexts: list[Contex
427427
except json.JSONDecodeError:
428428
config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))
429429

430-
assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json!'
430+
if 'chat_template' not in config:
431+
try:
432+
chat_template = await async_hf_download(model_id, "chat_template.jinja")
433+
config.update({'chat_template': chat_template})
434+
except Exception as e:
435+
logger.error(f"Failed to fetch chat_template.jinja for model {model_id}: {e}")
436+
raise e
437+
438+
assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json or no chat_template.jinja file found!'
431439
chat_template = config['chat_template']
432440
if isinstance(chat_template, str):
433441
await handle_chat_template(output_folder, model_id, None, chat_template, contexts)

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ set(MODEL_IDS
148148
HuggingFaceTB/SmolLM2-1.7B-Instruct
149149
HuggingFaceTB/SmolLM2-135M-Instruct
150150
HuggingFaceTB/SmolLM2-360M-Instruct
151+
HuggingFaceTB/SmolLM3-3B
151152
huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated
152153
huihui-ai/DeepSeek-R1-Distill-Llama-8B-abliterated
153154
huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2

tests/test-syntax.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ TEST(SyntaxTest, SimpleCases) {
8484
EXPECT_EQ(
8585
"Ok",
8686
render("{{ 'ok'.capitalize() }}", {}, {}));
87+
EXPECT_EQ("obcXYZobcXYZobc",
88+
render("{{ 'abcXYZabcXYZabc'.replace('a', 'o') }}", {}, {}));
89+
EXPECT_EQ("okcXYZokcXYZabc",
90+
render("{{ 'abcXYZabcXYZabc'.replace('ab', 'ok', 2) }}", {}, {}));
8791

8892
EXPECT_EQ(
8993
"ok",
@@ -199,6 +203,10 @@ TEST(SyntaxTest, SimpleCases) {
199203
EXPECT_EQ(
200204
"True,False",
201205
render(R"({{ 'a' in ["a"] }},{{ 'a' in [] }})", {}, {}));
206+
EXPECT_EQ("True,False",
207+
render(R"({{ 'a' in 'abc' }},{{ 'd' in 'abc' }})", {}, {}));
208+
EXPECT_EQ("False,True",
209+
render(R"({{ 'a' not in 'abc' }},{{ 'd' not in 'abc' }})", {}, {}));
202210
EXPECT_EQ(
203211
R"([{'a': 1}])",
204212
render(R"({{ [{"a": 1}, {"a": 2}, {}] | selectattr("a", "equalto", 1) | list }})", {}, {}));
@@ -481,8 +489,8 @@ TEST(SyntaxTest, SimpleCases) {
481489
"[1, 2, 3][0, 1][1, 2]",
482490
render("{% set x = [0, 1, 2, 3] %}{{ x[1:] }}{{ x[:2] }}{{ x[1:3] }}", {}, {}));
483491
EXPECT_EQ(
484-
"123;01;12",
485-
render("{% set x = '0123' %}{{ x[1:] }};{{ x[:2] }};{{ x[1:3] }}", {}, {}));
492+
"123;01;12;0123;0123",
493+
render("{% set x = '0123' %}{{ x[1:] }};{{ x[:2] }};{{ x[1:3] }};{{ x[:] }};{{ x[::] }}", {}, {}));
486494
EXPECT_EQ(
487495
"[3, 2, 1, 0][3, 2, 1][2, 1, 0][2, 1][0, 2][3, 1][2, 0]",
488496
render("{% set x = [0, 1, 2, 3] %}{{ x[::-1] }}{{ x[:0:-1] }}{{ x[2::-1] }}{{ x[2:0:-1] }}{{ x[::2] }}{{ x[::-2] }}{{ x[-2::-2] }}", {}, {}));

0 commit comments

Comments
 (0)