Enable the SmolLM3 chat template.

yichunk · yichunk · commit 279ffff2f649 · 2025-07-09T18:55:35.000Z
- Add a `replace` method for strings and update unit tests.
- Support slice expressions in which both the start and end indices are omitted (e.g. `x[:]`, `x[::]`) and update unit tests.
- Fix a bug where the `in` and `not in` operators did not work when both operands are strings.
- Update the testing script to fetch `chat_template.jinja` when the template is not in `tokenizer_config.json`.

Tests passed with the SmolLM3-3B model: https://huggingface.co/HuggingFaceTB/SmolLM3-3B
diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp
@@ -1355,8 +1355,13 @@ class BinaryOpExpr : public Expression {
               case Op::Gt:        return l > r;
               case Op::Le:        return l <= r;
               case Op::Ge:        return l >= r;
-              case Op::In:        return (r.is_array() || r.is_object()) && r.contains(l);
-              case Op::NotIn:     return !(r.is_array() && r.contains(l));
+              case Op::In:        return (((r.is_array() || r.is_object()) && r.contains(l)) ||
+                                          (l.is_string() && r.is_string() &&
+                                            r.to_str().find(l.to_str()) != std::string::npos));
+              case Op::NotIn:
+                                  return !(((r.is_array() || r.is_object()) && r.contains(l)) ||
+                                            (l.is_string() && r.is_string() &&
+                                              r.to_str().find(l.to_str()) != std::string::npos));
               default:            break;
           }
           throw std::runtime_error("Unknown binary operator");
@@ -1552,6 +1557,19 @@ class MethodCallExpr : public Expression {
               else res[i] = std::tolower(res[i]);
             }
             return res;
+          } else if (method->get_name() == "replace") {
+            vargs.expectArgs("replace method", {2, 3}, {0, 0});
+            auto before = vargs.args[0].get<std::string>();
+            auto after = vargs.args[1].get<std::string>();
+            auto count = vargs.args.size() == 3 ? vargs.args[2].get<int64_t>()
+                                                : str.length();
+            size_t start_pos = 0;
+            while ((start_pos = str.find(before, start_pos)) != std::string::npos &&
+                  count-- > 0) {
+              str.replace(start_pos, before.length(), after);
+              start_pos += after.length();
+            }
+            return str;
           }
         }
         throw std::runtime_error("Unknown method: " + method->get_name());
@@ -2128,7 +2146,7 @@ class Parser {
             }
           }
   
-          if ((has_first_colon || has_second_colon) && (start || end || step)) {
+          if ((has_first_colon || has_second_colon)) {
             index = std::make_shared<SliceExpr>(slice_loc, std::move(start), std::move(end), std::move(step));
           } else {
             index = std::move(start);
diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py
@@ -427,7 +427,15 @@ async def process_model(output_folder: str, model_id: str, contexts: list[Contex
         except json.JSONDecodeError:
             config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))
 
-        assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json!'
+        if 'chat_template' not in config:
+            try:
+                chat_template = await async_hf_download(model_id, "chat_template.jinja")
+                config.update({'chat_template': chat_template})
+            except Exception as e:
+                logger.error(f"Failed to fetch chat_template.jinja for model {model_id}: {e}")
+                raise e
+
+        assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json or no chat_template.jinja file found!'
         chat_template = config['chat_template']
         if isinstance(chat_template, str):
             await handle_chat_template(output_folder, model_id, None, chat_template, contexts)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -148,6 +148,7 @@ set(MODEL_IDS
     HuggingFaceTB/SmolLM2-1.7B-Instruct
     HuggingFaceTB/SmolLM2-135M-Instruct
     HuggingFaceTB/SmolLM2-360M-Instruct
+    HuggingFaceTB/SmolLM3-3B
     huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated
     huihui-ai/DeepSeek-R1-Distill-Llama-8B-abliterated
     huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2
diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp
@@ -84,6 +84,10 @@ TEST(SyntaxTest, SimpleCases) {
     EXPECT_EQ(
         "Ok",
         render("{{ 'ok'.capitalize() }}", {}, {}));
+    EXPECT_EQ("obcXYZobcXYZobc",
+        render("{{ 'abcXYZabcXYZabc'.replace('a', 'o') }}", {}, {}));
+    EXPECT_EQ("okcXYZokcXYZabc",
+        render("{{ 'abcXYZabcXYZabc'.replace('ab', 'ok', 2) }}", {}, {}));
 
     EXPECT_EQ(
         "ok",
@@ -199,6 +203,10 @@ TEST(SyntaxTest, SimpleCases) {
     EXPECT_EQ(
         "True,False",
         render(R"({{ 'a' in ["a"] }},{{ 'a' in [] }})", {}, {}));
+    EXPECT_EQ("True,False",
+        render(R"({{ 'a' in 'abc' }},{{ 'd' in 'abc' }})", {}, {}));
+    EXPECT_EQ("False,True",
+        render(R"({{ 'a' not in 'abc' }},{{ 'd' not in 'abc' }})", {}, {}));
     EXPECT_EQ(
         R"([{'a': 1}])",
         render(R"({{ [{"a": 1}, {"a": 2}, {}] | selectattr("a", "equalto", 1) | list }})", {}, {}));
@@ -481,8 +489,8 @@ TEST(SyntaxTest, SimpleCases) {
         "[1, 2, 3][0, 1][1, 2]",
         render("{% set x = [0, 1, 2, 3] %}{{ x[1:] }}{{ x[:2] }}{{ x[1:3] }}", {}, {}));
     EXPECT_EQ(
-        "123;01;12",
-        render("{% set x = '0123' %}{{ x[1:] }};{{ x[:2] }};{{ x[1:3] }}", {}, {}));
+        "123;01;12;0123;0123",
+        render("{% set x = '0123' %}{{ x[1:] }};{{ x[:2] }};{{ x[1:3] }};{{ x[:] }};{{ x[::] }}", {}, {}));
     EXPECT_EQ(
         "[3, 2, 1, 0][3, 2, 1][2, 1, 0][2, 1][0, 2][3, 1][2, 0]",
         render("{% set x = [0, 1, 2, 3] %}{{ x[::-1] }}{{ x[:0:-1] }}{{ x[2::-1] }}{{ x[2:0:-1] }}{{ x[::2] }}{{ x[::-2] }}{{ x[-2::-2] }}", {}, {}));