diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp
index 943e290..32e80fe 100644
--- a/include/minja/minja.hpp
+++ b/include/minja/minja.hpp
@@ -1446,24 +1446,50 @@ struct ArgumentsExpression {
 static std::string strip(const std::string & s, const std::string & chars = "", bool left = true, bool right = true) {
   auto charset = chars.empty() ? " \t\n\r" : chars;
   auto start = left ? s.find_first_not_of(charset) : 0;
-  if (start == std::string::npos) return "";
+  if (start == std::string::npos) return "";
   auto end = right ? s.find_last_not_of(charset) : s.size() - 1;
   return s.substr(start, end - start + 1);
 }
 
-static std::vector<std::string> split(const std::string & s, const std::string & sep) {
+// Splits `s` on `sep` scanning left to right; performs at most `maxsplit` splits when maxsplit >= 0,
+// leaving the unsplit remainder as the last element. An empty `sep` never matches (result is {s}).
+static std::vector<std::string> split(const std::string & s, const std::string & sep, int maxsplit = -1) {
   std::vector<std::string> result;
   size_t start = 0;
-  size_t end = s.find(sep);
-  while (end != std::string::npos) {
+  // An empty separator would match at every offset without consuming input and loop forever.
+  size_t end = sep.empty() ? std::string::npos : s.find(sep);
+  int splits = 0;
+  while (end != std::string::npos && (maxsplit < 0 || splits < maxsplit)) {
     result.push_back(s.substr(start, end - start));
     start = end + sep.length();
     end = s.find(sep, start);
+    splits++;
   }
   result.push_back(s.substr(start));
   return result;
 }
 
+// Like split(), but scans right to left so that `maxsplit` keeps the rightmost pieces separate;
+// elements are still returned in left-to-right order. An empty `sep` never matches (result is {s}).
+static std::vector<std::string> rsplit(const std::string & s, const std::string & sep, int maxsplit = -1) {
+  if (sep.empty()) return {s};
+  std::vector<std::string> reversed;  // pieces in right-to-left order
+  size_t end = s.length();
+  size_t pos = s.rfind(sep);
+  int splits = 0;
+  while (pos != std::string::npos && (maxsplit < 0 || splits < maxsplit)) {
+    reversed.push_back(s.substr(pos + sep.length(), end - pos - sep.length()));
+    end = pos;
+    splits++;
+    // The next match must end at or before `pos`; stepping back by only one character would
+    // accept a match overlapping this one and underflow the length computed above.
+    if (pos < sep.length()) break;
+    pos = s.rfind(sep, pos - sep.length());
+  }
+  reversed.push_back(s.substr(0, end));
+  return std::vector<std::string>(reversed.rbegin(), reversed.rend());
+}
+
 static std::string capitalize(const std::string & s) {
   if (s.empty()) return s;
   auto result = s;
@@ -1566,9 +1592,20 @@ class MethodCallExpr : public Expression {
         auto chars = vargs.args.empty() ? "" : vargs.args[0].get<std::string>();
         return Value(strip(str, chars, /* left= */ false, /* right= */ true));
       } else if (method->get_name() == "split") {
-        vargs.expectArgs("split method", {1, 1}, {0, 0});
+        vargs.expectArgs("split method", {1, 2}, {0, 0});
+        auto sep = vargs.args[0].get<std::string>();
+        int maxsplit = vargs.args.size() > 1 ? vargs.args[1].to_int() : -1;
+        auto parts = split(str, sep, maxsplit);
+        Value result = Value::array();
+        for (const auto& part : parts) {
+          result.push_back(Value(part));
+        }
+        return result;
+      } else if (method->get_name() == "rsplit") {
+        vargs.expectArgs("rsplit method", {1, 2}, {0, 0});
         auto sep = vargs.args[0].get<std::string>();
-        auto parts = split(str, sep);
+        int maxsplit = vargs.args.size() > 1 ? vargs.args[1].to_int() : -1;
+        auto parts = rsplit(str, sep, maxsplit);
         Value result = Value::array();
         for (const auto& part : parts) {
           result.push_back(Value(part));
diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp
index ebab4eb..9dd7c7c 100644
--- a/tests/test-syntax.cpp
+++ b/tests/test-syntax.cpp
@@ -79,6 +79,21 @@ TEST(SyntaxTest, SimpleCases) {
     EXPECT_EQ("bcXYZab", render("{{ 'abcXYZabc'.strip('ac') }}", {}, {}));
 
     EXPECT_EQ(R"(["a", "b"])", render("{{ 'a b'.split(' ') | tojson }}", {}, {}));
+
+    // Test rsplit (reverse split) with maxsplit parameter
+    // rsplit splits from right to left, which is crucial for extracting content after the last delimiter
+    // Used in chat templates like DeepSeek-R1: content.rsplit('', 1)[-1]
+    EXPECT_EQ(R"(["a-b", "c"])", render("{{ 'a-b-c'.rsplit('-', 1) | tojson }}", {}, {}));
+    EXPECT_EQ(R"(["a", "b-c"])", render("{{ 'a-b-c'.split('-', 1) | tojson }}", {}, {}));
+    // NOTE(review): the separator literal in the two 'prefix...' assertions below looks truncated (empty '') - confirm against the intended delimiter before applying
+    EXPECT_EQ(R"(["prefixmiddle", "suffix"])", render("{{ 'prefixmiddlesuffix'.rsplit('', 1) | tojson }}", {}, {}));
+
+    // Test rsplit with indexing - extract content after the last delimiter
+    EXPECT_EQ(" suffix", render("{{ 'prefixmiddle suffix'.rsplit('', 1)[-1] }}", {}, {}));
+    EXPECT_EQ(R"(["a", "b", "c"])", render("{{ 'a-b-c'.rsplit('-') | tojson }}", {}, {}));
+    // A self-overlapping separator must not be matched twice over the same characters
+    EXPECT_EQ(R"(["a-", "b"])", render("{{ 'a---b'.rsplit('--') | tojson }}", {}, {}));
+
     EXPECT_EQ(
         "Ok",