diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
index e2508af219..b86944c280 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
@@ -75,6 +75,7 @@ public enum BuiltinFunctionName {
MVAPPEND(FunctionName.of("mvappend")),
MVJOIN(FunctionName.of("mvjoin")),
MVINDEX(FunctionName.of("mvindex")),
+ SPLIT(FunctionName.of("split")),
FORALL(FunctionName.of("forall")),
EXISTS(FunctionName.of("exists")),
FILTER(FunctionName.of("filter")),
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/SplitFunctionImp.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/SplitFunctionImp.java
new file mode 100644
index 0000000000..0672772c0f
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/SplitFunctionImp.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.expression.function.CollectionUDF;
+
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlLibraryOperators;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.opensearch.sql.expression.function.PPLFuncImpTable;
+
+/**
+ * SPLIT function implementation that splits strings by delimiter.
+ *
+ * <p>Usage: split(str, delimiter)
+ *
+ * <p>Returns an array of strings split on the delimiter.
+ *
+ * <p>Special behavior:
+ *
+ * <ul>
+ *   <li>Empty delimiter ("") splits into individual characters
+ *   <li>If delimiter not found, returns array with original string
+ *   <li>Empty string returns empty array
+ * </ul>
+ *
+ * <p>Implementation notes:
+ *
+ * <ul>
+ *   <li>Uses Calcite's SPLIT for non-empty delimiters
+ *   <li>Uses custom character splitting for empty delimiter via REGEXP_REPLACE
+ * </ul>
+ */
+public class SplitFunctionImp implements PPLFuncImpTable.FunctionImp {
+
+  /**
+   * Builds the SPLIT expression as {@code CASE WHEN delimiter = '' THEN chars ELSE parts END}.
+   *
+   * @param builder factory used to create every literal and call node below
+   * @param args {@code args[0]} is the string to split, {@code args[1]} is the delimiter
+   * @return CASE node that picks per-character or per-delimiter splitting
+   */
+  @Override
+  public RexNode resolve(RexBuilder builder, RexNode... args) {
+    final RexNode input = args[0];
+    final RexNode separator = args[1];
+    final SqlOperator splitOp = SqlLibraryOperators.SPLIT;
+
+    // ELSE branch: the delimiter is not '', so both arguments go straight to SPLIT.
+    final RexNode byDelimiter = builder.makeCall(splitOp, input, separator);
+
+    // THEN branch: REGEXP_REPLACE puts '|' at every position matched by the zero-width
+    // pattern (a character on each side), e.g. 'abcd' -> 'a|b|c|d'; SPLIT then cuts on '|'.
+    // NOTE(review): an input that already contains '|' presumably gains extra elements.
+    final RexNode piped =
+        builder.makeCall(
+            SqlLibraryOperators.REGEXP_REPLACE_3,
+            input,
+            builder.makeLiteral("(?<=.)(?=.)"),
+            builder.makeLiteral("|"));
+    final RexNode byCharacter = builder.makeCall(splitOp, piped, builder.makeLiteral("|"));
+
+    // Condition: delimiter = ''. CASE operands are (condition, THEN value, ELSE value).
+    final RexNode delimiterIsEmpty =
+        builder.makeCall(SqlStdOperatorTable.EQUALS, separator, builder.makeLiteral(""));
+
+    // Both branches are always built; the CASE node decides which result is used.
+    return builder.makeCall(SqlStdOperatorTable.CASE, delimiterIsEmpty, byCharacter, byDelimiter);
+  }
+}
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
index bb5160f0a1..da60df6916 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
@@ -193,6 +193,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SINH;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN_BUCKET;
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPLIT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SQRT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_POP;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_SAMP;
@@ -285,6 +286,7 @@
import org.opensearch.sql.exception.ExpressionEvaluationException;
import org.opensearch.sql.executor.QueryType;
import org.opensearch.sql.expression.function.CollectionUDF.MVIndexFunctionImp;
+import org.opensearch.sql.expression.function.CollectionUDF.SplitFunctionImp;
public class PPLFuncImpTable {
private static final Logger logger = LogManager.getLogger(PPLFuncImpTable.class);
@@ -975,6 +977,12 @@ void populate() {
builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter),
PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER));
+ // Register SPLIT with custom logic for empty delimiter
+ register(
+ SPLIT,
+ new SplitFunctionImp(),
+ PPLTypeChecker.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER));
+
// Register MVINDEX to use Calcite's ITEM/ARRAY_SLICE with index normalization
register(
MVINDEX,
diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst
index 5c2b7c30f7..d6da194aca 100644
--- a/docs/user/ppl/functions/collection.rst
+++ b/docs/user/ppl/functions/collection.rst
@@ -186,6 +186,60 @@ Example::
| 120 |
+--------+
+SPLIT
+-----
+
+Description
+>>>>>>>>>>>
+
+Usage: split(str, delimiter) splits the string at each occurrence of the delimiter and returns the resulting substrings as a multivalue field (array). Pass an empty string ("") as the delimiter to split the string into one value per character. If the delimiter does not occur in the string, the result is an array containing only the original string. If the input string is empty, the result is an empty array.
+
+Argument type: str: STRING, delimiter: STRING
+
+Return type: ARRAY of STRING
+
+Example::
+
+ os> source=people | eval test = 'buttercup;rarity;tenderhoof;dash', result = split(test, ';') | fields result | head 1
+ fetched rows / total rows = 1/1
+ +------------------------------------+
+ | result |
+ |------------------------------------|
+ | [buttercup,rarity,tenderhoof,dash] |
+ +------------------------------------+
+
+ os> source=people | eval test = '1a2b3c4def567890', result = split(test, 'def') | fields result | head 1
+ fetched rows / total rows = 1/1
+ +------------------+
+ | result |
+ |------------------|
+ | [1a2b3c4,567890] |
+ +------------------+
+
+ os> source=people | eval test = 'abcd', result = split(test, '') | fields result | head 1
+ fetched rows / total rows = 1/1
+ +-----------+
+ | result |
+ |-----------|
+ | [a,b,c,d] |
+ +-----------+
+
+ os> source=people | eval test = 'name::value', result = split(test, '::') | fields result | head 1
+ fetched rows / total rows = 1/1
+ +--------------+
+ | result |
+ |--------------|
+ | [name,value] |
+ +--------------+
+
+ os> source=people | eval test = 'hello', result = split(test, ',') | fields result | head 1
+ fetched rows / total rows = 1/1
+ +---------+
+ | result |
+ |---------|
+ | [hello] |
+ +---------+
+
MVJOIN
------
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java
index c829565768..bbd0808397 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java
@@ -489,4 +489,43 @@ public void testMvindexRangeSingleElement() throws IOException {
verifySchema(actual, schema("result", "array"));
verifyDataRows(actual, rows(List.of(3)));
}
+
+  @Test
+  public void testSplitWithSemicolonDelimiter() throws IOException {
+    // Split a literal on ';' and expect one array element per name, in the original order.
+    final String query =
+        String.format(
+            "source=%s | eval test = 'buttercup;rarity;tenderhoof;dash;mcintosh', result ="
+                + " split(test, ';') | head 1 | fields result",
+            TEST_INDEX_BANK);
+    final JSONObject reply = executeQuery(query);
+    verifySchema(reply, schema("result", "array"));
+    verifyDataRows(reply, rows(List.of("buttercup", "rarity", "tenderhoof", "dash", "mcintosh")));
+  }
+
+  @Test
+  public void testSplitWithMultiCharDelimiter() throws IOException {
+    // The three-character delimiter 'def' is expected to act as a single separator.
+    final String query =
+        String.format(
+            "source=%s | eval test = '1a2b3c4def567890', result = split(test, 'def') | head 1 |"
+                + " fields result",
+            TEST_INDEX_BANK);
+    final JSONObject reply = executeQuery(query);
+    verifySchema(reply, schema("result", "array"));
+    verifyDataRows(reply, rows(List.of("1a2b3c4", "567890")));
+  }
+
+  @Test
+  public void testSplitWithEmptyDelimiter() throws IOException {
+    // An empty delimiter is expected to yield one array element per character.
+    final String query =
+        String.format(
+            "source=%s | eval test = 'abcd', result = split(test, '') | head 1 | fields result",
+            TEST_INDEX_BANK);
+    final JSONObject reply = executeQuery(query);
+
+    verifySchema(reply, schema("result", "array"));
+    verifyDataRows(reply, rows(List.of("a", "b", "c", "d")));
+  }
}
diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
index 2e0643fa28..7f65f9a893 100644
--- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
@@ -442,6 +442,7 @@ ARRAY_LENGTH: 'ARRAY_LENGTH';
MVAPPEND: 'MVAPPEND';
MVJOIN: 'MVJOIN';
MVINDEX: 'MVINDEX';
+SPLIT: 'SPLIT';
FORALL: 'FORALL';
FILTER: 'FILTER';
TRANSFORM: 'TRANSFORM';
diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4
index 494adb1571..29ac09bf5a 100644
--- a/ppl/src/main/antlr/OpenSearchPPLParser.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4
@@ -1095,6 +1095,7 @@ collectionFunctionName
| MVAPPEND
| MVJOIN
| MVINDEX
+ | SPLIT
| FORALL
| EXISTS
| FILTER
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java
index bffa20175d..5d864e770f 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java
@@ -214,4 +214,81 @@ public void testMvindexRangeNegative() {
+ "LIMIT 1";
verifyPPLToSparkSQL(root, expectedSparkSql);
}
+
+ @Test
+ public void testSplitWithSemicolonDelimiter() {
+ // Plan-level test: checks the logical plan and the Spark SQL text produced for split(test, ';').
+ String ppl =
+ "source=EMP | eval test = 'buttercup;rarity;tenderhoof', result = split(test, ';') | head"
+ + " 1 | fields result";
+ RelNode root = getRelNode(ppl);
+
+ // Both CASE operands are expected in the plan even though the delimiter literal is ';':
+ // the REGEXP_REPLACE-based branch and the plain SPLIT(test, ';') branch.
+ String expectedLogical =
+ "LogicalProject(result=[$9])\n"
+ + " LogicalSort(fetch=[1])\n"
+ + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],"
+ + " SAL=[$5], COMM=[$6], DEPTNO=[$7], test=['buttercup;rarity;tenderhoof':VARCHAR],"
+ + " result=[CASE(=(';', ''),"
+ + " SPLIT(REGEXP_REPLACE('buttercup;rarity;tenderhoof':VARCHAR, '(?<=.)(?=.)', '|'),"
+ + " '|'), SPLIT('buttercup;rarity;tenderhoof':VARCHAR, ';'))])\n"
+ + " LogicalTableScan(table=[[scott, EMP]])\n";
+ verifyLogical(root, expectedLogical);
+
+ // The Spark SQL rendering expresses the same choice as CASE WHEN ... THEN ... ELSE ... END.
+ String expectedSparkSql =
+ "SELECT CASE WHEN ';' = '' THEN SPLIT(REGEXP_REPLACE('buttercup;rarity;tenderhoof', "
+ + "'(?<=.)(?=.)', '|'), '|') ELSE SPLIT('buttercup;rarity;tenderhoof', ';') END "
+ + "`result`\n"
+ + "FROM `scott`.`EMP`\n"
+ + "LIMIT 1";
+ verifyPPLToSparkSQL(root, expectedSparkSql);
+ }
+
+ @Test
+ public void testSplitWithMultiCharDelimiter() {
+ // Plan-level test: checks the plan and Spark SQL text for a multi-character delimiter ('def').
+ String ppl =
+ "source=EMP | eval test = '1a2b3c4def567890', result = split(test, 'def') | head 1 |"
+ + " fields result";
+ RelNode root = getRelNode(ppl);
+
+ // The delimiter 'def' is expected both in the CASE condition (compared with '') and as the
+ // second argument of the SPLIT call in the ELSE operand.
+ String expectedLogical =
+ "LogicalProject(result=[$9])\n"
+ + " LogicalSort(fetch=[1])\n"
+ + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],"
+ + " SAL=[$5], COMM=[$6], DEPTNO=[$7], test=['1a2b3c4def567890':VARCHAR],"
+ + " result=[CASE(=('def':VARCHAR, ''), SPLIT(REGEXP_REPLACE('1a2b3c4def567890':VARCHAR,"
+ + " '(?<=.)(?=.)', '|'), '|'), SPLIT('1a2b3c4def567890':VARCHAR, 'def':VARCHAR))])\n"
+ + " LogicalTableScan(table=[[scott, EMP]])\n";
+ verifyLogical(root, expectedLogical);
+
+ // Spark SQL form of the same CASE expression.
+ String expectedSparkSql =
+ "SELECT CASE WHEN 'def' = '' THEN SPLIT(REGEXP_REPLACE('1a2b3c4def567890', "
+ + "'(?<=.)(?=.)', '|'), '|') ELSE SPLIT('1a2b3c4def567890', 'def') END `result`\n"
+ + "FROM `scott`.`EMP`\n"
+ + "LIMIT 1";
+ verifyPPLToSparkSQL(root, expectedSparkSql);
+ }
+
+ @Test
+ public void testSplitWithEmptyDelimiter() {
+ // Plan-level test: checks the plan and Spark SQL text when the delimiter literal is ''.
+ String ppl =
+ "source=EMP | eval test = 'abcd', result = split(test, '') | head 1 | fields result";
+ RelNode root = getRelNode(ppl);
+
+ // Empty delimiter: the expected plan still contains the full CASE expression; its THEN
+ // operand is the REGEXP_REPLACE + SPLIT branch that produces individual characters.
+ String expectedLogical =
+ "LogicalProject(result=[$9])\n"
+ + " LogicalSort(fetch=[1])\n"
+ + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],"
+ + " SAL=[$5], COMM=[$6], DEPTNO=[$7], test=['abcd':VARCHAR],"
+ + " result=[CASE(=('':VARCHAR, ''), SPLIT(REGEXP_REPLACE('abcd':VARCHAR,"
+ + " '(?<=.)(?=.)', '|'), '|'), SPLIT('abcd':VARCHAR, '':VARCHAR))])\n"
+ + " LogicalTableScan(table=[[scott, EMP]])\n";
+ verifyLogical(root, expectedLogical);
+
+ // Spark SQL form of the same CASE expression, with '' = '' as the WHEN condition.
+ String expectedSparkSql =
+ "SELECT CASE WHEN '' = '' THEN SPLIT(REGEXP_REPLACE('abcd', '(?<=.)(?=.)', '|'), '|') "
+ + "ELSE SPLIT('abcd', '') END `result`\n"
+ + "FROM `scott`.`EMP`\n"
+ + "LIMIT 1";
+ verifyPPLToSparkSQL(root, expectedSparkSql);
+ }
}
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
index f205b9fe0c..3fb92b6d14 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
@@ -822,6 +822,22 @@ public void testMvindex() {
anonymize("source=t | eval result=mvindex(array(1, 2, 3, 4, 5), 1, 3) | fields result"));
}
+ @Test
+ public void testSplit() {
+ // Test split with delimiter
+ assertEquals(
+ "source=table | eval identifier=split(***,***) | fields + identifier",
+ anonymize("source=t | eval result=split('a;b;c', ';') | fields result"));
+ // Test split with field reference
+ assertEquals(
+ "source=table | eval identifier=split(identifier,***) | fields + identifier",
+ anonymize("source=t | eval result=split(text, ',') | fields result"));
+ // Test split with empty delimiter (splits into characters)
+ assertEquals(
+ "source=table | eval identifier=split(***,***) | fields + identifier",
+ anonymize("source=t | eval result=split('abcd', '') | fields result"));
+ }
+
@Test
public void testRexWithOffsetField() {
when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10);