cocoindex-io
diff --git a/‎python/cocoindex/functions.py‎
Lines changed: 20 additions & 2 deletions b/‎python/cocoindex/functions.py‎
Lines changed: 20 additions & 2 deletions
diff --git a/‎src/ops/functions/mod.rs‎
Lines changed: 1 addition & 0 deletions b/‎src/ops/functions/mod.rs‎
Lines changed: 1 addition & 0 deletions
@@ -2,13 +2,13 @@
 
 import dataclasses
 import functools
-from typing import Annotated, Any, Literal
+from typing import Any, Literal
 
 import numpy as np
 from numpy.typing import NDArray
 
 from . import llm, op
-from .typing import TypeAttr, Vector
+from .typing import Vector
 
 
 class ParseJson(op.FunctionSpec):
@@ -40,6 +40,24 @@ class SplitRecursively(op.FunctionSpec):
     custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
 
 
+class SplitBySeparators(op.FunctionSpec):
+    """Spec for splitting text on regex separators only (no chunk-size planning).
+
+    Output schema is stated to match SplitRecursively for drop-in compatibility:
+    KTable rows with fields: location (Range), text (Str), start, end.
+    Attributes (semantics are implemented outside this class -- TODO confirm):
+        separators_regex: Regex patterns to split on, e.g. [r"\\n\\n+"].
+        keep_separator: One of "none" (default), "left" or "right".
+        include_empty: Presumably keeps empty pieces when True; default False.
+        trim: Presumably strips surrounding whitespace when True; default True.
+    """
+
+    separators_regex: list[str] = dataclasses.field(default_factory=list)
+    keep_separator: Literal["none", "left", "right"] = "none"
+    include_empty: bool = False
+    trim: bool = True
+
+
 class EmbedText(op.FunctionSpec):
     """Embed a text into a vector space."""
 
 
@@ -1,6 +1,7 @@
 pub mod embed_text;
 pub mod extract_by_llm;
 pub mod parse_json;
+pub mod split_by_separators;
 pub mod split_recursively;
 
 #[cfg(test)]