Skip to content

Commit acae737

Browse files
updates to factor in quotes during sentence splitting
1 parent 6743df2 commit acae737

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

guardrails/utils/tokenization_utils.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,14 @@ def postproc_splits(sentences):
5757
sentences, r"\(([^\(\)]{0,250})\n([^\(\)]{0,250})\)", r"(\1 \2)"
5858
)
5959

60+
# Guardrails mods for line breaks within quotes
61+
sentences = replace_til_no_change(
62+
sentences, r'"([^"\n]{0,250})\n([^"\n]{0,250})"', r'"\1 \2"'
63+
)
64+
sentences = replace_til_no_change(
65+
sentences, r"'([^'\n]{0,250})\n([^'\n]{0,250})'", r"'\1 \2'"
66+
)
67+
6068
# Nesting to depth one
6169
sentences = replace_til_no_change(
6270
sentences,

0 commit comments

Comments
 (0)