Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions outlines/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,37 +96,40 @@ def __getattr__(self, name):
integer = Regex(r"[+-]?(0|[1-9][0-9]*)")
boolean = Regex("(True|False)")
number = Regex(rf"{integer.pattern}(\.[0-9]+)?([eE][+-][0-9]+)?")
date = Regex(r"(\d{4})-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])")
time = Regex(r"([0-1][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])")
datetime = Regex(rf"({date.pattern})(\s)({time.pattern})")
date = Regex(r"(\d{4})-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])", requires_quoting=True)
time = Regex(r"([0-1][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])", requires_quoting=True)
datetime = Regex(rf"({date.pattern})(\s)({time.pattern})", requires_quoting=True)

# Basic regex types
digit = Regex(r"\d")
char = Regex(r"\w")
newline = Regex(r"(\r\n|\r|\n)") # Matches newlines on Linux, Windows & macOS
whitespace = Regex(r"\s")
hex_str = Regex(r"(0x)?[a-fA-F0-9]+")
char = Regex(r"\w", requires_quoting=True)
newline = Regex(r"(\r\n|\r|\n)", requires_quoting=True) # Matches newlines on Linux, Windows & macOS
whitespace = Regex(r"\s", requires_quoting=True)
hex_str = Regex(r"(0x)?[a-fA-F0-9]+", requires_quoting=True)
uuid4 = Regex(
r"[a-fA-F0-9]{8}-"
r"[a-fA-F0-9]{4}-"
r"4[a-fA-F0-9]{3}-"
r"[89abAB][a-fA-F0-9]{3}-"
r"[a-fA-F0-9]{12}"
r"[a-fA-F0-9]{12}",
requires_quoting=True
)
ipv4 = Regex(
r"((25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})\.){3}"
r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})"
r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})",
requires_quoting=True
)

# Document-specific types
sentence = Regex(r"[A-Z].*\s*[.!?]")
paragraph = Regex(rf"{sentence.pattern}(?:\s+{sentence.pattern})*\n+")
sentence = Regex(r"[A-Z].*\s*[.!?]", requires_quoting=True)
paragraph = Regex(rf"{sentence.pattern}(?:\s+{sentence.pattern})*\n+", requires_quoting=True)


# The following regex is RFC 5322 compliant and was found at:
# https://emailregex.com/
email = Regex(
r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])"""
r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""",
requires_quoting=True
)

# Matches any ISBN number. Note that this is not completely correct as not all
Expand All @@ -136,5 +139,6 @@ def __getattr__(self, name):
#
# TODO: The check digit can only be computed by calling a function to compute it dynamically
isbn = Regex(
r"(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]"
r"(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]",
requires_quoting=True
)
Loading
Loading