Skip to content

Commit 6ad2259

Browse files
authored
Add regex functions to core (#193)
1 parent 33b03f6 commit 6ad2259

File tree

2 files changed

+82
-0
lines changed

2 files changed

+82
-0
lines changed

basilisp/core/__init__.lpy

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,3 +1274,61 @@
12741274
(in-ns (quote ~name))
12751275
~requires
12761276
~@imports)))
1277+
1278+
;;;;;;;;;;;;;;;;;;;;;
1279+
;; Regex Functions ;;
1280+
;;;;;;;;;;;;;;;;;;;;;
1281+
1282+
(import re)
1283+
1284+
(defn re-pattern
  "Compile the regular expression s with Python's re.compile and return
  the resulting compiled pattern object."
  [s]
  (re/compile s))
1288+
1289+
(defn re-find
  "Returns the first match of pattern found anywhere in the string s,
  located with re.search. Returns nil if the pattern does not match.

  If the pattern defines no match groups, return the matched substring.
  Otherwise, return a vector with the matched substring in the first
  position and the match groups in the following positions. Groups which
  did not take part in the match appear as nil."
  [pattern s]
  (let [match (re/search pattern s)]
    (when match
      ;; Group 0 is the full text of the match; bind it once since both
      ;; result shapes need it.
      (let [whole  (.group match 0)
            groups (.groups match)]
        (if (zero? (count groups))
          whole
          (vec (cons whole groups)))))))
1302+
1303+
(defn re-matches
  "Match the entire string s against pattern using re.fullmatch.

  Yields nil when s does not match in full. When the pattern has no
  match groups the result is the matched string itself; otherwise it is
  a vector holding the matched string followed by each match group."
  [pattern s]
  (let [m (re/fullmatch pattern s)]
    (when m
      (let [text     (.group m 0)
            captures (.groups m)]
        (if (zero? (count captures))
          text
          (vec (cons text captures)))))))
1316+
1317+
(defn ^:private lazy-re-seq
  "Lazily walk a seq of regex match objects, producing the full matched
  text (match group 0) of each one in order."
  [iter]
  (lazy-seq
    (let [head (first iter)]
      (when head
        (let [remaining (rest iter)]
          (cons (.group head 0)
                ;; Only recur while further matches remain.
                (when (seq remaining)
                  (lazy-re-seq remaining))))))))
1325+
1326+
(defn re-seq
  "Returns a lazy sequence of the successive matches of pattern in the
  string s, located with re.finditer.

  Each element is the full text of one match (match group 0). Match
  groups defined by the pattern are not included in the results. The
  sequence has no elements when the pattern does not match."
  [pattern s]
  (lazy-re-seq (seq (re/finditer pattern s))))
1334+

tests/core_test.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import itertools
2+
import re
23
from fractions import Fraction
34
from unittest.mock import Mock
45

@@ -610,3 +611,26 @@ def test_merge():
610611
assert lmap.map({kw.keyword("a"): 53, kw.keyword("b"): "hi"}) == core.merge(
611612
lmap.map({kw.keyword("a"): 1, kw.keyword("b"): "hi"}),
612613
lmap.map({kw.keyword("a"): 53}))
614+
615+
616+
def test_re_find():
    """Check core.re_find for no match, a group-less match and grouped matches."""
    digits = re.compile(r"\d+")
    assert core.re_find(digits, "abcdef") is None
    assert "12345" == core.re_find(digits, "abc12345def")

    # Alternation: whichever group did not take part in the match is None.
    word_or_number = re.compile(r"(\D+)|(\d+)")
    assert vec.v("word then number ", "word then number ", None) == core.re_find(
        word_or_number, "word then number 57")
    assert vec.v("57", None, "57") == core.re_find(
        word_or_number, "57 number then word")

    # A group that matches the empty string is reported as "".
    optional_digits = re.compile(r"(\d*)(\S)\S+")
    assert vec.v("lots", "", "l") == core.re_find(
        optional_digits, "lots o' digits 123456789")
624+
625+
626+
def test_re_matches():
    """Check core.re_matches for partial, full and grouped full matches."""
    subject = "hello, world"

    # A pattern covering only a prefix of the string is not a full match.
    assert core.re_matches(re.compile(r"hello"), subject) is None
    assert "hello, world" == core.re_matches(re.compile(r"hello.*"), subject)
    assert vec.v("hello, world", "world") == core.re_matches(
        re.compile(r"hello, (.*)"), subject)
630+
631+
632+
def test_re_seq():
    """Check core.re_seq for zero matches and for several successive matches."""
    letters = re.compile(r"[a-zA-Z]+")
    assert core.seq(core.re_seq(letters, "134325235234")) is None

    numbers = re.compile(r"\d+")
    assert llist.l("1", "1", "0") == core.re_seq(numbers, "Basilisp 1.1.0")

    words = re.compile(r"\w+")
    assert llist.l("the", "man", "who", "sold", "the", "world") == core.re_seq(
        words, "the man who sold the world")

0 commit comments

Comments
 (0)