Skip to content

Commit 7cf9eb6

Browse files
committed
improve submatch perf
1 parent f3a17c5 commit 7cf9eb6

File tree

2 files changed

+17
-23
lines changed

2 files changed

+17
-23
lines changed

src/nregex/private/dfamacro.nim

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -123,13 +123,9 @@ macro genSubmatch(
123123
echo "==== genSubmatch ===="
124124
echo repr(result)
125125

126-
func submatch(
127-
smA, smB: var Submatches,
128-
capts: var Capts,
129-
regex: static Regex,
130-
i: int,
131-
qt, cprev, c: int32
132-
) {.inline.} =
126+
template submatch(
127+
smA, smB, capts, regex, i, qt, cprev, c: untyped
128+
): untyped =
133129
var captx: int32
134130
var matched = true
135131
for n, capt in smA.items:
@@ -319,8 +315,8 @@ func matchImpl*(
319315
namedGroups {.used.} = regex.namedGroups
320316
when hasTransitionsZ:
321317
var
322-
smA = newSubmatches()
323-
smB = newSubmatches()
318+
smA = newSubmatches(regex.transitions.all.len)
319+
smB = newSubmatches(regex.transitions.all.len)
324320
capts: Capts
325321
smA.add((0'i16, -1'i32))
326322
while i < len(text):

src/nregex/private/dfamatch.nim

Lines changed: 12 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -55,37 +55,35 @@ type
5555
sx: seq[(NodeIdx, CaptIdx)]
5656
# use custom len because setLen(0) is slower,
5757
# and {.noInit.} makes no difference
58-
si: int
59-
ss: set[int16]
58+
si: int16
59+
ss: seq[int16]
6060

61-
func newSubmatches*(): Submatches {.inline.} =
61+
func newSubmatches*(size: int): Submatches {.inline.} =
6262
result = new Submatches
6363
result.sx = newSeq[(NodeIdx, CaptIdx)](8)
64+
result.ss = newSeq[int16](size)
6465
result.si = 0
6566

6667
func `[]`*(sm: Submatches, i: int): (NodeIdx, CaptIdx) {.inline.} =
6768
assert i < sm.si
6869
sm.sx[i]
6970

71+
func hasState*(sm: Submatches, n: int16): bool {.inline.} =
72+
sm.ss[n] < sm.si and sm.sx[sm.ss[n]][0] == n
73+
7074
func add*(sm: var Submatches, item: (NodeIdx, CaptIdx)) {.inline.} =
71-
assert item[0] notin sm.ss
75+
assert not sm.hasState(item[0])
7276
assert sm.si <= sm.sx.len
7377
if (sm.si == sm.sx.len).unlikely:
7478
sm.sx.setLen(sm.sx.len * 2)
7579
sm.sx[sm.si] = item
76-
sm.si += 1
77-
sm.ss.incl(item[0])
80+
sm.ss[item[0]] = sm.si
81+
sm.si += 1'i16
7882

7983
func len*(sm: Submatches): int {.inline.} =
8084
sm.si
8185

82-
func hasState*(sm: Submatches, n: int16): bool {.inline.} =
83-
n in sm.ss
84-
8586
func clear*(sm: var Submatches) {.inline.} =
86-
for i in 0 .. sm.len-1:
87-
assert sm.sx[i][0] in sm.ss
88-
sm.ss.excl sm.sx[i][0]
8987
sm.si = 0
9088

9189
iterator items*(sm: Submatches): (NodeIdx, CaptIdx) {.inline.} =
@@ -263,8 +261,8 @@ func matchImpl*(
263261
captLong {.used.} = -1
264262
iPrevLong {.used.} = start
265263
if hasTransitionsZ:
266-
smA = newSubmatches()
267-
smB = newSubmatches()
264+
smA = newSubmatches(regex.transitions.all.len)
265+
smB = newSubmatches(regex.transitions.all.len)
268266
smA.add((0'i16, -1'i32))
269267
#echo regex.dfa
270268
while i < len(text):

0 commit comments

Comments
 (0)