Skip to content

Commit 5335bdb

Browse files
committed
[core:text/regex] Follow-up to fix #6323 and add test case. As was said in the issue discussion, I suspected that there might be a sibling bug in the .Assert_Non_Word_Boundary implementation, and I was able to confirm that with the Python code re.findall(r"\B'", "'"). The Odin implementation output an empty string, whereas Python gave "'". That is the same bug, caused by incorrect logic at the string ends. This commit makes the implementation of those 2 instructions cleaner and adds a test case.
1 parent 3bcddb4 commit 5335bdb

File tree

2 files changed

+12
-16
lines changed

2 files changed

+12
-16
lines changed

core/text/regex/virtual_machine/virtual_machine.odin

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -204,15 +204,8 @@ add_thread :: proc(vm: ^Machine, saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, pc:
204204
case .Assert_Word_Boundary:
205205
sp := vm.string_pointer+vm.current_rune_size
206206

207-
left_is_wc := false
208-
if sp > 0 {
209-
left_is_wc = is_word_class(vm.current_rune)
210-
}
211-
212-
right_is_wc := false
213-
if sp < len(vm.memory) {
214-
right_is_wc = is_word_class(vm.next_rune)
215-
}
207+
left_is_wc := sp > 0 && is_word_class(vm.current_rune)
208+
right_is_wc := sp < len(vm.memory) && is_word_class(vm.next_rune)
216209

217210
if left_is_wc != right_is_wc {
218211
pc += size_of(Opcode)
@@ -221,14 +214,13 @@ add_thread :: proc(vm: ^Machine, saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, pc:
221214

222215
case .Assert_Non_Word_Boundary:
223216
sp := vm.string_pointer+vm.current_rune_size
224-
if sp != 0 && sp != len(vm.memory) {
225-
last_rune_is_wc := is_word_class(vm.current_rune)
226-
this_rune_is_wc := is_word_class(vm.next_rune)
227217

228-
if last_rune_is_wc && this_rune_is_wc || !last_rune_is_wc && !this_rune_is_wc {
229-
pc += size_of(Opcode)
230-
continue
231-
}
218+
left_is_wc := sp > 0 && is_word_class(vm.current_rune)
219+
right_is_wc := sp < len(vm.memory) && is_word_class(vm.next_rune)
220+
221+
if left_is_wc == right_is_wc {
222+
pc += size_of(Opcode)
223+
continue
232224
}
233225

234226
case .Wait_For_Byte:

tests/core/text/regex/test_core_text_regex.odin

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,10 @@ test_non_word_boundaries :: proc(t: ^testing.T) {
564564
EXPR :: `.+\B`
565565
check_expression(t, EXPR, "abc", "ab")
566566
}
567+
{
568+
EXPR :: `\B'`
569+
check_expression(t, EXPR, "'", "'")
570+
}
567571
}
568572

569573
@test

0 commit comments

Comments
 (0)