Skip to content

Commit 44871cf

Browse files
jneen and jneen authored
Performance fix for StringScanner#peek and #peek_behind (#16593)
Co-authored-by: jneen <jneen@jneen.net>
1 parent f016cfc commit 44871cf

File tree

2 files changed

+54
-4
lines changed

2 files changed

+54
-4
lines changed

spec/std/string_scanner_spec.cr

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -417,6 +417,45 @@ describe StringScanner do
417417
s.peek(7).should eq("this is")
418418
s.offset.should eq(0)
419419
end
420+
421+
it "shows the next len characters for multi-byte strings" do
422+
s = StringScanner.new("これは文字列である")
423+
s.offset.should eq(0)
424+
s.peek(3).should eq("これは")
425+
s.offset.should eq(0)
426+
s.peek(6).should eq("これは文字列")
427+
s.offset.should eq(0)
428+
end
429+
430+
it "errors on negative input" do
431+
s = StringScanner.new("abcde")
432+
s.scan(2)
433+
expect_raises(ArgumentError, "Negative lookahead count: -1") { s.peek(-1) }
434+
end
435+
end
436+
437+
describe "#peek_behind" do
438+
it "shows characters behind the scan head" do
439+
s = StringScanner.new("abcdefg")
440+
s.peek_behind(10).should eq("")
441+
s.scan(3)
442+
s.peek_behind(10).should eq("abc")
443+
s.peek_behind(2).should eq("bc")
444+
end
445+
446+
it "shows characters behind the scan head for multi-byte strings" do
447+
s = StringScanner.new("あいうえお")
448+
s.peek_behind(10).should eq("")
449+
s.scan(3)
450+
s.peek_behind(10).should eq("あいう")
451+
s.peek_behind(2).should eq("いう")
452+
end
453+
454+
it "errors on negative input" do
455+
s = StringScanner.new("abcde")
456+
s.scan(3)
457+
expect_raises(ArgumentError, "Negative lookbehind count") { s.peek_behind(-1) }
458+
end
420459
end
421460

422461
describe "#reset" do

src/string_scanner.cr

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,9 @@
4141
# * `#skip`
4242
# * `#skip_until`
4343
#
44-
# Methods that look ahead:
44+
# Methods that look ahead or behind:
4545
# * `#peek`
46+
# * `#peek_behind`
4647
# * `#check`
4748
# * `#check_until`
4849
#
@@ -418,10 +419,20 @@ class StringScanner
418419
@str
419420
end
420421

421-
# Extracts a string corresponding to string[offset,*len*], without advancing
422-
# the scan offset.
422+
# Extracts a string by looking ahead *len* characters, without advancing the
423+
# scan offset. The return value has at most *len* characters, but may have fewer
424+
# if the scan head is close to the end of the string.
423425
def peek(len) : String
424-
@str[offset, len]
426+
byte_len = lookahead_byte_length(len) || @str.bytesize
427+
@str.byte_slice(@byte_offset, byte_len)
428+
end
429+
430+
# Extracts a string by looking behind *len* characters, without moving the
431+
# scan offset. The return value has at most *len* characters, but may have fewer
432+
# if the scan head is close to the beginning of the string.
433+
def peek_behind(len) : String
434+
byte_len = lookbehind_byte_length(len) || @byte_offset
435+
@str.byte_slice(@byte_offset - byte_len, byte_len)
425436
end
426437

427438
# Returns the remainder of the string after the scan offset.

0 commit comments

Comments
 (0)