Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions spec/std/string_scanner_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,132 @@ describe StringScanner do
end
end

# Specs for StringScanner#rewind: moving the scan head backwards by a number
# of characters, for both ASCII-only and multibyte input.
describe "#rewind" do
it "rewinds a single byte optimizable string" do
s = StringScanner.new("abcde")

# Rewinding further than the head has advanced must raise...
expect_raises(IndexError, "Index out of range") { s.rewind(10) }

# ...and must leave the offset where it was.
s.offset.should eq(0)

# A negative count is rejected with a different error type and message.
expect_raises(ArgumentError, "Negative lookbehind count") { s.rewind(-1) }

s.skip(3)
s.current_char.should eq('d')
s.offset.should eq(3)

# Rewinding by zero characters is a no-op.
s.rewind(0)
s.offset.should eq(3)

s.rewind(2)
s.offset.should eq(1)

# A failed rewind from a non-zero offset also leaves the offset unchanged.
expect_raises(IndexError, "Index out of range") { s.rewind(1000) }
s.offset.should eq(1)
end

# Same scenario with multibyte characters: offsets are counted in
# characters, not bytes.
it "rewinds a multibyte char string" do
s = StringScanner.new("あいうえお")
expect_raises(IndexError, "Index out of range") { s.rewind(10) }
s.offset.should eq(0)

s.skip(3)
s.current_char.should eq('え')
s.offset.should eq(3)

s.rewind(2)
s.offset.should eq(1)

expect_raises(IndexError, "Index out of range") { s.rewind(1000) }
s.offset.should eq(1)
end
end

# Specs for the look-behind accessors: the nilable variants return nil at the
# start of the string, the non-nilable variants raise IndexError there.
describe "#previous_char? and #previous_byte?" do
it "finds the previous byte or char for single-byte strings" do
s = StringScanner.new("abcde")
# At offset 0 there is nothing behind the scan head.
s.previous_byte?.should be_nil
expect_raises(IndexError, "No previous byte") { s.previous_byte }
s.previous_char?.should be_nil
expect_raises(IndexError) { s.previous_char }

# After consuming one character, that character is the "previous" one.
s.scan(1)
s.previous_byte?.should eq('a'.ord)
s.previous_byte.should eq('a'.ord)
s.previous_char?.should eq('a')
s.previous_char.should eq('a')
end

it "finds the previous byte or char for multi-byte strings" do
s = StringScanner.new("あいうえお")
s.previous_byte?.should be_nil
expect_raises(IndexError, "No previous byte") { s.previous_byte }
s.previous_char?.should be_nil
expect_raises(IndexError) { s.previous_char }

s.scan(1)
# The byte accessors see only the final byte of the multibyte character,
# while the char accessors decode the whole character.
s.previous_byte?.should eq('あ'.bytes.last)
s.previous_byte.should eq('あ'.bytes.last)
s.previous_char?.should eq('あ')
s.previous_char.should eq('あ')
end
end

# Specs for the accessors that read at the scan head: the nilable variants
# return nil at end of string, the non-nilable variants raise IndexError.
describe "#current_char and #current_byte" do
it "finds the current byte and char for single-byte strings" do
s = StringScanner.new("abcde")
s.current_char.should eq('a')
s.current_char?.should eq('a')
s.current_byte.should eq('a'.ord)
s.current_byte?.should eq('a'.ord)

# Consume two characters; the head now sits on the third.
s.scan(2)
s.current_char.should eq('c')
s.current_char?.should eq('c')
s.current_byte.should eq('c'.ord)
s.current_byte?.should eq('c'.ord)

# At end of string there is no current byte or character.
s.terminate
s.current_char?.should be_nil
expect_raises(IndexError) { s.current_char }
s.current_byte?.should be_nil
expect_raises(IndexError) { s.current_byte }
end

it "finds the current byte and char for multi-byte strings" do
s = StringScanner.new("あいうえお")
# The byte accessors return only the leading byte of the multibyte
# character, while the char accessors decode the whole character.
s.current_char.should eq('あ')
s.current_char?.should eq('あ')
s.current_byte.should eq('あ'.bytes.first)
s.current_byte?.should eq('あ'.bytes.first)

s.scan(2)
s.current_char.should eq('う')
s.current_char?.should eq('う')
s.current_byte.should eq('う'.bytes.first)
s.current_byte?.should eq('う'.bytes.first)

s.terminate
s.current_char?.should be_nil
expect_raises(IndexError) { s.current_char }
s.current_byte?.should be_nil
expect_raises(IndexError) { s.current_byte }
end
end

# Specs for StringScanner#beginning_of_line?.
describe "#beginning_of_line?" do
it "checks backwards for a newline or start of string" do
s = StringScanner.new("a\nb\nc\n")
# The start of the string counts as the beginning of a line.
s.beginning_of_line?.should be_true
# Head is on the first "\n"; the previous character is 'a'.
s.skip(1)
s.beginning_of_line?.should be_false
# Head is on 'b', immediately after a newline.
s.skip(1)
s.beginning_of_line?.should be_true
# At end of string the answer is false even though the last character
# is a newline.
s.terminate
s.beginning_of_line?.should be_false
end
end

describe "#reset" do
it "resets the scan offset to the beginning and clears the last match" do
s = StringScanner.new("this is a string")
Expand All @@ -444,4 +570,45 @@ describe StringScanner do
s.eos?.should be_true
end
end

# Specs for StringScanner#matched?: it reflects whether the most recent
# check/skip/scan call produced a match.
describe "#matched?" do
  # The expectations live inside an `it` example so that they are registered
  # and reported as an example rather than executed as loose statements in
  # the `describe` body.
  it "reports whether the last scanning operation matched" do
    s = StringScanner.new("sphinx of black quartz, judge my vow")
    # Nothing has been attempted yet.
    s.matched?.should be_false

    # #check with a length: fails when longer than the remaining input.
    s.check(1000)
    s.matched?.should be_false
    s.check(10)
    s.matched?.should be_true

    # #check with a regex (matching is case-sensitive).
    s.check(/Sphinx/)
    s.matched?.should be_false
    s.check(/sphinx/)
    s.matched?.should be_true

    # #skip with a string.
    s.skip("nonsense")
    s.matched?.should be_false
    s.skip("sphinx ")
    s.matched?.should be_true

    # #skip with a length.
    s.skip(1000)
    s.matched?.should be_false
    s.skip(3)
    s.matched?.should be_true

    # #scan with a regex; the head is on "black", so digits do not match.
    s.scan(/\d+/)
    s.matched?.should be_false
    s.scan(/\w+/)
    s.matched?.should be_true

    # #scan with a char; the head is on the space following "black".
    s.scan('b')
    s.matched?.should be_false
    s.scan(' ')
    s.matched?.should be_true

    # unaffected by #peek
    s.scan(1000)
    s.matched?.should be_false
    s.peek(10)
    s.matched?.should be_false
  end
end
end
102 changes: 99 additions & 3 deletions src/string_scanner.cr
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,24 @@
# * `#peek`
# * `#check`
# * `#check_until`
# * `#rest`
# * `#current_char`, `#current_char?`
# * `#previous_char`, `#previous_char?`
# * `#current_byte`, `#current_byte?`
# * `#previous_byte`, `#previous_byte?`
#
# Methods that deal with the position of the offset:
# * `#offset`
# * `#offset=`
# * `#rewind`
# * `#eos?`
# * `#reset`
# * `#terminate`
#
# Methods that deal with the last match:
# * `#[]`
# * `#[]?`
# * `#matched?`
#
# Miscellaneous methods:
# * `#inspect`
Expand All @@ -73,9 +80,11 @@ class StringScanner

# Sets the *position* of the scan offset.
#
# NOTE: Moving the scan head with this method can cause performance issues in
# multibyte strings. For a more performant way to move the head, see
# [`#skip(Int)`](#skip%28len%3AInt%29%3AInt32%7CNil-instance-method).
# NOTE: Moving the scan head to a non-zero index with this method
# can cause performance issues in multibyte strings. For a more
# performant way to move the head, see
# [`#skip(Int)`](#skip%28len%3AInt%29%3AInt32%7CNil-instance-method)
# or `#rewind`.
def offset=(position : Int)
raise IndexError.new unless position >= 0
@byte_offset = @str.char_index_to_byte_index(position) || @str.bytesize
Expand All @@ -86,6 +95,15 @@ class StringScanner
@str.byte_index_to_char_index(@byte_offset).not_nil!
end

# Moves the scan head backwards by *len* characters.
#
# Raises `IndexError` if fewer than *len* characters precede the scan head;
# the offset is left unchanged in that case.
#
# NOTE(review): the spec expects `ArgumentError` for a negative *len*;
# presumably that is raised inside `lookbehind_byte_length` - confirm.
def rewind(len : Int) : Nil
  if back = lookbehind_byte_length(len)
    # Translate the character count into bytes before adjusting the head.
    @byte_offset -= back
  else
    raise IndexError.new("Index out of range")
  end
end

# Tries to match with *pattern* at the current position. If there's a match,
# the scanner advances the scan offset, the last match is saved, and it
# returns the matched string. Otherwise, the scanner returns `nil`.
Expand Down Expand Up @@ -424,6 +442,71 @@ class StringScanner
@str[offset, len]
end

# Returns the byte at the scan head, or `nil` when the head is at the end.
# Does not move the scan head.
# No multi-byte decoding is performed, so the result may be only one part of
# a multi-byte character. See `#current_char?`.
def current_byte? : UInt8?
  head = @byte_offset
  @str.byte_at?(head)
end

# Returns the byte at the scan head, raising if the head is at the end.
# Does not move the scan head.
# No multi-byte decoding is performed, so the result may be only one part of
# a multi-byte character. See `#current_char`.
def current_byte : UInt8
  head = @byte_offset
  @str.byte_at(head)
end

# Returns the byte immediately before the scan head, or `nil` when the head
# is at the beginning. Does not move the scan head.
# No multi-byte decoding is performed, so the result may be only one part of
# a multi-byte character. See `#previous_char?`.
def previous_byte? : UInt8?
  @byte_offset.zero? ? nil : @str.byte_at?(@byte_offset - 1)
end

# Returns the byte immediately before the scan head.
# Raises `IndexError` ("No previous byte") when the head is at the beginning.
# Does not move the scan head.
# No multi-byte decoding is performed, so the result may be only one part of
# a multi-byte character. See `#previous_char`.
def previous_byte : UInt8
  if @byte_offset.zero?
    raise IndexError.new("No previous byte")
  end

  @str.byte_at(@byte_offset - 1)
end

# Returns the character at the scan head, or `nil` when the head is at the
# end. Does not move the scan head. The character is decoded from the
# string, so a multi-byte character is returned whole.
def current_char? : Char?
  reader = make_char_reader
  reader.current_char?
end

# Returns the character at the scan head, raising `IndexError` when the head
# is at the end. Does not move the scan head. The character is decoded from
# the string, so a multi-byte character is returned whole.
def current_char : Char
  # [jneen] Deliberately built on the nilable variant with a manual raise
  # rather than `make_char_reader.current_char`: at the end of the stream
  # that method would return '\0' instead of raising, and we want an
  # IndexError there.
  char = current_char?
  raise IndexError.new if char.nil?
  char
end

# Returns the character immediately before the scan head, or `nil` when the
# head is at the beginning. Does not move the scan head. The character is
# decoded from the string, so a multi-byte character is returned whole.
def previous_char? : Char?
  reader = make_char_reader
  reader.previous_char?
end

# Returns the character immediately before the scan head, and errors when
# the head is at the beginning (the spec expects an `IndexError`).
# Does not move the scan head. The character is decoded from the string, so
# a multi-byte character is returned whole.
def previous_char : Char
  reader = make_char_reader
  reader.previous_char
end

# Returns the remainder of the string after the scan offset.
#
# ```
Expand Down Expand Up @@ -503,6 +586,19 @@ class StringScanner
@byte_offset - reader.pos
end

# Returns `true` if the scan head is at the beginning of a line: either at
# the very start of the string or immediately after a `'\n'`.
#
# Always returns `false` at the end of the string, even when the string ends
# with a newline (and therefore also for an empty string).
def beginning_of_line? : Bool
  return false if eos?
  return true if @byte_offset.zero?

  # In UTF-8 the byte 0x0A only ever encodes '\n' (it never occurs inside a
  # multi-byte sequence), so a single byte comparison is sufficient and
  # avoids building a char reader and decoding the previous character.
  previous_byte == '\n'.ord
end

# Returns `true` if the most recent matching operation succeeded, i.e. a
# last match is currently stored. The spec exercises this through `#scan`,
# `#check` and `#skip`; `#peek` does not affect the result.
def matched? : Bool
  return false if @last_match.nil?

  true
end

# :nodoc:
struct StringMatchData
def initialize(@str : String)
Expand Down
Loading