Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions lib/prism/polyfill/append_as_bytes.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

# Polyfill for String#append_as_bytes, which didn't exist until Ruby 3.4.
#
# Only installed when String does not already respond to the method, so on
# Ruby >= 3.4 the native implementation is left untouched.
#
# The polyfill mirrors the native contract:
# * every String argument is appended byte-for-byte, with no transcoding and
#   no encoding-compatibility check;
# * every Integer argument is appended as a single byte (its lowest 8 bits);
# * the receiver keeps its original encoding, even if the resulting byte
#   sequence is not valid in that encoding;
# * the receiver itself is returned, so calls can be chained.
if !("".respond_to?(:append_as_bytes))
  String.include(
    Module.new {
      def append_as_bytes(*args)
        original_encoding = encoding # steep:ignore

        # Work in BINARY so that `<<` never raises
        # Encoding::CompatibilityError and never silently re-tags the
        # receiver with the argument's encoding.
        force_encoding(Encoding::BINARY) # steep:ignore
        begin
          args.each do |arg|
            # pack("C") keeps only the lowest byte of an Integer.
            bytes = arg.is_a?(Integer) ? [arg].pack("C") : arg.b
            self.<<(bytes) # steep:ignore
          end
        ensure
          # Restore the caller-visible encoding even if an argument was invalid.
          force_encoding(original_encoding) # steep:ignore
        end

        self
      end
    }
  )
end
16 changes: 8 additions & 8 deletions lib/prism/translation/parser/lexer.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

require "strscan"
require_relative "../../polyfill/append_as_bytes"

module Prism
module Translation
Expand Down Expand Up @@ -638,39 +639,38 @@ def unescape_string(string, quote)
scanner = StringScanner.new(string)
while (skipped = scanner.skip_until(/\\/))
# Append what was just skipped over, excluding the found backslash.
result << string.byteslice(scanner.pos - skipped, skipped - 1)
result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))

# Simple single-character escape sequences like \n
if (replacement = ESCAPES[scanner.peek(1)])
result << replacement
result.append_as_bytes(replacement)
scanner.pos += 1
elsif (octal = scanner.check(/[0-7]{1,3}/))
# \nnn
# NOTE: When Ruby 3.4 is required, this can become result.append_as_bytes(chr)
result << octal.to_i(8).chr.b
result.append_as_bytes(octal.to_i(8).chr)
scanner.pos += octal.bytesize
elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/))
# \xnn
result << hex[1..].to_i(16).chr.b
result.append_as_bytes(hex[1..].to_i(16).chr)
scanner.pos += hex.bytesize
elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/))
# \unnnn
result << unicode[1..].hex.chr(Encoding::UTF_8).b
result.append_as_bytes(unicode[1..].hex.chr(Encoding::UTF_8))
scanner.pos += unicode.bytesize
elsif scanner.peek(3) == "u{}"
# https://github.com/whitequark/parser/issues/856
scanner.pos += 3
elsif (unicode_parts = scanner.check(/u{.*}/))
# \u{nnnn ...}
unicode_parts[2..-2].split.each do |unicode|
result << unicode.hex.chr(Encoding::UTF_8).b
result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
end
scanner.pos += unicode_parts.bytesize
end
end

# Add remaining chars
result << string.byteslice(scanner.pos..)
result.append_as_bytes(string.byteslice(scanner.pos..))

result.force_encoding(source_buffer.source.encoding)

Expand Down
1 change: 1 addition & 0 deletions prism.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Gem::Specification.new do |spec|
"lib/prism/parse_result/errors.rb",
"lib/prism/parse_result/newlines.rb",
"lib/prism/pattern.rb",
"lib/prism/polyfill/append_as_bytes.rb",
"lib/prism/polyfill/byteindex.rb",
"lib/prism/polyfill/unpack1.rb",
"lib/prism/reflection.rb",
Expand Down
2 changes: 2 additions & 0 deletions test/prism/fixtures/strings.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ baz

"\7 \43 \141"

"ち\xE3\x81\xFF"

%[abc]

%(abc)
Expand Down
1 change: 1 addition & 0 deletions test/prism/ruby/ruby_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class RubyParserTest < TestCase
"seattlerb/op_asgn_primary_colon_const_command_call.txt",
"seattlerb/regexp_esc_C_slash.txt",
"seattlerb/str_lit_concat_bad_encodings.txt",
"strings.txt",
"unescaping.txt",
"unparser/corpus/literal/kwbegin.txt",
"unparser/corpus/literal/send.txt",
Expand Down
120 changes: 63 additions & 57 deletions test/prism/snapshots/strings.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading