@@ -637,18 +637,34 @@ def trim_heredoc_whitespace(string, heredoc)
# Maps each opening bracket-style delimiter to the closing delimiter it pairs with.
DELIMITER_SYMETRY = {
  "[" => "]",
  "(" => ")",
  "{" => "}",
  "<" => ">"
}.freeze
638638 private_constant :DELIMITER_SYMETRY
639639
640+
# Characters that carry special meaning inside a regexp. Frozen so the shared
# list cannot be mutated at runtime.
# https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/lexer-strings.rl#L14
REGEXP_META_CHARACTERS = ["\\", "$", "(", ")", "*", "+", ".", "<", ">", "?", "[", "]", "^", "{", "|", "}"].freeze
643+ private_constant :REGEXP_META_CHARACTERS
644+
640645 # Apply Ruby string escaping rules
641646 def unescape_string ( string , quote )
642647 # In single-quoted heredocs, everything is taken literally.
643648 return string if quote == "<<'"
644649
645- # TODO: Implement regexp escaping
646- return string if quote == "/" || quote . start_with? ( "%r" )
647-
648650 # OPTIMIZATION: Assume that few strings need escaping to speed up the common case.
649651 return string unless string . include? ( "\\ " )
650652
651- if interpolation? ( quote )
653+ # Enclosing character for the string. `"` for `"foo"`, `{` for `%w{foo}`, etc.
654+ delimiter = quote [ -1 ]
655+
656+ if regexp? ( quote )
657+ # Regexp contents are handled much like single-quoted heredocs. The only character that is
658+ # allowed to be escaped is the delimiter, except when that also has special meaning
659+ # in the regexp. Since all the symmetric delimiters (see DELIMITER_SYMETRY) have special
660+ # meaning, they don't need to be considered separately.
661+ if REGEXP_META_CHARACTERS . include? ( delimiter )
662+ string
663+ else
664+ # There can never be an even amount of backslashes. It would be a syntax error.
665+ string . gsub ( /\\ (#{ Regexp . escape ( delimiter ) } )/ , '\1' )
666+ end
667+ elsif interpolation? ( quote )
652668 # Appending individual escape sequences may force the string out of its intended
653669 # encoding. Start out with binary and force it back later.
654670 result = "" . b
@@ -693,12 +709,6 @@ def unescape_string(string, quote)
693709
694710 result
695711 else
696- if quote == "'"
697- delimiter = "'"
698- else
699- delimiter = quote [ 2 ]
700- end
701-
702712 delimiters = Regexp . escape ( "#{ delimiter } #{ DELIMITER_SYMETRY [ delimiter ] } " )
703713 string . gsub ( /\\ ([\\ #{ delimiters } ])/ , '\1' )
704714 end
@@ -730,6 +740,11 @@ def interpolation?(quote)
730740 quote != "'" && !quote . start_with? ( "%q" , "%w" , "%i" )
731741 end
732742
# Whether +quote+ opens a regexp literal: either a bare "/" or any "%r" form.
# Regexps allow interpolation but are handled differently during unescaping.
def regexp?(quote)
  return true if quote == "/"

  quote.start_with?("%r")
end
747+
733748 # Determine if the string is part of a %-style array.
734749 def percent_array? ( quote )
735750 quote . start_with? ( "%w" , "%W" , "%i" , "%I" )
0 commit comments