@@ -1511,13 +1511,9 @@ def visit_redo_node(node)
15111511 # /foo/
15121512 # ^^^^^
15131513 def visit_regular_expression_node ( node )
1514- content = node . content
15151514 parts =
1516- if content . include? ( "\n " )
1517- offset = node . content_loc . start_offset
1518- content . lines . map do |line |
1519- builder . string_internal ( [ line , srange_offsets ( offset , offset += line . bytesize ) ] )
1520- end
1515+ if node . content . include? ( "\n " )
1516+ string_nodes_from_line_continuations ( node , node . content_loc . start_offset , node . opening )
15211517 else
15221518 [ builder . string_internal ( token ( node . content_loc ) ) ]
15231519 end
@@ -2074,55 +2070,7 @@ def visit_heredoc(node)
20742070 node . parts . each do |part |
20752071 pushing =
20762072 if part . is_a? ( StringNode ) && part . unescaped . include? ( "\n " )
2077- unescaped = part . unescaped . lines
2078- escaped = part . content . lines
2079-
2080- escaped_lengths = [ ]
2081- normalized_lengths = [ ]
2082- # Keeps track of where an unescaped line should start a new token. An unescaped
2083- # \n would otherwise be indistinguishable from the actual newline at the end of
2084- # of the line. The parser gem only emits a new string node at "real" newlines,
2085- # line continuations don't start a new node as well.
2086- do_next_tokens = [ ]
2087-
2088- if node . opening . end_with? ( "'" )
2089- escaped . each do |line |
2090- escaped_lengths << line . bytesize
2091- normalized_lengths << chomped_bytesize ( line )
2092- do_next_tokens << true
2093- end
2094- else
2095- escaped
2096- . chunk_while { |before , after | before [ /(\\ *)\r ?\n $/ , 1 ] &.length &.odd? || false }
2097- . each do |lines |
2098- escaped_lengths << lines . sum ( &:bytesize )
2099- normalized_lengths << lines . sum { |line | chomped_bytesize ( line ) }
2100- unescaped_lines_count = lines . sum do |line |
2101- line . scan ( /(\\ *)n/ ) . count { |( backslashes ) | backslashes &.length &.odd? || false }
2102- end
2103- do_next_tokens . concat ( Array . new ( unescaped_lines_count + 1 , false ) )
2104- do_next_tokens [ -1 ] = true
2105- end
2106- end
2107-
2108- start_offset = part . location . start_offset
2109- current_line = +""
2110- current_normalized_length = 0
2111-
2112- unescaped . filter_map . with_index do |unescaped_line , index |
2113- current_line << unescaped_line
2114- current_normalized_length += normalized_lengths . fetch ( index , 0 )
2115-
2116- if do_next_tokens [ index ]
2117- inner_part = builder . string_internal ( [ current_line , srange_offsets ( start_offset , start_offset + current_normalized_length ) ] )
2118- start_offset += escaped_lengths . fetch ( index , 0 )
2119- current_line = +""
2120- current_normalized_length = 0
2121- inner_part
2122- else
2123- nil
2124- end
2125- end
2073+ string_nodes_from_line_continuations ( part , part . location . start_offset , node . opening )
21262074 else
21272075 [ visit ( part ) ]
21282076 end
@@ -2172,6 +2120,59 @@ def within_pattern
21722120 parser . pattern_variables . pop
21732121 end
21742122 end
2123+
2124+ # Create parser string nodes from a single prism node: node.content and node.unescaped are split into lines and one builder.string_internal result is returned per emitted token, with source ranges starting at start_offset and advancing by each token's escaped byte length.
2125+ # The parser gem "glues" strings together when a line continuation is encountered, so one token may span several lines. When opening ends with "'", every content line is marked as its own token boundary instead.
2126+ def string_nodes_from_line_continuations ( node , start_offset , opening )
2127+ unescaped = node . unescaped . lines
2128+ escaped = node . content . lines
2129+
2130+ escaped_lengths = [ ]
2131+ normalized_lengths = [ ]
2132+ # Keeps track of where an unescaped line should start a new token. An unescaped
2133+ # \n would otherwise be indistinguishable from the actual newline at the end
2134+ # of the line. The parser gem only emits a new string node at "real" newlines;
2135+ # line continuations don't start a new node either.
2136+ do_next_tokens = [ ]
2137+
2138+ if opening . end_with? ( "'" )
2139+ escaped . each do |line |
2140+ escaped_lengths << line . bytesize
2141+ normalized_lengths << chomped_bytesize ( line )
2142+ do_next_tokens << true
2143+ end
2144+ else
2145+ escaped
2146+ . chunk_while { |before , after | before [ /(\\ *)\r ?\n $/ , 1 ] &.length &.odd? || false }
2147+ . each do |lines |
2148+ escaped_lengths << lines . sum ( &:bytesize )
2149+ normalized_lengths << lines . sum { |line | chomped_bytesize ( line ) }
2150+ unescaped_lines_count = lines . sum do |line |
2151+ line . scan ( /(\\ *)n/ ) . count { |( backslashes ) | backslashes &.length &.odd? || false }
2152+ end
2153+ do_next_tokens . concat ( Array . new ( unescaped_lines_count + 1 , false ) )
2154+ do_next_tokens [ -1 ] = true
2155+ end
2156+ end
2157+
2158+ current_line = +""
2159+ current_normalized_length = 0
2160+
2161+ unescaped . filter_map . with_index do |unescaped_line , index |
2162+ current_line << unescaped_line
2163+ current_normalized_length += normalized_lengths . fetch ( index , 0 )
2164+
2165+ if do_next_tokens [ index ]
2166+ inner_part = builder . string_internal ( [ current_line , srange_offsets ( start_offset , start_offset + current_normalized_length ) ] )
2167+ start_offset += escaped_lengths . fetch ( index , 0 )
2168+ current_line = +""
2169+ current_normalized_length = 0
2170+ inner_part
2171+ else
2172+ nil
2173+ end
2174+ end
2175+ end
21752176 end
21762177 end
21772178 end
0 commit comments