@@ -74,7 +74,29 @@ def visit_and_node(node)
7474 # []
7575 # ^^
# Translate an array node into a parser-gem array via `builder.array`.
#
# Arrays whose opening starts with %w, %W, %i or %I are handled element by
# element instead of going through `visit_all`:
# * a StringNode whose content contains a newline is split up by
#   `string_nodes_from_line_continuations`,
# * any other StringNode becomes a single `builder.string_internal` node
#   built from its unescaped value and content location,
# * an InterpolatedStringNode is composed from the nodes returned by
#   `string_nodes_from_interpolation`,
# * every other element is visited normally.
#
# `node.opening` may be nil (it is only accessed through `&.`); in that
# case the elements are simply visited with `visit_all`.
def visit_array_node(node)
  elements =
    if node.opening&.start_with?("%w", "%W", "%i", "%I")
      node.elements.flat_map do |element|
        # Every branch returns an Array so that flat_map splices exactly
        # the nodes produced for this element and nothing else.
        case element
        when StringNode
          if element.content.include?("\n")
            string_nodes_from_line_continuations(
              element.unescaped,
              element.content,
              element.content_loc.start_offset,
              node.opening
            )
          else
            [builder.string_internal([element.unescaped, srange(element.content_loc)])]
          end
        when InterpolatedStringNode
          [
            builder.string_compose(
              token(element.opening_loc),
              string_nodes_from_interpolation(element, node.opening),
              token(element.closing_loc)
            )
          ]
        else
          [visit(element)]
        end
      end
    else
      visit_all(node.elements)
    end

  builder.array(token(node.opening_loc), elements, token(node.closing_loc))
end
79101
80102 # foo => [bar]
@@ -1085,19 +1107,9 @@ def visit_interpolated_string_node(node)
10851107 return visit_heredoc ( node ) { |children , closing | builder . string_compose ( token ( node . opening_loc ) , children , closing ) }
10861108 end
10871109
1088- parts = node . parts . flat_map do |part |
1089- # When the content of a string node is split across multiple lines, the
1090- # parser gem creates individual string nodes for each line the content is part of.
1091- if part . type == :string_node && part . content . include? ( "\n " ) && part . opening_loc . nil?
1092- string_nodes_from_line_continuations ( part . unescaped , part . content , part . content_loc . start_offset , node . opening )
1093- else
1094- visit ( part )
1095- end
1096- end
1097-
10981110 builder . string_compose (
10991111 token ( node . opening_loc ) ,
1100- parts ,
1112+ string_nodes_from_interpolation ( node , node . opening ) ,
11011113 token ( node . closing_loc )
11021114 )
11031115 end
@@ -1116,14 +1128,14 @@ def visit_interpolated_symbol_node(node)
11161128 # ^^^^^^^^^^^^
# Translate an interpolated xstring node via `builder.xstring_compose`.
#
# Heredocs are delegated to `visit_heredoc`, which yields the children and
# the closing token to compose. Everything else is composed from the nodes
# returned by `string_nodes_from_interpolation`, wrapped in the node's own
# opening and closing tokens.
def visit_interpolated_x_string_node(node)
  if node.heredoc?
    return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
  end

  builder.xstring_compose(
    token(node.opening_loc),
    string_nodes_from_interpolation(node, node.opening),
    token(node.closing_loc)
  )
end
11281140
11291141 # -> { it }
@@ -2011,13 +2023,6 @@ def visit_block(call, block)
20112023 end
20122024 end
20132025
# The parser gem automatically converts \r\n to \n, meaning our offsets
# need to be adjusted to always subtract 1 from the length.
#
# Returns the byte size of `line` with its line terminator counted as a
# single byte: whatever `String#chomp` strips ("\n", "\r\n" or "\r")
# contributes exactly 1, and a line without a terminator contributes 0.
#
# NOTE(review): in this change set the helper and both of its call sites
# appear only as removed lines; confirm it is still wanted before keeping it.
def chomped_bytesize(line)
  chomped = line.chomp
  chomped.bytesize + (chomped == line ? 0 : 1)
end
2020-
20212026 # Visit a heredoc that can be either a string or an xstring.
20222027 def visit_heredoc ( node )
20232028 children = Array . new
@@ -2086,55 +2091,88 @@ def within_pattern
20862091 end
20872092 end
20882093
# Build the list of child nodes for an interpolated node from `node.parts`.
#
# When the content of a string node is split across multiple lines, the
# parser gem creates individual string nodes for each line the content is
# part of. Such parts (string nodes without an opening of their own whose
# content contains a newline) are expanded through
# `string_nodes_from_line_continuations`; every other part is visited
# normally.
#
# `opening` is the opening delimiter of the enclosing literal and is passed
# through unchanged to `string_nodes_from_line_continuations`.
def string_nodes_from_interpolation(node, opening)
  node.parts.flat_map do |part|
    if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
      string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
    else
      visit(part)
    end
  end
end
2105+
# Create parser string nodes from a single prism node. The parser gem
# "glues" strings together when a line continuation is encountered.
#
# unescaped    - the unescaped string value; split into lines here.
# escaped      - the string content as written in the source; split into
#                lines here.
# start_offset - offset of the start of the content, used as the base for
#                every range handed to `srange_offsets`.
# opening      - the opening delimiter of the enclosing literal, or nil.
#
# Returns an Array with one `builder.string_internal` result per emitted
# string node.
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
  unescaped = unescaped.lines
  escaped = escaped.lines
  percent_array = opening&.start_with?("%w", "%W", "%i", "%I")

  # Non-interpolating strings
  if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
    current_length = 0
    current_line = +""
    last_index = escaped.size - 1

    escaped.filter_map.with_index do |escaped_line, index|
      current_length += escaped_line.bytesize
      current_line << unescaped.fetch(index, "")

      # Glue line continuations together. Only %w and %i arrays can contain these.
      # The capture group spans the whole run of trailing backslashes so
      # that its parity tells an escaped newline from an escaped backslash.
      continued = percent_array && escaped_line[/(\\*)\n$/, 1]&.length&.odd?
      next if continued && index != last_index

      part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
      # Advance past everything covered by the node that was just emitted,
      # including any lines that were glued into it.
      start_offset += current_length
      current_line = +""
      current_length = 0
      part
    end
  else
    # Source byte length of each chunk of glued lines, one entry per
    # emitted node.
    escaped_lengths = []
    # One entry per unescaped line: 0 everywhere except on the line that
    # closes a chunk, which carries the byte length of the whole chunk.
    normalized_lengths = []
    # Keeps track of where an unescaped line should start a new token. An unescaped
    # \n would otherwise be indistinguishable from the actual newline at the end
    # of the line. The parser gem only emits a new string node at "real" newlines,
    # line continuations don't start a new node as well.
    do_next_tokens = []

    escaped
      .chunk_while { |before, _after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
      .each do |lines|
        chunk_bytesize = lines.sum(&:bytesize)
        escaped_lengths << chunk_bytesize

        # Number of "\n" escape sequences in the chunk (an `n` preceded by
        # an odd run of backslashes).
        unescaped_lines_count = lines.sum do |line|
          line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
        end
        # Account for line continuations in percent arrays
        extra = percent_array ? lines.count : 1
        slots = unescaped_lines_count + extra

        normalized_lengths.concat(Array.new(slots, 0))
        normalized_lengths[-1] = chunk_bytesize
        do_next_tokens.concat(Array.new(slots, false))
        do_next_tokens[-1] = true
      end

    current_line = +""
    current_normalized_length = 0
    emitted_count = 0

    unescaped.filter_map.with_index do |unescaped_line, index|
      current_line << unescaped_line
      current_normalized_length += normalized_lengths.fetch(index, 0)

      next unless do_next_tokens[index]

      inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
      # escaped_lengths is indexed per emitted node, not per unescaped line.
      start_offset += escaped_lengths.fetch(emitted_count, 0)
      current_line = +""
      current_normalized_length = 0
      emitted_count += 1
      inner_part
    end
  end
end
0 commit comments