Skip to content

Commit 283037f

Browse files
Earlopain authored and kddnewton committed
[ruby/prism] Better handle all kinds of multiline strings in the parser translator
This is a follow-up to ruby#3373, where the implementation was extracted.

ruby/prism@2637007929
1 parent 80fe9a1 commit 283037f

File tree

5 files changed: 62 additions (+62) and 70 deletions (-70).

lib/prism/translation/parser/compiler.rb

Lines changed: 35 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,21 +1085,13 @@ def visit_interpolated_string_node(node)
10851085
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
10861086
end
10871087

1088-
parts = node.parts.flat_map do |node|
1088+
parts = node.parts.flat_map do |part|
10891089
# When the content of a string node is split across multiple lines, the
10901090
# parser gem creates individual string nodes for each line the content is part of.
1091-
if node.type == :string_node && node.content.include?("\n") && node.opening_loc.nil?
1092-
start_offset = node.content_loc.start_offset
1093-
1094-
node.unescaped.lines.map do |line|
1095-
end_offset = start_offset + line.bytesize
1096-
offsets = srange_offsets(start_offset, end_offset)
1097-
start_offset = end_offset
1098-
1099-
builder.string_internal([line, offsets])
1100-
end
1091+
if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
1092+
string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening)
11011093
else
1102-
visit(node)
1094+
visit(part)
11031095
end
11041096
end
11051097

@@ -1513,7 +1505,7 @@ def visit_regular_expression_node(node)
15131505
if node.content == ""
15141506
[]
15151507
elsif node.content.include?("\n")
1516-
string_nodes_from_line_continuations(node, node.content_loc.start_offset, node.opening)
1508+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
15171509
else
15181510
[builder.string_internal(token(node.content_loc))]
15191511
end
@@ -1672,28 +1664,11 @@ def visit_string_node(node)
16721664
elsif node.opening&.start_with?("%") && node.unescaped.empty?
16731665
builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
16741666
else
1675-
content_lines = node.content.lines
1676-
unescaped_lines = node.unescaped.lines
1677-
16781667
parts =
1679-
if content_lines.length <= 1 || unescaped_lines.length <= 1
1680-
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
1681-
elsif content_lines.length != unescaped_lines.length
1682-
# This occurs when we have line continuations in the string. We
1683-
# need to come back and fix this, but for now this stops the
1684-
# code from breaking when we encounter it because of trying to
1685-
# transpose arrays of different lengths.
1686-
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
1668+
if node.content.include?("\n")
1669+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
16871670
else
1688-
start_offset = node.content_loc.start_offset
1689-
1690-
[content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
1691-
end_offset = start_offset + content_line.bytesize
1692-
offsets = srange_offsets(start_offset, end_offset)
1693-
start_offset = end_offset
1694-
1695-
builder.string_internal([unescaped_line, offsets])
1696-
end
1671+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
16971672
end
16981673

16991674
builder.string_compose(
@@ -1737,19 +1712,14 @@ def visit_symbol_node(node)
17371712
builder.symbol([node.unescaped, srange(node.location)])
17381713
end
17391714
else
1740-
parts = if node.value.lines.one?
1741-
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
1742-
else
1743-
start_offset = node.value_loc.start_offset
1744-
1745-
node.value.lines.map do |line|
1746-
end_offset = start_offset + line.bytesize
1747-
offsets = srange_offsets(start_offset, end_offset)
1748-
start_offset = end_offset
1749-
1750-
builder.string_internal([line, offsets])
1715+
parts =
1716+
if node.value == ""
1717+
[]
1718+
elsif node.value.include?("\n")
1719+
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
1720+
else
1721+
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
17511722
end
1752-
end
17531723

17541724
builder.symbol_compose(
17551725
token(node.opening_loc),
@@ -1878,28 +1848,23 @@ def visit_while_node(node)
18781848
# ^^^^^
18791849
def visit_x_string_node(node)
18801850
if node.heredoc?
1881-
visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1882-
else
1883-
parts = if node.unescaped.lines.one?
1884-
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
1885-
else
1886-
start_offset = node.content_loc.start_offset
1887-
1888-
node.unescaped.lines.map do |line|
1889-
end_offset = start_offset + line.bytesize
1890-
offsets = srange_offsets(start_offset, end_offset)
1891-
start_offset = end_offset
1851+
return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1852+
end
18921853

1893-
builder.string_internal([line, offsets])
1894-
end
1854+
parts =
1855+
if node.content == ""
1856+
[]
1857+
elsif node.content.include?("\n")
1858+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1859+
else
1860+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
18951861
end
18961862

1897-
builder.xstring_compose(
1898-
token(node.opening_loc),
1899-
parts,
1900-
token(node.closing_loc)
1901-
)
1902-
end
1863+
builder.xstring_compose(
1864+
token(node.opening_loc),
1865+
parts,
1866+
token(node.closing_loc)
1867+
)
19031868
end
19041869

19051870
# yield
@@ -2069,8 +2034,8 @@ def visit_heredoc(node)
20692034

20702035
node.parts.each do |part|
20712036
pushing =
2072-
if part.is_a?(StringNode) && part.unescaped.include?("\n")
2073-
string_nodes_from_line_continuations(part, part.location.start_offset, node.opening)
2037+
if part.is_a?(StringNode) && part.content.include?("\n")
2038+
string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
20742039
else
20752040
[visit(part)]
20762041
end
@@ -2123,9 +2088,9 @@ def within_pattern
21232088

21242089
# Create parser string nodes from a single prism node. The parser gem
21252090
# "glues" strings together when a line continuation is encountered.
2126-
def string_nodes_from_line_continuations(node, start_offset, opening)
2127-
unescaped = node.unescaped.lines
2128-
escaped = node.content.lines
2091+
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
2092+
unescaped = unescaped.lines
2093+
escaped = escaped.lines
21292094

21302095
escaped_lengths = []
21312096
normalized_lengths = []
@@ -2135,7 +2100,7 @@ def string_nodes_from_line_continuations(node, start_offset, opening)
21352100
# line continuations don't start a new node as well.
21362101
do_next_tokens = []
21372102

2138-
if opening.end_with?("'")
2103+
if opening&.end_with?("'")
21392104
escaped.each do |line|
21402105
escaped_lengths << line.bytesize
21412106
normalized_lengths << chomped_bytesize(line)

test/prism/fixtures/dstring.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,11 @@ foo\\\\
2828
foo\\\\\
2929
"
3030

31+
"
32+
foo\
33+
b\nar
34+
#{}
35+
"
36+
3137
"
3238
’"

test/prism/fixtures/strings.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@
4040
#
4141
"bar"
4242

43+
"
44+
foo\
45+
b\nar
46+
"
47+
4348
%q{abc}
4449

4550
%s[abc]

test/prism/fixtures/symbols.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@
44

55
:"abc#{1}"
66

7+
"
8+
foo\
9+
b\nar
10+
"
11+
12+
"
13+
foo\
14+
b\nar
15+
#{}
16+
"
17+
718
[:Υ, :ά, :ŗ, :ρ]
819

920
:-@

test/prism/fixtures/xstring.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,10 @@
1212

1313
%x{}
1414

15+
`
16+
foo\
17+
b\nar
18+
`
19+
1520
`
1621
’`

0 commit comments

Comments
 (0)