Skip to content

Commit 14decf5

Browse files
authored
Refactor heredoc handling for complex use cases (#818)
1 parent 1ce3e53 commit 14decf5

File tree

7 files changed

+314
-40
lines changed

7 files changed

+314
-40
lines changed

fixtures/small/pathological_heredocs_actual.rb

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,57 @@ def foo
3535
Otherwise, it's probably in your best interest not to write things like this.
3636
END
3737
end
38+
39+
<<EOD
40+
part 1 of heredoc #{"not a heredoc" + <<EOM} after brace before newline
41+
contents of EOM
42+
EOM
43+
contents of EOD
44+
EOD
45+
46+
# Multiple heredocs on same line
47+
<<OUTER
48+
first #{<<A} middle #{<<B} last
49+
content A
50+
A
51+
content B
52+
B
53+
OUTER
54+
55+
# Heredoc-only interpolation with text after
56+
<<OUTER
57+
#{<<INNER} after
58+
inner content
59+
INNER
60+
more outer
61+
OUTER
62+
63+
# Heredoc with escape sequences preserved
64+
<<EOD
65+
line with escape \n in middle
66+
another line
67+
EOD
68+
69+
# Nested heredoc with escape sequence in interpolation
70+
<<OUTER
71+
prefix #{"text\n" + <<INNER} after
72+
inner content
73+
INNER
74+
more outer
75+
OUTER
76+
77+
# Squiggly heredoc with nested bare heredoc
78+
<<~OUTER
79+
prefix #{<<INNER} after
80+
inner content
81+
INNER
82+
more outer
83+
OUTER
84+
85+
# Squiggly heredoc with nested squiggly heredoc
86+
<<~OUTER
87+
prefix #{<<~INNER} after
88+
inner content
89+
INNER
90+
more outer
91+
OUTER

fixtures/small/pathological_heredocs_expected.rb

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,58 @@ def foo
4040
.join
4141
.strip
4242
end
43+
44+
<<EOD
45+
part 1 of heredoc #{"not a heredoc" + <<EOM} after brace before newline
46+
contents of EOM
47+
EOM
48+
contents of EOD
49+
EOD
50+
51+
# Multiple heredocs on same line
52+
<<OUTER
53+
first #{<<A} middle #{<<B} last
54+
content A
55+
A
56+
content B
57+
B
58+
59+
OUTER
60+
61+
# Heredoc-only interpolation with text after
62+
<<OUTER
63+
#{<<INNER} after
64+
inner content
65+
INNER
66+
more outer
67+
OUTER
68+
69+
# Heredoc with escape sequences preserved
70+
<<EOD
71+
line with escape \n in middle
72+
another line
73+
EOD
74+
75+
# Nested heredoc with escape sequence in interpolation
76+
<<OUTER
77+
prefix #{"text\n" + <<INNER} after
78+
inner content
79+
INNER
80+
more outer
81+
OUTER
82+
83+
# Squiggly heredoc with nested bare heredoc
84+
<<~OUTER
85+
prefix #{<<INNER} after
86+
inner content
87+
INNER
88+
more outer
89+
OUTER
90+
91+
# Squiggly heredoc with nested squiggly heredoc
92+
<<~OUTER
93+
prefix #{<<~INNER} after
94+
inner content
95+
INNER
96+
more outer
97+
OUTER

librubyfmt/src/format_prism.rs

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -980,14 +980,50 @@ fn format_inner_string<'src>(
980980
}
981981
};
982982

983-
prev_ended_with_newline = contents.ends_with('\n');
983+
// If there are pending heredocs and the content contains a newline
984+
// (but doesn't start with one), we need to split the content at the
985+
// first newline, emit the part before, then a HardNewLine, then
986+
// render the heredocs (with a proper newline after
987+
// heredoc close), then emit the rest. This handles cases like:
988+
// <<EOD
989+
// text #{<<INNER} after brace
990+
// inner content
991+
// INNER
992+
// more outer content
993+
// EOD
994+
let mut rendered_heredocs = false;
995+
if ps.has_pending_heredocs()
996+
&& !contents.starts_with('\n')
997+
&& let Some(newline_idx) = contents.find('\n')
998+
{
999+
let before_newline = &contents[..newline_idx];
1000+
if !before_newline.is_empty() {
1001+
ps.emit_string_content(before_newline.to_string());
1002+
}
1003+
ps.emit_hard_newline_in_heredoc();
1004+
// Use skip=false to ensure proper newline after heredoc close
1005+
ps.render_heredocs(false);
1006+
contents = contents[newline_idx + 1..].to_string();
1007+
rendered_heredocs = true;
1008+
}
1009+
1010+
// If we rendered heredocs, they end with a newline (since skip=false),
1011+
// so the next StringNode should have its first line treated as starting
1012+
// at a line boundary for common_indent stripping purposes.
1013+
prev_ended_with_newline = if rendered_heredocs && contents.is_empty() {
1014+
true
1015+
} else {
1016+
contents.ends_with('\n')
1017+
};
9841018

9851019
if peekable.peek().is_none() && contents.ends_with('\n') {
9861020
contents.pop();
9871021
}
9881022

9891023
ps.at_offset(part.location().end_offset());
990-
ps.emit_string_content(contents);
1024+
if !contents.is_empty() {
1025+
ps.emit_string_content(contents);
1026+
}
9911027
}
9921028
prism::Node::InterpolatedStringNode { .. } => {
9931029
ps.at_offset(part.location().start_offset());

librubyfmt/src/heredoc_string.rs

Lines changed: 60 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,42 +30,86 @@ impl HeredocKind {
3030
}
3131
}
3232

33+
/// A segment of heredoc content. Used to distinguish between content that should
34+
/// receive squiggly indentation and content from nested non-squiggly heredocs
35+
/// that should not be indented.
36+
#[derive(Debug, Clone)]
37+
pub enum HeredocSegment {
38+
Normal(String),
39+
/// Content from nested non-squiggly heredocs; it must never receive squiggly indentation.
40+
/// This includes both the heredoc content and the closing identifier.
41+
Raw(String),
42+
}
43+
3344
#[derive(Debug, Clone)]
3445
pub struct HeredocString<'src> {
3546
symbol: Cow<'src, str>,
3647
pub kind: HeredocKind,
37-
pub buf: Vec<u8>,
48+
pub segments: Vec<HeredocSegment>,
3849
pub indent: ColNumber,
3950
}
4051

4152
impl<'src> HeredocString<'src> {
42-
pub fn new(symbol: Cow<'src, str>, kind: HeredocKind, buf: Vec<u8>, indent: ColNumber) -> Self {
53+
pub fn new(
54+
symbol: Cow<'src, str>,
55+
kind: HeredocKind,
56+
segments: Vec<HeredocSegment>,
57+
indent: ColNumber,
58+
) -> Self {
4359
HeredocString {
4460
symbol,
4561
kind,
46-
buf,
62+
segments,
4763
indent,
4864
}
4965
}
5066

5167
pub fn render_as_string(self) -> String {
5268
let indent = self.indent;
53-
let string = String::from_utf8(self.buf).expect("heredoc is utf8");
5469

5570
if self.kind.is_squiggly() {
56-
string
57-
.split('\n')
58-
.map(|l| {
59-
String::from(format!("{}{}", get_indent(indent as usize + 2), l).trim_end())
60-
})
61-
.collect::<Vec<String>>()
62-
.join("\n")
71+
// For squiggly heredocs, we need to apply indentation to Normal segments
72+
// but not to Raw segments (which come from nested non-squiggly heredocs).
73+
let mut result = String::new();
74+
for segment in self.segments {
75+
match segment {
76+
HeredocSegment::Normal(content) => {
77+
// Apply squiggly indentation to each line, then trim trailing whitespace
78+
for (i, line) in content.split('\n').enumerate() {
79+
if i > 0 {
80+
result.push('\n');
81+
}
82+
let indented = format!("{}{}", get_indent(indent as usize + 2), line);
83+
result.push_str(indented.trim_end());
84+
}
85+
}
86+
HeredocSegment::Raw(content) => {
87+
// No indentation for raw content (nested non-squiggly heredocs); only trailing whitespace is trimmed from each line
88+
for (i, line) in content.split('\n').enumerate() {
89+
if i > 0 {
90+
result.push('\n');
91+
}
92+
result.push_str(line.trim_end());
93+
}
94+
}
95+
}
96+
}
97+
result
6398
} else {
64-
string
65-
.split('\n')
66-
.map(|l| l.trim_end())
67-
.collect::<Vec<&str>>()
68-
.join("\n")
99+
// For non-squiggly heredocs, concatenate the segments and trim trailing whitespace from each line
100+
let mut result = String::new();
101+
for segment in self.segments {
102+
let content = match segment {
103+
HeredocSegment::Normal(s) | HeredocSegment::Raw(s) => s,
104+
};
105+
for (i, line) in content.split('\n').enumerate() {
106+
if i > 0 {
107+
result.push('\n');
108+
}
109+
result.push_str(line.trim_end());
110+
}
111+
}
112+
result
69113
}
70114
}
71115

librubyfmt/src/line_tokens.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ pub enum ConcreteLineToken<'src> {
8787
kind: HeredocKind,
8888
symbol: &'src str,
8989
},
90+
RawHeredocContent {
91+
content: String,
92+
},
9093
}
9194

9295
impl<'src> ConcreteLineToken<'src> {
@@ -123,6 +126,7 @@ impl<'src> ConcreteLineToken<'src> {
123126
Self::End => Cow::Borrowed("end"),
124127
Self::HeredocClose { symbol } => Cow::Owned(symbol),
125128
Self::HeredocStart { symbol, .. } => Cow::Borrowed(symbol),
129+
Self::RawHeredocContent { content } => Cow::Owned(content),
126130
// no-op, this is purely semantic information
127131
// for the render queue
128132
Self::AfterCallChain | Self::BeginCallChainIndent | Self::EndCallChainIndent => {
@@ -145,7 +149,9 @@ impl<'src> ConcreteLineToken<'src> {
145149
Keyword { keyword: contents } | ConditionalKeyword { contents } => contents.len(),
146150
Op { op } | MethodName { name: op } => op.len(),
147151
DirectPart { part: contents } | LTStringContent { content: contents } => contents.len(),
148-
Comment { contents } | HeredocClose { symbol: contents } => contents.len(),
152+
Comment { contents }
153+
| HeredocClose { symbol: contents }
154+
| RawHeredocContent { content: contents } => contents.len(),
149155
HardNewLine | Comma | Space | Dot | OpenSquareBracket | CloseSquareBracket
150156
| OpenCurlyBracket | CloseCurlyBracket | OpenParen | CloseParen | SingleSlash
151157
| DoubleQuote => 1,

0 commit comments

Comments
 (0)