Skip to content

Commit a528dc5

Browse files
authored
Remove trailing whitespace on first line of multiline string. (#1236)
Remove trailing whitespace on first line of multiline string. Normalizes, but retains, backslashes on the first ignored line of a multiline string. (There should probably be a fix to remove them.)
1 parent 53dc7e1 commit a528dc5

File tree

3 files changed

+202
-6
lines changed

3 files changed

+202
-6
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* Add `tall-style` experiment flag to enable the in-progress unstable new
44
formatting style (#1253).
55
* Format extension types.
6+
* Normalize ignored whitespace and "escaped whitespace" on first line
7+
of multiline string literals. (#1235)
68

79
## 2.3.3
810

lib/src/source_visitor.dart

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4158,6 +4158,8 @@ class SourceVisitor extends ThrowingAstVisitor {
41584158
_endBody(rightBracket, forceSplit: nodes.isNotEmpty);
41594159
}
41604160

4161+
static final _lineTerminatorRE = RegExp(r'\r\n?|\n');
4162+
41614163
/// Writes the string literal [string] to the output.
41624164
///
41634165
/// Splits multiline strings into separate chunks so that the line splitter
@@ -4167,21 +4169,96 @@ class SourceVisitor extends ThrowingAstVisitor {
41674169
// comments are written first.
41684170
writePrecedingCommentsAndNewlines(string);
41694171

4170-
// Split each line of a multiline string into separate chunks.
4171-
var lines = string.lexeme.split(_formatter.lineEnding!);
4172+
var lines = string.lexeme.split(_lineTerminatorRE);
41724173
var offset = string.offset;
4174+
var firstLine = lines.first;
4175+
if (lines.length > 1) {
4176+
// Special case for multiline string which contains
4177+
// at least one newline.
4178+
_writeStringFirstLine(firstLine, string, offset: offset);
4179+
} else {
4180+
_writeText(firstLine, string, offset: offset);
4181+
}
4182+
offset += firstLine.length;
41734183

4174-
_writeText(lines.first, string, offset: offset);
4175-
offset += lines.first.length;
4176-
4177-
for (var line in lines.skip(1)) {
4184+
for (var i = 1; i < lines.length; i++) {
4185+
var line = lines[i];
41784186
builder.writeNewline(flushLeft: true, nest: true);
41794187
offset++;
41804188
_writeText(line, string, offset: offset, mergeEmptySplits: false);
41814189
offset += line.length;
41824190
}
41834191
}
41844192

4193+
/// Writes the first line of a multi-line string.
4194+
///
4195+
/// If the string is a multiline string, and it has only whitespace
4196+
/// and escaped whitespace before a first line break,
4197+
/// omit the non-escaped trailing whitespace.
4198+
/// Normalize escaped non-final whitespace to spaces.
4199+
///
4200+
/// More specifically:
4201+
/// If a multiline string literal contains at least one line-break
4202+
/// (a CR, LF or CR+LF) as part of the source character content
4203+
/// (characters inside interpolation expressions do not count),
4204+
/// and the source characters from the starting quote to the first
4205+
/// line-break contains only the characters space, tab and backslash,
4206+
/// with no two adjacent backslashes, then that part of the string source,
4207+
/// including the following line break, is excluded from contributing
4208+
/// code points to the string value.
4209+
///
4210+
/// This function normalizes such excluded character sequences
4211+
/// to just the back-slashes, separated by space characters.
4212+
void _writeStringFirstLine(String line, Token string, {required int offset}) {
4213+
// Detect leading whitespace on the first line of multiline strings.
4214+
var quoteStart = line.startsWith('r') ? 1 : 0;
4215+
var quoteEnd = quoteStart + 3;
4216+
var backslashCount = 0;
4217+
if (line.length > quoteEnd &&
4218+
(line.startsWith("'''", quoteStart) ||
4219+
line.startsWith('"""', quoteStart))) {
4220+
// Start of a multiline string literal.
4221+
// Check if rest of the line is whitespace, possibly preceded by
4222+
// backslash, or has a single trailing backslash preceding the newline.
4223+
// Count the backslashes.
4224+
var cursor = quoteEnd;
4225+
const backslash = 0x5c;
4226+
const space = 0x20;
4227+
const tab = 0x09;
4228+
4229+
do {
4230+
var char = line.codeUnitAt(cursor);
4231+
if (char == backslash) {
4232+
cursor += 1;
4233+
backslashCount++;
4234+
if (cursor >= line.length) {
4235+
break;
4236+
}
4237+
char = line.codeUnitAt(cursor);
4238+
}
4239+
if (char != space && char != tab) break;
4240+
cursor++;
4241+
} while (cursor < line.length);
4242+
if (cursor == line.length) {
4243+
// No invalid character sequence found before end of line.
4244+
// Normalize the ignored "escaped" whitespace which has no
4245+
// effect on string content.
4246+
var firstLineText = line.substring(0, quoteEnd);
4247+
if (backslashCount > 0) {
4248+
var buffer = StringBuffer(firstLineText);
4249+
buffer.write(r'\');
4250+
while (--backslashCount > 0) {
4251+
buffer.write(r' \');
4252+
}
4253+
firstLineText = buffer.toString();
4254+
}
4255+
_writeText(firstLineText, string, offset: offset);
4256+
return;
4257+
}
4258+
}
4259+
_writeText(line, string, offset: offset);
4260+
}
4261+
41854262
/// Write the comma token following [node], if there is one.
41864263
void _writeCommaAfter(AstNode node) {
41874264
token(node.commaAfter);
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
40 columns |
2+
>>> trailing all-space multiline string (×20 = space, ×09 = tab)
3+
var x = '''×20×09×20×09
4+
×20×09
5+
×09×20''';
6+
<<<
7+
var x = '''
8+
×20×09
9+
×09×20''';
10+
>>> single trailing space
11+
var x = '''×20
12+
z''';
13+
<<<
14+
var x = '''
15+
z''';
16+
>>> single trailing tab
17+
var x = '''×09
18+
z''';
19+
<<<
20+
var x = '''
21+
z''';
22+
>>> trailing all-space multiline raw string
23+
var x = r'''×20×09×20×09
24+
×20×09
25+
×09×20''';
26+
<<<
27+
var x = r'''
28+
×20×09
29+
×09×20''';
30+
>>> no trailing characters, nothing happens
31+
var x = '''
32+
×20×09''';
33+
<<<
34+
var x = '''
35+
×20×09''';
36+
>>> no line break, nothing happens
37+
var x = '''×20×09''';
38+
<<<
39+
var x = '''×20×09''';
40+
>>> line break not part of string.
41+
var x = '''×20×09×20×09${
42+
''}×20×09''';
43+
<<<
44+
var x = '''×20×09×20×09${''}×20×09''';
45+
>>> "escapes" allowed, not removed, but normalized
46+
var x = '''×20×09\×20\×09×20×09
47+
''';
48+
<<<
49+
var x = '''\×20\
50+
''';
51+
>>> single escaped space
52+
var x = '''\×20
53+
z''';
54+
<<<
55+
var x = '''\
56+
z''';
57+
>>> single escaped tab
58+
var x = '''\×09
59+
z''';
60+
<<<
61+
var x = '''\
62+
z''';
63+
>>> single trailing escape
64+
var x = '''\
65+
z''';
66+
<<<
67+
var x = '''\
68+
z''';
69+
>>> final "escape" allowed too, not removed, but normalized
70+
var x = '''×20×09\×20\×09×20×09\
71+
''';
72+
<<<
73+
var x = '''\×20\×20\
74+
''';
75+
>>> "escape" allowed in raw strings, not removed, but normalized
76+
var x = r'''×20×09\×20\×09×20×09\
77+
''';
78+
<<<
79+
var x = r'''\×20\×20\
80+
''';
81+
>>> A "double-escape" is not an escaped whitespace
82+
var x = '''×20×09\\×20
83+
''';
84+
<<<
85+
var x = '''×20×09\\×20
86+
''';
87+
>>> Non-whitespace character zero-content part on first line
88+
var x = ''' ${''}×20×09
89+
''';
90+
<<<
91+
var x = ''' ${''}×20×09
92+
''';
93+
>>> interpolations do not start a new "first line"
94+
var x = '''×20×09
95+
${''}×20×09
96+
''';
97+
<<<
98+
var x = '''
99+
${''}×20×09
100+
''';
101+
>>> Works with any line break - U+000A
102+
var x = '''×20×0a×20''';
103+
<<<
104+
var x = '''
105+
×20''';
106+
>>> Works with any line break - U+000D
107+
var x = '''×20×0d×20''';
108+
<<<
109+
var x = '''
110+
×20''';
111+
>>> Works with any line break - U+000D U+000A
112+
// First linebreak is not \r\n.
113+
var x = '''×20×0d×0az×20''';
114+
<<<
115+
// First linebreak is not \r\n.
116+
var x = '''
117+
z×20''';

0 commit comments

Comments
 (0)