Skip to content

Commit 77c70e9

Browse files
committed
[SourceKit/CodeFormat] Indent lines in multi-line strings
When the multi-line string is unterminated, the indentation of a line will be the same as the first previous line that had contents, unless that line is the line containing the start quotes. In that case the indentation will be: - the same as the start quote indentation, if the quotes are the only contents on the line, or - an extra indentation level to the start quote line otherwise Lines within a terminated multi-line string or lines with content will only ever be indented if their current indentation is invalid (ie. their indentation level is less than that of their end quotes). This rule is to prevent any signficant (and possibly unintended) whitespace being added to existing strings during a whole file/range format - Xcode does not remove whitespace from whitespace-only lines by default. This could be improved if the reformat was sent the actual range rather than a line at a time. Different indentation could then be chosen if the range was in fact a single line. Resolves rdar://32181422
1 parent 7594cfb commit 77c70e9

7 files changed

+322
-39
lines changed

lib/IDE/Formatting.cpp

Lines changed: 127 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ getLocForContentStartOnSameLine(SourceManager &SM, SourceLoc Loc) {
6161
return LineStart.getAdvancedLoc(Indentation.size());
6262
}
6363

64+
/// \returns true if the line at \c Loc is either empty or only contains whitespace
65+
static bool isLineAtLocEmpty(SourceManager &SM, SourceLoc Loc) {
66+
SourceLoc LineStart = Lexer::getLocForStartOfLine(SM, Loc);
67+
SourceLoc Next = Lexer::getTokenAtLocation(SM, LineStart, CommentRetentionMode::ReturnAsTokens).getLoc();
68+
return Next.isInvalid() || !isOnSameLine(SM, Loc, Next);
69+
}
70+
6471
/// \returns the first token after the token at \c Loc.
6572
static Optional<Token>
6673
getTokenAfter(SourceManager &SM, SourceLoc Loc, bool SkipComments = true) {
@@ -136,6 +143,21 @@ static ClosureExpr *findTrailingClosureFromArgument(Expr *arg) {
136143
return nullptr;
137144
}
138145

146+
static size_t calcVisibleWhitespacePrefix(StringRef Line,
147+
CodeFormatOptions Options) {
148+
size_t Indent = 0;
149+
for (auto Char : Line) {
150+
if (Char == '\t') {
151+
Indent += Options.TabWidth;
152+
} else if (Char == ' ' || Char == '\v' || Char == '\f') {
153+
Indent++;
154+
} else {
155+
break;
156+
}
157+
}
158+
return Indent;
159+
}
160+
139161
/// An indentation context of the target location
140162
struct IndentContext {
141163
enum ContextKind { Exact, LineStart };
@@ -258,16 +280,14 @@ class FormatContext {
258280
Optional<IndentContext> InnermostCtx;
259281
bool InDocCommentBlock;
260282
bool InCommentLine;
261-
bool InStringLiteral;
262283

263284
public:
264285
FormatContext(SourceManager &SM,
265286
Optional<IndentContext> IndentCtx,
266287
bool InDocCommentBlock = false,
267-
bool InCommentLine = false,
268-
bool InStringLiteral = false)
288+
bool InCommentLine = false)
269289
:SM(SM), InnermostCtx(IndentCtx), InDocCommentBlock(InDocCommentBlock),
270-
InCommentLine(InCommentLine), InStringLiteral(InStringLiteral) { }
290+
InCommentLine(InCommentLine) { }
271291

272292
bool IsInDocCommentBlock() {
273293
return InDocCommentBlock;
@@ -277,10 +297,6 @@ class FormatContext {
277297
return InCommentLine;
278298
}
279299

280-
bool IsInStringLiteral() const {
281-
return InStringLiteral;
282-
}
283-
284300
void padToExactColumn(StringBuilder &Builder,
285301
const CodeFormatOptions &FmtOptions) {
286302
assert(isExact() && "Context is not exact?");
@@ -1208,8 +1224,9 @@ class FormatWalker : public ASTWalker {
12081224
bool InDocCommentBlock = false;
12091225
/// Whether the target location appears within a line comment.
12101226
bool InCommentLine = false;
1211-
/// Whether the target location appears within a string literal.
1212-
bool InStringLiteral = false;
1227+
1228+
/// The range of the string literal the target is inside of (if any, invalid otherwise).
1229+
CharSourceRange StringLiteralRange;
12131230

12141231
public:
12151232
explicit FormatWalker(SourceFile &SF, SourceManager &SM, CodeFormatOptions &Options)
@@ -1224,7 +1241,8 @@ class FormatWalker : public ASTWalker {
12241241
CtxOverride.clear();
12251242
TargetLocation = Loc;
12261243
TargetLineLoc = Lexer::getLocForStartOfLine(SM, TargetLocation);
1227-
InDocCommentBlock = InCommentLine = InStringLiteral = false;
1244+
InDocCommentBlock = InCommentLine = false;
1245+
StringLiteralRange = CharSourceRange();
12281246
NodesToSkip.clear();
12291247
CurrentTokIt = TokenList.begin();
12301248

@@ -1234,14 +1252,99 @@ class FormatWalker : public ASTWalker {
12341252
if (InnermostCtx)
12351253
CtxOverride.applyIfNeeded(SM, *InnermostCtx);
12361254

1255+
if (StringLiteralRange.isValid()) {
1256+
assert(!InDocCommentBlock && !InCommentLine &&
1257+
"Target is in both a string and comment");
1258+
InnermostCtx = indentWithinStringLiteral();
1259+
} else {
1260+
assert(!InDocCommentBlock || !InCommentLine &&
1261+
"Target is in both a doc comment block and comment line");
1262+
}
1263+
12371264
return FormatContext(SM, InnermostCtx, InDocCommentBlock,
1238-
InCommentLine, InStringLiteral);
1265+
InCommentLine);
12391266
}
12401267

1268+
private:
1269+
1270+
Optional<IndentContext> indentWithinStringLiteral() {
1271+
assert(StringLiteralRange.isValid() && "Target is not within a string literal");
1272+
1273+
// This isn't ideal since if the user types """""" and then an enter
1274+
// inside after, we won't indent the end quotes. But indenting the end
1275+
// quotes could lead to an error in the rest of the string, so best to
1276+
// avoid it entirely for now.
1277+
if (isOnSameLine(SM, TargetLineLoc, StringLiteralRange.getEnd()))
1278+
return IndentContext {TargetLocation, false, IndentContext::Exact};
1279+
1280+
// If there's contents before the end quotes then it's likely the quotes
1281+
// are actually the start quotes of the next string in the file. Pretend
1282+
// they don't exist so their indent doesn't affect the indenting.
1283+
SourceLoc EndLineContentLoc =
1284+
getLocForContentStartOnSameLine(SM, StringLiteralRange.getEnd());
1285+
bool HaveEndQuotes = CharSourceRange(SM, EndLineContentLoc,
1286+
StringLiteralRange.getEnd())
1287+
.str().equals(StringRef("\"\"\""));
1288+
1289+
if (!HaveEndQuotes) {
1290+
// Indent to the same indentation level as the first non-empty line
1291+
// before the target. If that line is the start line then either use
1292+
// the same indentation of the start quotes if they are on their own
1293+
// line, or an extra indentation otherwise.
1294+
//
1295+
// This will indent lines with content on it as well, which should be
1296+
// fine since it is quite unlikely anyone would format a range that
1297+
// includes an unterminated string.
1298+
1299+
SourceLoc AlignLoc = TargetLineLoc;
1300+
while (SM.isBeforeInBuffer(StringLiteralRange.getStart(), AlignLoc)) {
1301+
AlignLoc = Lexer::getLocForStartOfLine(SM, AlignLoc.getAdvancedLoc(-1));
1302+
if (!isLineAtLocEmpty(SM, AlignLoc)) {
1303+
AlignLoc = getLocForContentStartOnSameLine(SM, AlignLoc);
1304+
break;
1305+
}
1306+
}
1307+
1308+
if (isOnSameLine(SM, AlignLoc, StringLiteralRange.getStart())) {
1309+
SourceLoc StartLineContentLoc =
1310+
getLocForContentStartOnSameLine(SM, StringLiteralRange.getStart());
1311+
bool StartLineOnlyQuotes = CharSourceRange(SM, StartLineContentLoc,
1312+
StringLiteralRange.getEnd())
1313+
.str().startswith(StringRef("\"\"\""));
1314+
if (!StartLineOnlyQuotes)
1315+
return IndentContext {StringLiteralRange.getStart(), true};
1316+
1317+
AlignLoc = StringLiteralRange.getStart();
1318+
}
1319+
1320+
return IndentContext {AlignLoc, false, IndentContext::Exact};
1321+
}
1322+
1323+
// If there are end quotes, only enforce a minimum indentation. We don't
1324+
// want to add any other indentation since that could add unintended
1325+
// whitespace to existing strings. Could change this if the full range
1326+
// was passed rather than a single line - in that case we *would* indent
1327+
// if the range was a single empty line.
1328+
1329+
CharSourceRange TargetIndentRange =
1330+
CharSourceRange(SM, Lexer::getLocForStartOfLine(SM, TargetLocation),
1331+
TargetLocation);
1332+
CharSourceRange EndIndentRange =
1333+
CharSourceRange(SM, Lexer::getLocForStartOfLine(SM, EndLineContentLoc),
1334+
EndLineContentLoc);
1335+
1336+
size_t TargetIndent =
1337+
calcVisibleWhitespacePrefix(TargetIndentRange.str(), FmtOptions);
1338+
size_t EndIndent =
1339+
calcVisibleWhitespacePrefix(EndIndentRange.str(), FmtOptions);
1340+
if (TargetIndent >= EndIndent)
1341+
return IndentContext {TargetLocation, false, IndentContext::Exact};
1342+
1343+
return IndentContext {EndLineContentLoc, false, IndentContext::Exact};
1344+
}
12411345

12421346
#pragma mark ASTWalker overrides and helpers
12431347

1244-
private:
12451348
bool walkCustomAttributes(Decl *D) {
12461349
// CustomAttrs of non-param VarDecls are handled when this method is called
12471350
// on their containing PatternBindingDecls (below).
@@ -1330,7 +1433,7 @@ class FormatWalker : public ASTWalker {
13301433
SM.isBeforeInBuffer(E->getStartLoc(), TargetLocation) &&
13311434
SM.isBeforeInBuffer(TargetLocation,
13321435
Lexer::getLocForEndOfToken(SM, E->getEndLoc()))) {
1333-
InStringLiteral = true;
1436+
StringLiteralRange = Lexer::getCharSourceRangeFromSourceRange(SM, E->getSourceRange());
13341437
}
13351438

13361439
// Create a default indent context for all top-level expressions
@@ -1352,7 +1455,7 @@ class FormatWalker : public ASTWalker {
13521455

13531456
// Don't visit the child expressions of interpolated strings directly -
13541457
// visit only the argument of each appendInterpolation call instead, and
1355-
// update InStringLiteral for each segment.
1458+
// set StringLiteralRange if needed for each segment.
13561459
if (auto *ISL = dyn_cast<InterpolatedStringLiteralExpr>(E)) {
13571460
if (Action.shouldVisitChildren()) {
13581461
llvm::SaveAndRestore<ASTWalker::ParentTy>(Parent, ISL);
@@ -1364,7 +1467,8 @@ class FormatWalker : public ASTWalker {
13641467
// Handle the preceeding string segment.
13651468
CharSourceRange StringRange(SM, PrevStringStart, CE->getStartLoc());
13661469
if (StringRange.contains(TargetLocation)) {
1367-
InStringLiteral = true;
1470+
StringLiteralRange =
1471+
Lexer::getCharSourceRangeFromSourceRange(SM, E->getSourceRange());
13681472
return;
13691473
}
13701474
// Walk into the interpolation segment.
@@ -1378,7 +1482,8 @@ class FormatWalker : public ASTWalker {
13781482
SourceLoc End = Lexer::getLocForEndOfToken(SM, ISL->getStartLoc());
13791483
CharSourceRange StringRange(SM, PrevStringStart, End);
13801484
if (StringRange.contains(TargetLocation))
1381-
InStringLiteral = true;
1485+
StringLiteralRange =
1486+
Lexer::getCharSourceRangeFromSourceRange(SM, E->getSourceRange());
13821487

13831488
return {false, E};
13841489
}
@@ -1467,7 +1572,7 @@ class FormatWalker : public ASTWalker {
14671572
}
14681573

14691574
void scanTokensUntil(SourceLoc Loc) {
1470-
if (InDocCommentBlock || InCommentLine)
1575+
if (InDocCommentBlock || InCommentLine || StringLiteralRange.isValid())
14711576
return;
14721577
for (auto Invalid = Loc.isInvalid(); CurrentTokIt != TokenList.end() &&
14731578
(Invalid || SM.isBeforeInBuffer(CurrentTokIt->getLoc(), Loc));
@@ -1477,16 +1582,16 @@ class FormatWalker : public ASTWalker {
14771582
SourceLoc StartLineLoc = Lexer::getLocForStartOfLine(
14781583
SM, CommentRange.getStart());
14791584

1480-
// The -1 is needed in case the past-the-end position is a newline
1481-
// character. In that case getLocForStartOfLine returns the start of
1482-
// the next line.
14831585
SourceLoc EndLineLoc = Lexer::getLocForStartOfLine(
1484-
SM, CommentRange.getEnd().getAdvancedLoc(-1));
1586+
SM, CommentRange.getEnd());
14851587
auto TokenStr = CurrentTokIt->getRange().str();
14861588
InDocCommentBlock |= SM.isBeforeInBuffer(StartLineLoc, TargetLineLoc) && !SM.isBeforeInBuffer(EndLineLoc, TargetLineLoc) &&
14871589
TokenStr.startswith("/*");
14881590
InCommentLine |= StartLineLoc == TargetLineLoc &&
14891591
TokenStr.startswith("//");
1592+
} else if (CurrentTokIt->getKind() == tok::unknown &&
1593+
CurrentTokIt->getRange().str().startswith("\"\"\"")) {
1594+
StringLiteralRange = CurrentTokIt->getRange();
14901595
}
14911596
}
14921597
}
@@ -2797,12 +2902,6 @@ class CodeFormatter {
27972902
std::pair<LineRange, std::string> indent(unsigned LineIndex,
27982903
FormatContext &FC,
27992904
StringRef Text) {
2800-
if (FC.IsInStringLiteral()) {
2801-
return std::make_pair(
2802-
LineRange(LineIndex, 1),
2803-
swift::ide::getTextForLine(LineIndex, Text, /*Trim*/ false).str());
2804-
}
2805-
28062905
if (FC.isExact()) {
28072906
StringRef Line = swift::ide::getTextForLine(LineIndex, Text, /*Trim*/true);
28082907
StringBuilder Builder;
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
let s1 = """
2+
\(
3+
45
4+
)
5+
"""
6+
7+
let s2 = """
8+
\(
9+
45
10+
)
11+
"""
12+
13+
let s3 = """
14+
\(
15+
45
16+
)
17+
"""
18+
19+
let s4 = """
20+
foo \( 45 /*
21+
comment content
22+
*/) bar
23+
"""
24+
25+
// RUN: %sourcekitd-test -req=format -line=2 %s >%t.response
26+
// RUN: %sourcekitd-test -req=format -line=3 %s >>%t.response
27+
// RUN: %sourcekitd-test -req=format -line=4 %s >>%t.response
28+
29+
// RUN: %sourcekitd-test -req=format -line=8 %s >>%t.response
30+
// RUN: %sourcekitd-test -req=format -line=9 %s >>%t.response
31+
// RUN: %sourcekitd-test -req=format -line=10 %s >>%t.response
32+
33+
// RUN: %sourcekitd-test -req=format -line=14 %s >>%t.response
34+
// RUN: %sourcekitd-test -req=format -line=15 %s >>%t.response
35+
// RUN: %sourcekitd-test -req=format -line=16 %s >>%t.response
36+
37+
// RUN: %sourcekitd-test -req=format -line=20 %s >>%t.response
38+
// RUN: %sourcekitd-test -req=format -line=21 %s >>%t.response
39+
// RUN: %sourcekitd-test -req=format -line=22 %s >>%t.response
40+
41+
// RUN: %FileCheck --strict-whitespace %s <%t.response
42+
43+
// CHECK: key.sourcetext: " \\("
44+
// CHECK: key.sourcetext: " 45"
45+
// CHECK: key.sourcetext: " )"
46+
47+
// CHECK: key.sourcetext: " \\("
48+
// CHECK: key.sourcetext: " 45"
49+
// CHECK: key.sourcetext: " )"
50+
51+
// CHECK: key.sourcetext: " \\("
52+
// CHECK: key.sourcetext: " 45"
53+
// CHECK: key.sourcetext: ")"
54+
55+
// CHECK: key.sourcetext: " foo \\( 45 /*"
56+
// CHECK: key.sourcetext: " comment content"
57+
// CHECK: key.sourcetext: " */) bar"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
func foo() {
2+
let s1 = """
3+
this is line1 in outer string \("""
4+
nested string in interpolation
5+
""")
6+
7+
this is line2 in outer string
8+
"""
9+
}
10+
11+
// RUN: %sourcekitd-test -req=format -line=3 %s >%t.response
12+
// RUN: %sourcekitd-test -req=format -line=4 %s >>%t.response
13+
// RUN: %sourcekitd-test -req=format -line=5 %s >>%t.response
14+
// RUN: %sourcekitd-test -req=format -line=6 %s >>%t.response
15+
// RUN: %sourcekitd-test -req=format -line=7 %s >>%t.response
16+
// RUN: %sourcekitd-test -req=format -line=8 %s >>%t.response
17+
18+
// RUN: %FileCheck --strict-whitespace %s <%t.response
19+
20+
// CHECK: key.sourcetext: " this is line1 in outer string \\(\"\"\""
21+
// CHECK: key.sourcetext: " nested string in interpolation"
22+
// CHECK: key.sourcetext: " \"\"\")"
23+
// CHECK: key.sourcetext: " "
24+
// CHECK: key.sourcetext: " this is line2 in outer string"
25+
// CHECK: key.sourcetext: " \"\"\""
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %sourcekitd-test -req=format -line=24 %s >%t.response
2+
// RUN: %sourcekitd-test -req=format -line=25 %s >>%t.response
3+
// RUN: %sourcekitd-test -req=format -line=26 %s >>%t.response
4+
// RUN: %sourcekitd-test -req=format -line=27 %s >>%t.response
5+
// RUN: %sourcekitd-test -req=format -line=28 %s >>%t.response
6+
// RUN: %sourcekitd-test -req=format -line=29 %s >>%t.response
7+
// RUN: %sourcekitd-test -req=format -line=30 %s >>%t.response
8+
// RUN: %sourcekitd-test -req=format -line=31 %s >>%t.response
9+
10+
// RUN: %FileCheck --strict-whitespace %s <%t.response
11+
12+
// The end quotes are ignored as it would be impossible to know whether
13+
// they are part of the interpolation or another string, etc.
14+
// CHECK: key.sourcetext: " "
15+
// CHECK: key.sourcetext: " this is line1"
16+
// CHECK: key.sourcetext: ""
17+
// CHECK: key.sourcetext: "this is line2"
18+
// CHECK: key.sourcetext: " "
19+
// CHECK: key.sourcetext: " and this is a line with trailing interpolation \\(1 +"
20+
// CHECK: key.sourcetext: " "
21+
// CHECK: key.sourcetext: " \"\"\""
22+
23+
let s1 = """
24+
25+
this is line1
26+
27+
this is line2
28+
29+
and this is a line with trailing interpolation \(1 +
30+
31+
"""

0 commit comments

Comments
 (0)