Skip to content

Commit 2ef85c1

Browse files
authored
Merge pull request swiftlang#33397 from bnbarham/benb/multiline-indentation-32181422
2 parents 576d0b8 + 77c70e9 commit 2ef85c1

9 files changed

+335
-55
lines changed

include/swift/Parse/Lexer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ class Lexer {
352352
static SourceLoc getLocForStartOfLine(SourceManager &SM, SourceLoc Loc);
353353

354354
/// Retrieve the source location for the end of the line containing the
355-
/// given token, which is the location of the start of the next line.
355+
/// given location, which is the location of the start of the next line.
356356
static SourceLoc getLocForEndOfLine(SourceManager &SM, SourceLoc Loc);
357357

358358
/// Retrieve the string used to indent the line that contains the given

lib/IDE/Formatting.cpp

Lines changed: 127 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ getLocForContentStartOnSameLine(SourceManager &SM, SourceLoc Loc) {
6161
return LineStart.getAdvancedLoc(Indentation.size());
6262
}
6363

64+
/// \returns true if the line at \c Loc is either empty or only contains whitespace
65+
static bool isLineAtLocEmpty(SourceManager &SM, SourceLoc Loc) {
66+
SourceLoc LineStart = Lexer::getLocForStartOfLine(SM, Loc);
67+
SourceLoc Next = Lexer::getTokenAtLocation(SM, LineStart, CommentRetentionMode::ReturnAsTokens).getLoc();
68+
return Next.isInvalid() || !isOnSameLine(SM, Loc, Next);
69+
}
70+
6471
/// \returns the first token after the token at \c Loc.
6572
static Optional<Token>
6673
getTokenAfter(SourceManager &SM, SourceLoc Loc, bool SkipComments = true) {
@@ -136,6 +143,21 @@ static ClosureExpr *findTrailingClosureFromArgument(Expr *arg) {
136143
return nullptr;
137144
}
138145

146+
static size_t calcVisibleWhitespacePrefix(StringRef Line,
147+
CodeFormatOptions Options) {
148+
size_t Indent = 0;
149+
for (auto Char : Line) {
150+
if (Char == '\t') {
151+
Indent += Options.TabWidth;
152+
} else if (Char == ' ' || Char == '\v' || Char == '\f') {
153+
Indent++;
154+
} else {
155+
break;
156+
}
157+
}
158+
return Indent;
159+
}
160+
139161
/// An indentation context of the target location
140162
struct IndentContext {
141163
enum ContextKind { Exact, LineStart };
@@ -258,16 +280,14 @@ class FormatContext {
258280
Optional<IndentContext> InnermostCtx;
259281
bool InDocCommentBlock;
260282
bool InCommentLine;
261-
bool InStringLiteral;
262283

263284
public:
264285
FormatContext(SourceManager &SM,
265286
Optional<IndentContext> IndentCtx,
266287
bool InDocCommentBlock = false,
267-
bool InCommentLine = false,
268-
bool InStringLiteral = false)
288+
bool InCommentLine = false)
269289
:SM(SM), InnermostCtx(IndentCtx), InDocCommentBlock(InDocCommentBlock),
270-
InCommentLine(InCommentLine), InStringLiteral(InStringLiteral) { }
290+
InCommentLine(InCommentLine) { }
271291

272292
bool IsInDocCommentBlock() {
273293
return InDocCommentBlock;
@@ -277,10 +297,6 @@ class FormatContext {
277297
return InCommentLine;
278298
}
279299

280-
bool IsInStringLiteral() const {
281-
return InStringLiteral;
282-
}
283-
284300
void padToExactColumn(StringBuilder &Builder,
285301
const CodeFormatOptions &FmtOptions) {
286302
assert(isExact() && "Context is not exact?");
@@ -1208,8 +1224,9 @@ class FormatWalker : public ASTWalker {
12081224
bool InDocCommentBlock = false;
12091225
/// Whether the target location appears within a line comment.
12101226
bool InCommentLine = false;
1211-
/// Whether the target location appears within a string literal.
1212-
bool InStringLiteral = false;
1227+
1228+
/// The range of the string literal the target is inside of (if any, invalid otherwise).
1229+
CharSourceRange StringLiteralRange;
12131230

12141231
public:
12151232
explicit FormatWalker(SourceFile &SF, SourceManager &SM, CodeFormatOptions &Options)
@@ -1224,7 +1241,8 @@ class FormatWalker : public ASTWalker {
12241241
CtxOverride.clear();
12251242
TargetLocation = Loc;
12261243
TargetLineLoc = Lexer::getLocForStartOfLine(SM, TargetLocation);
1227-
InDocCommentBlock = InCommentLine = InStringLiteral = false;
1244+
InDocCommentBlock = InCommentLine = false;
1245+
StringLiteralRange = CharSourceRange();
12281246
NodesToSkip.clear();
12291247
CurrentTokIt = TokenList.begin();
12301248

@@ -1234,14 +1252,99 @@ class FormatWalker : public ASTWalker {
12341252
if (InnermostCtx)
12351253
CtxOverride.applyIfNeeded(SM, *InnermostCtx);
12361254

1255+
if (StringLiteralRange.isValid()) {
1256+
assert(!InDocCommentBlock && !InCommentLine &&
1257+
"Target is in both a string and comment");
1258+
InnermostCtx = indentWithinStringLiteral();
1259+
} else {
1260+
assert(!InDocCommentBlock || !InCommentLine &&
1261+
"Target is in both a doc comment block and comment line");
1262+
}
1263+
12371264
return FormatContext(SM, InnermostCtx, InDocCommentBlock,
1238-
InCommentLine, InStringLiteral);
1265+
InCommentLine);
12391266
}
12401267

1268+
private:
1269+
1270+
Optional<IndentContext> indentWithinStringLiteral() {
1271+
assert(StringLiteralRange.isValid() && "Target is not within a string literal");
1272+
1273+
// This isn't ideal since if the user types """""" and then an enter
1274+
// inside after, we won't indent the end quotes. But indenting the end
1275+
// quotes could lead to an error in the rest of the string, so best to
1276+
// avoid it entirely for now.
1277+
if (isOnSameLine(SM, TargetLineLoc, StringLiteralRange.getEnd()))
1278+
return IndentContext {TargetLocation, false, IndentContext::Exact};
1279+
1280+
// If there's contents before the end quotes then it's likely the quotes
1281+
// are actually the start quotes of the next string in the file. Pretend
1282+
// they don't exist so their indent doesn't affect the indenting.
1283+
SourceLoc EndLineContentLoc =
1284+
getLocForContentStartOnSameLine(SM, StringLiteralRange.getEnd());
1285+
bool HaveEndQuotes = CharSourceRange(SM, EndLineContentLoc,
1286+
StringLiteralRange.getEnd())
1287+
.str().equals(StringRef("\"\"\""));
1288+
1289+
if (!HaveEndQuotes) {
1290+
// Indent to the same indentation level as the first non-empty line
1291+
// before the target. If that line is the start line then either use
1292+
// the same indentation of the start quotes if they are on their own
1293+
// line, or an extra indentation otherwise.
1294+
//
1295+
// This will indent lines with content on it as well, which should be
1296+
// fine since it is quite unlikely anyone would format a range that
1297+
// includes an unterminated string.
1298+
1299+
SourceLoc AlignLoc = TargetLineLoc;
1300+
while (SM.isBeforeInBuffer(StringLiteralRange.getStart(), AlignLoc)) {
1301+
AlignLoc = Lexer::getLocForStartOfLine(SM, AlignLoc.getAdvancedLoc(-1));
1302+
if (!isLineAtLocEmpty(SM, AlignLoc)) {
1303+
AlignLoc = getLocForContentStartOnSameLine(SM, AlignLoc);
1304+
break;
1305+
}
1306+
}
1307+
1308+
if (isOnSameLine(SM, AlignLoc, StringLiteralRange.getStart())) {
1309+
SourceLoc StartLineContentLoc =
1310+
getLocForContentStartOnSameLine(SM, StringLiteralRange.getStart());
1311+
bool StartLineOnlyQuotes = CharSourceRange(SM, StartLineContentLoc,
1312+
StringLiteralRange.getEnd())
1313+
.str().startswith(StringRef("\"\"\""));
1314+
if (!StartLineOnlyQuotes)
1315+
return IndentContext {StringLiteralRange.getStart(), true};
1316+
1317+
AlignLoc = StringLiteralRange.getStart();
1318+
}
1319+
1320+
return IndentContext {AlignLoc, false, IndentContext::Exact};
1321+
}
1322+
1323+
// If there are end quotes, only enforce a minimum indentation. We don't
1324+
// want to add any other indentation since that could add unintended
1325+
// whitespace to existing strings. Could change this if the full range
1326+
// was passed rather than a single line - in that case we *would* indent
1327+
// if the range was a single empty line.
1328+
1329+
CharSourceRange TargetIndentRange =
1330+
CharSourceRange(SM, Lexer::getLocForStartOfLine(SM, TargetLocation),
1331+
TargetLocation);
1332+
CharSourceRange EndIndentRange =
1333+
CharSourceRange(SM, Lexer::getLocForStartOfLine(SM, EndLineContentLoc),
1334+
EndLineContentLoc);
1335+
1336+
size_t TargetIndent =
1337+
calcVisibleWhitespacePrefix(TargetIndentRange.str(), FmtOptions);
1338+
size_t EndIndent =
1339+
calcVisibleWhitespacePrefix(EndIndentRange.str(), FmtOptions);
1340+
if (TargetIndent >= EndIndent)
1341+
return IndentContext {TargetLocation, false, IndentContext::Exact};
1342+
1343+
return IndentContext {EndLineContentLoc, false, IndentContext::Exact};
1344+
}
12411345

12421346
#pragma mark ASTWalker overrides and helpers
12431347

1244-
private:
12451348
bool walkCustomAttributes(Decl *D) {
12461349
// CustomAttrs of non-param VarDecls are handled when this method is called
12471350
// on their containing PatternBindingDecls (below).
@@ -1330,7 +1433,7 @@ class FormatWalker : public ASTWalker {
13301433
SM.isBeforeInBuffer(E->getStartLoc(), TargetLocation) &&
13311434
SM.isBeforeInBuffer(TargetLocation,
13321435
Lexer::getLocForEndOfToken(SM, E->getEndLoc()))) {
1333-
InStringLiteral = true;
1436+
StringLiteralRange = Lexer::getCharSourceRangeFromSourceRange(SM, E->getSourceRange());
13341437
}
13351438

13361439
// Create a default indent context for all top-level expressions
@@ -1352,7 +1455,7 @@ class FormatWalker : public ASTWalker {
13521455

13531456
// Don't visit the child expressions of interpolated strings directly -
13541457
// visit only the argument of each appendInterpolation call instead, and
1355-
// update InStringLiteral for each segment.
1458+
// set StringLiteralRange if needed for each segment.
13561459
if (auto *ISL = dyn_cast<InterpolatedStringLiteralExpr>(E)) {
13571460
if (Action.shouldVisitChildren()) {
13581461
llvm::SaveAndRestore<ASTWalker::ParentTy>(Parent, ISL);
@@ -1364,7 +1467,8 @@ class FormatWalker : public ASTWalker {
13641467
// Handle the preceeding string segment.
13651468
CharSourceRange StringRange(SM, PrevStringStart, CE->getStartLoc());
13661469
if (StringRange.contains(TargetLocation)) {
1367-
InStringLiteral = true;
1470+
StringLiteralRange =
1471+
Lexer::getCharSourceRangeFromSourceRange(SM, E->getSourceRange());
13681472
return;
13691473
}
13701474
// Walk into the interpolation segment.
@@ -1378,7 +1482,8 @@ class FormatWalker : public ASTWalker {
13781482
SourceLoc End = Lexer::getLocForEndOfToken(SM, ISL->getStartLoc());
13791483
CharSourceRange StringRange(SM, PrevStringStart, End);
13801484
if (StringRange.contains(TargetLocation))
1381-
InStringLiteral = true;
1485+
StringLiteralRange =
1486+
Lexer::getCharSourceRangeFromSourceRange(SM, E->getSourceRange());
13821487

13831488
return {false, E};
13841489
}
@@ -1467,7 +1572,7 @@ class FormatWalker : public ASTWalker {
14671572
}
14681573

14691574
void scanTokensUntil(SourceLoc Loc) {
1470-
if (InDocCommentBlock || InCommentLine)
1575+
if (InDocCommentBlock || InCommentLine || StringLiteralRange.isValid())
14711576
return;
14721577
for (auto Invalid = Loc.isInvalid(); CurrentTokIt != TokenList.end() &&
14731578
(Invalid || SM.isBeforeInBuffer(CurrentTokIt->getLoc(), Loc));
@@ -1477,16 +1582,16 @@ class FormatWalker : public ASTWalker {
14771582
SourceLoc StartLineLoc = Lexer::getLocForStartOfLine(
14781583
SM, CommentRange.getStart());
14791584

1480-
// The -1 is needed in case the past-the-end position is a newline
1481-
// character. In that case getLocForStartOfLine returns the start of
1482-
// the next line.
14831585
SourceLoc EndLineLoc = Lexer::getLocForStartOfLine(
1484-
SM, CommentRange.getEnd().getAdvancedLoc(-1));
1586+
SM, CommentRange.getEnd());
14851587
auto TokenStr = CurrentTokIt->getRange().str();
14861588
InDocCommentBlock |= SM.isBeforeInBuffer(StartLineLoc, TargetLineLoc) && !SM.isBeforeInBuffer(EndLineLoc, TargetLineLoc) &&
14871589
TokenStr.startswith("/*");
14881590
InCommentLine |= StartLineLoc == TargetLineLoc &&
14891591
TokenStr.startswith("//");
1592+
} else if (CurrentTokIt->getKind() == tok::unknown &&
1593+
CurrentTokIt->getRange().str().startswith("\"\"\"")) {
1594+
StringLiteralRange = CurrentTokIt->getRange();
14901595
}
14911596
}
14921597
}
@@ -2797,12 +2902,6 @@ class CodeFormatter {
27972902
std::pair<LineRange, std::string> indent(unsigned LineIndex,
27982903
FormatContext &FC,
27992904
StringRef Text) {
2800-
if (FC.IsInStringLiteral()) {
2801-
return std::make_pair(
2802-
LineRange(LineIndex, 1),
2803-
swift::ide::getTextForLine(LineIndex, Text, /*Trim*/ false).str());
2804-
}
2805-
28062905
if (FC.isExact()) {
28072906
StringRef Line = swift::ide::getTextForLine(LineIndex, Text, /*Trim*/true);
28082907
StringBuilder Builder;

lib/Parse/Lexer.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2706,12 +2706,12 @@ static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
27062706
// Find the start of the given line.
27072707
static const char *findStartOfLine(const char *bufStart, const char *current) {
27082708
while (current != bufStart) {
2709-
if (current[0] == '\n' || current[0] == '\r') {
2709+
--current;
2710+
2711+
if (current[0] == '\n') {
27102712
++current;
27112713
break;
27122714
}
2713-
2714-
--current;
27152715
}
27162716

27172717
return current;
@@ -2779,19 +2779,16 @@ SourceLoc Lexer::getLocForEndOfLine(SourceManager &SM, SourceLoc Loc) {
27792779
if (BufferID < 0)
27802780
return SourceLoc();
27812781

2782-
// Use fake language options; language options only affect validity
2783-
// and the exact token produced.
2784-
LangOptions FakeLangOpts;
2782+
CharSourceRange entireRange = SM.getRangeForBuffer(BufferID);
2783+
StringRef Buffer = SM.extractText(entireRange);
27852784

2786-
// Here we return comments as tokens because either the caller skipped
2787-
// comments and normally we won't be at the beginning of a comment token
2788-
// (making this option irrelevant), or the caller lexed comments and
2789-
// we need to lex just the comment token.
2790-
Lexer L(FakeLangOpts, SM, BufferID, nullptr, LexerMode::Swift,
2791-
HashbangMode::Allowed, CommentRetentionMode::ReturnAsTokens);
2792-
L.restoreState(State(Loc));
2793-
L.skipToEndOfLine(/*EatNewline=*/true);
2794-
return getSourceLoc(L.CurPtr);
2785+
// Windows line endings are \r\n. Since we want the start of the next
2786+
// line, just look for \n so the \r is skipped through.
2787+
size_t Offset = SM.getLocOffsetInBuffer(Loc, BufferID);
2788+
Offset = Buffer.find('\n', Offset);
2789+
if (Offset == StringRef::npos)
2790+
return SourceLoc();
2791+
return getSourceLoc(Buffer.data() + Offset + 1);
27952792
}
27962793

27972794
StringRef Lexer::getIndentationForLine(SourceManager &SM, SourceLoc Loc,
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
let s1 = """
2+
\(
3+
45
4+
)
5+
"""
6+
7+
let s2 = """
8+
\(
9+
45
10+
)
11+
"""
12+
13+
let s3 = """
14+
\(
15+
45
16+
)
17+
"""
18+
19+
let s4 = """
20+
foo \( 45 /*
21+
comment content
22+
*/) bar
23+
"""
24+
25+
// RUN: %sourcekitd-test -req=format -line=2 %s >%t.response
26+
// RUN: %sourcekitd-test -req=format -line=3 %s >>%t.response
27+
// RUN: %sourcekitd-test -req=format -line=4 %s >>%t.response
28+
29+
// RUN: %sourcekitd-test -req=format -line=8 %s >>%t.response
30+
// RUN: %sourcekitd-test -req=format -line=9 %s >>%t.response
31+
// RUN: %sourcekitd-test -req=format -line=10 %s >>%t.response
32+
33+
// RUN: %sourcekitd-test -req=format -line=14 %s >>%t.response
34+
// RUN: %sourcekitd-test -req=format -line=15 %s >>%t.response
35+
// RUN: %sourcekitd-test -req=format -line=16 %s >>%t.response
36+
37+
// RUN: %sourcekitd-test -req=format -line=20 %s >>%t.response
38+
// RUN: %sourcekitd-test -req=format -line=21 %s >>%t.response
39+
// RUN: %sourcekitd-test -req=format -line=22 %s >>%t.response
40+
41+
// RUN: %FileCheck --strict-whitespace %s <%t.response
42+
43+
// CHECK: key.sourcetext: " \\("
44+
// CHECK: key.sourcetext: " 45"
45+
// CHECK: key.sourcetext: " )"
46+
47+
// CHECK: key.sourcetext: " \\("
48+
// CHECK: key.sourcetext: " 45"
49+
// CHECK: key.sourcetext: " )"
50+
51+
// CHECK: key.sourcetext: " \\("
52+
// CHECK: key.sourcetext: " 45"
53+
// CHECK: key.sourcetext: ")"
54+
55+
// CHECK: key.sourcetext: " foo \\( 45 /*"
56+
// CHECK: key.sourcetext: " comment content"
57+
// CHECK: key.sourcetext: " */) bar"

0 commit comments

Comments
 (0)