Skip to content

Commit 2a7722b

Browse files
committed
Store lexer error offset relative to leading trivia start
1 parent 1159abd commit 2a7722b

File tree

7 files changed

+73
-35
lines changed

7 files changed

+73
-35
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -255,13 +255,15 @@ extension Lexer {
255255
struct Result {
256256
let tokenKind: RawTokenKind
257257
let flags: Lexer.Lexeme.Flags
258-
let error: LexerError?
258+
/// The error kind and the cursor pointing to the character at which the
259+
/// error occurred
260+
let error: (kind: LexerError.Kind, position: Lexer.Cursor)?
259261
let stateTransition: StateTransition?
260262

261263
init(
262264
_ tokenKind: RawTokenKind,
263265
flags: Lexer.Lexeme.Flags = [],
264-
error: LexerError? = nil,
266+
error: (kind: LexerError.Kind, position: Cursor)? = nil,
265267
stateTransition: StateTransition? = nil
266268
) {
267269
self.tokenKind = tokenKind
@@ -335,10 +337,14 @@ extension Lexer.Cursor {
335337
flags.insert(.isAtStartOfLine)
336338
}
337339

340+
let error = result.error.map { error in
341+
return LexerError(error.kind, byteOffset: cursor.distance(to: error.position))
342+
}
343+
338344
return .init(
339345
tokenKind: result.tokenKind,
340346
flags: flags,
341-
error: result.error,
347+
error: error,
342348
start: leadingTriviaStart.pointer,
343349
leadingTriviaLength: leadingTriviaStart.distance(to: textStart),
344350
textLength: textStart.distance(to: trailingTriviaStart),
@@ -1194,11 +1200,11 @@ extension Lexer.Cursor {
11941200
let oConsumed = self.advance(matching: "o") // Consume 'o'
11951201
assert(zeroConsumed && oConsumed)
11961202
if let peeked = self.peek(), peeked < UInt8(ascii: "0") || peeked > UInt8(ascii: "7") {
1197-
let errorOffset = tokenStart.distance(to: self)
1203+
let errorPos = self
11981204
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
11991205
return Lexer.Result(
12001206
.integerLiteral,
1201-
error: LexerError(.invalidOctalDigitInIntegerLiteral, byteOffset: errorOffset)
1207+
error: (.invalidOctalDigitInIntegerLiteral, errorPos)
12021208
)
12031209
}
12041210

@@ -1208,11 +1214,11 @@ extension Lexer.Cursor {
12081214

12091215
let tmp = self
12101216
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1211-
let errorOffset = tokenStart.distance(to: tmp)
1217+
let errorPos = tmp
12121218
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12131219
return Lexer.Result(
12141220
.integerLiteral,
1215-
error: LexerError(.invalidOctalDigitInIntegerLiteral, byteOffset: errorOffset)
1221+
error: (.invalidOctalDigitInIntegerLiteral, errorPos)
12161222
)
12171223
}
12181224

@@ -1225,11 +1231,11 @@ extension Lexer.Cursor {
12251231
let bConsumed = self.advance(matching: "b") // Consume 'b'
12261232
assert(zeroConsumed && bConsumed)
12271233
if self.is(notAt: "0", "1") {
1228-
let errorOffset = tokenStart.distance(to: self)
1234+
let errorPos = self
12291235
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12301236
return Lexer.Result(
12311237
.integerLiteral,
1232-
error: LexerError(.invalidBinaryDigitInIntegerLiteral, byteOffset: errorOffset)
1238+
error: (.invalidBinaryDigitInIntegerLiteral, errorPos)
12331239
)
12341240
}
12351241

@@ -1239,11 +1245,11 @@ extension Lexer.Cursor {
12391245

12401246
let tmp = self
12411247
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1242-
let errorOffset = tokenStart.distance(to: tmp)
1248+
let errorPos = tmp
12431249
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12441250
return Lexer.Result(
12451251
.integerLiteral,
1246-
error: LexerError(.invalidBinaryDigitInIntegerLiteral, byteOffset: errorOffset)
1252+
error: (.invalidBinaryDigitInIntegerLiteral, errorPos)
12471253
)
12481254
}
12491255

@@ -1268,11 +1274,11 @@ extension Lexer.Cursor {
12681274
// something else, then this is the end of the token.
12691275
let tmp = self
12701276
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1271-
let errorOffset = tokenStart.distance(to: tmp)
1277+
let errorPos = tmp
12721278
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12731279
return Lexer.Result(
12741280
.integerLiteral,
1275-
error: LexerError(.invalidDecimalDigitInIntegerLiteral, byteOffset: errorOffset)
1281+
error: (.invalidDecimalDigitInIntegerLiteral, errorPos)
12761282
)
12771283
}
12781284

@@ -1305,20 +1311,23 @@ extension Lexer.Cursor {
13051311
errorKind = .expectedDigitInFloatLiteral
13061312
}
13071313

1308-
let errorOffset = tokenStart.distance(to: tmp)
1314+
let errorPos = tmp
13091315
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
1310-
return Lexer.Result(.floatingLiteral, error: LexerError(errorKind, byteOffset: errorOffset))
1316+
return Lexer.Result(
1317+
.floatingLiteral,
1318+
error: (errorKind, errorPos)
1319+
)
13111320
}
13121321

13131322
self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") })
13141323

13151324
let tmp = self
13161325
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1317-
let errorOffset = tokenStart.distance(to: tmp)
1326+
let errorPos = tmp
13181327
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
13191328
return Lexer.Result(
13201329
.floatingLiteral,
1321-
error: LexerError(.invalidFloatingPointExponentDigit, byteOffset: errorOffset)
1330+
error: (.invalidFloatingPointExponentDigit, errorPos)
13221331
)
13231332
}
13241333
}
@@ -1339,11 +1348,11 @@ extension Lexer.Cursor {
13391348
return Lexer.Result(.integerLiteral)
13401349
}
13411350
guard let peeked = self.peek(), Unicode.Scalar(peeked).isHexDigit else {
1342-
let errorOffset = tokStart.distance(to: self)
1351+
let errorPos = self
13431352
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
13441353
return Lexer.Result(
13451354
.integerLiteral,
1346-
error: LexerError(.invalidHexDigitInIntegerLiteral, byteOffset: errorOffset)
1355+
error: (.invalidHexDigitInIntegerLiteral, errorPos)
13471356
)
13481357
}
13491358

@@ -1352,11 +1361,11 @@ extension Lexer.Cursor {
13521361
if self.isAtEndOfFile || self.is(notAt: ".", "p", "P") {
13531362
let tmp = self
13541363
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1355-
let errorOffset = tokStart.distance(to: tmp)
1364+
let errorPos = tmp
13561365
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
13571366
return Lexer.Result(
13581367
.integerLiteral,
1359-
error: LexerError(.invalidHexDigitInIntegerLiteral, byteOffset: errorOffset)
1368+
error: (.invalidHexDigitInIntegerLiteral, errorPos)
13601369
)
13611370
} else {
13621371
return Lexer.Result(.integerLiteral)
@@ -1385,7 +1394,7 @@ extension Lexer.Cursor {
13851394
}
13861395
return Lexer.Result(
13871396
.integerLiteral,
1388-
error: LexerError(.expectedBinaryExponentInHexFloatLiteral, byteOffset: tokStart.distance(to: self))
1397+
error: (.expectedBinaryExponentInHexFloatLiteral, self)
13891398
)
13901399
}
13911400
} else {
@@ -1424,20 +1433,23 @@ extension Lexer.Cursor {
14241433
} else {
14251434
errorKind = .expectedDigitInFloatLiteral
14261435
}
1427-
let errorOffset = tokStart.distance(to: tmp)
1436+
let errorPos = tmp
14281437
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
1429-
return Lexer.Result(.floatingLiteral, error: LexerError(errorKind, byteOffset: errorOffset))
1438+
return Lexer.Result(
1439+
.floatingLiteral,
1440+
error: (errorKind, errorPos)
1441+
)
14301442
}
14311443

14321444
self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") })
14331445

14341446
let tmp = self
14351447
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1436-
let errorOffset = tokStart.distance(to: tmp)
1448+
let errorPos = tmp
14371449
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
14381450
return Lexer.Result(
14391451
.floatingLiteral,
1440-
error: LexerError(.invalidFloatingPointExponentDigit, byteOffset: errorOffset)
1452+
error: (.invalidFloatingPointExponentDigit, errorPos)
14411453
)
14421454
}
14431455
return Lexer.Result(.floatingLiteral)

Sources/SwiftParser/Parser.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ extension Parser {
512512

513513
let endIndex = current.textRange.lowerBound.advanced(by: prefix.count)
514514
var lexerError = current.error
515-
if let error = lexerError, error.byteOffset > prefix.count {
515+
if let error = lexerError, error.byteOffset > prefix.count + current.leadingTriviaByteLength {
516516
// The lexer error isn't in the prefix. Drop it.
517517
lexerError = nil
518518
}

Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,9 @@ public extension SwiftSyntax.LexerError {
9898
/// `wholeText` is the entire text of the token in which the `LexerError`
9999
/// occurred, including trivia.
100100
@_spi(RawSyntax)
101-
func diagnostic(tokenText: SyntaxText) -> DiagnosticMessage {
101+
func diagnostic(wholeText: SyntaxText) -> DiagnosticMessage {
102102
var scalarAtErrorOffset: UnicodeScalar {
103-
Unicode.Scalar(tokenText[Int(self.byteOffset)])
103+
Unicode.Scalar(wholeText[Int(self.byteOffset)])
104104
}
105105

106106
switch self.kind {
@@ -130,6 +130,8 @@ public extension SwiftSyntax.LexerError {
130130
}
131131

132132
func diagnostic(in token: TokenSyntax) -> DiagnosticMessage {
133-
return self.diagnostic(tokenText: token.tokenView.rawText)
133+
return token.tokenView.wholeText { wholeText in
134+
// Use the closure's `wholeText` (leading trivia + text + trailing trivia):
// `byteOffset` is relative to the token's leading trivia start, so indexing
// `rawText` (text only) would read the wrong byte or run out of bounds.
return self.diagnostic(wholeText: wholeText)
135+
}
134136
}
135137
}

Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
338338
handleMissingToken(token)
339339
} else {
340340
if let lexerError = token.lexerError {
341-
self.addDiagnostic(token, position: token.positionAfterSkippingLeadingTrivia.advanced(by: Int(lexerError.byteOffset)), lexerError.diagnostic(in: token))
341+
self.addDiagnostic(token, position: token.position.advanced(by: Int(lexerError.byteOffset)), lexerError.diagnostic(in: token))
342342
}
343343
}
344344

Sources/SwiftSyntax/LexerError.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ public struct LexerError: Hashable {
3333

3434
public let kind: Kind
3535

36-
/// The offset at which the error is, in bytes relative to the token's content
37-
/// start (i.e. relative to the tokens `positionAfterSkippingLeadingTrivia`)
36+
/// The offset at which the error is, in bytes relative to the token's leading
37+
/// trivia start (i.e. relative to the token's `position`)
3838
public let byteOffset: UInt16
3939

4040
public init(_ kind: Kind, byteOffset: UInt16) {
@@ -43,6 +43,7 @@ public struct LexerError: Hashable {
4343
}
4444

4545
public init(_ kind: Kind, byteOffset: Int) {
46+
assert(byteOffset >= 0)
4647
// `type(of: self.byteOffset).max` gets optimized to a constant
4748
if byteOffset > type(of: self.byteOffset).max {
4849
self.kind = .lexerErrorOffsetOverflow

Sources/SwiftSyntax/Raw/RawSyntaxTokenView.swift

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,29 @@ public struct RawSyntaxTokenView {
6565
}
6666
}
6767

68+
@_spi(RawSyntax)
69+
public func wholeText<T>(_ body: (SyntaxText) -> T) -> T {
70+
switch raw.rawData.payload {
71+
case .parsedToken(let dat):
72+
return body(dat.wholeText)
73+
case .materializedToken(let dat):
74+
var wholeText: [UInt8] = []
75+
wholeText.reserveCapacity(leadingTriviaByteLength + textByteLength + trailingTriviaByteLength)
76+
for leadingTriviaPiece in dat.leadingTrivia {
77+
leadingTriviaPiece.withSyntaxText { wholeText.append(contentsOf: $0) }
78+
}
79+
wholeText.append(contentsOf: self.rawText)
80+
for trailingTriviaPiece in dat.trailingTrivia {
81+
trailingTriviaPiece.withSyntaxText { wholeText.append(contentsOf: $0) }
82+
}
83+
return wholeText.withUnsafeBufferPointer { buffer in
84+
return body(SyntaxText(buffer: buffer))
85+
}
86+
case .layout(_):
87+
preconditionFailure("'wholeText' is not available for non-token node")
88+
}
89+
}
90+
6891
/// The UTF-8 byte length of the leading trivia.
6992
@_spi(RawSyntax)
7093
public var leadingTriviaByteLength: Int {

Tests/SwiftParserTest/Assertions.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,15 +143,15 @@ private func AssertTokens(
143143
)
144144
case (let actualError?, let expectedError?):
145145
AssertStringsEqualWithDiff(
146-
actualError.diagnostic(tokenText: actualLexeme.tokenText).message,
146+
actualError.diagnostic(wholeText: actualLexeme.wholeText).message,
147147
expectedError,
148148
file: expectedLexeme.file,
149149
line: expectedLexeme.line
150150
)
151151
if let location = markerLocations[expectedLexeme.errorLocationMarker] {
152152
XCTAssertEqual(
153153
Int(actualError.byteOffset),
154-
location - lexemeStartOffset - actualLexeme.leadingTriviaByteLength,
154+
location - lexemeStartOffset,
155155
"Expected location did not match",
156156
file: expectedLexeme.file,
157157
line: expectedLexeme.line

0 commit comments

Comments
 (0)