Store lexer error offset relative to leading trivia start

ahoppen · ahoppen · commit 2a7722ba3f11 · 2023-01-26T09:08:16.000+01:00
diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift
@@ -255,13 +255,15 @@ extension Lexer {
   struct Result {
     let tokenKind: RawTokenKind
     let flags: Lexer.Lexeme.Flags
-    let error: LexerError?
+    /// The error kind and the cursor pointing to the character at which the
+    /// error occurred
+    let error: (kind: LexerError.Kind, position: Lexer.Cursor)?
     let stateTransition: StateTransition?
 
     init(
       _ tokenKind: RawTokenKind,
       flags: Lexer.Lexeme.Flags = [],
-      error: LexerError? = nil,
+      error: (kind: LexerError.Kind, position: Cursor)? = nil,
       stateTransition: StateTransition? = nil
     ) {
       self.tokenKind = tokenKind
@@ -335,10 +337,14 @@ extension Lexer.Cursor {
       flags.insert(.isAtStartOfLine)
     }
 
+    let error = result.error.map { error in
+      return LexerError(error.kind, byteOffset: cursor.distance(to: error.position))
+    }
+
     return .init(
       tokenKind: result.tokenKind,
       flags: flags,
-      error: result.error,
+      error: error,
       start: leadingTriviaStart.pointer,
       leadingTriviaLength: leadingTriviaStart.distance(to: textStart),
       textLength: textStart.distance(to: trailingTriviaStart),
@@ -1194,11 +1200,11 @@ extension Lexer.Cursor {
-      let oConsumed = self.advance(matching: "o")  // Consome 'o'
+      let oConsumed = self.advance(matching: "o")  // Consume 'o'
       assert(zeroConsumed && oConsumed)
       if let peeked = self.peek(), peeked < UInt8(ascii: "0") || peeked > UInt8(ascii: "7") {
-        let errorOffset = tokenStart.distance(to: self)
+        let errorPos = self
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
         return Lexer.Result(
           .integerLiteral,
-          error: LexerError(.invalidOctalDigitInIntegerLiteral, byteOffset: errorOffset)
+          error: (.invalidOctalDigitInIntegerLiteral, errorPos)
         )
       }
 
@@ -1208,11 +1214,11 @@ extension Lexer.Cursor {
 
       let tmp = self
       if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
-        let errorOffset = tokenStart.distance(to: tmp)
+        let errorPos = tmp
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
         return Lexer.Result(
           .integerLiteral,
-          error: LexerError(.invalidOctalDigitInIntegerLiteral, byteOffset: errorOffset)
+          error: (.invalidOctalDigitInIntegerLiteral, errorPos)
         )
       }
 
@@ -1225,11 +1231,11 @@ extension Lexer.Cursor {
       let bConsumed = self.advance(matching: "b")  // Consume 'b'
       assert(zeroConsumed && bConsumed)
       if self.is(notAt: "0", "1") {
-        let errorOffset = tokenStart.distance(to: self)
+        let errorPos = self
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
         return Lexer.Result(
           .integerLiteral,
-          error: LexerError(.invalidBinaryDigitInIntegerLiteral, byteOffset: errorOffset)
+          error: (.invalidBinaryDigitInIntegerLiteral, errorPos)
         )
       }
 
@@ -1239,11 +1245,11 @@ extension Lexer.Cursor {
 
       let tmp = self
       if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
-        let errorOffset = tokenStart.distance(to: tmp)
+        let errorPos = tmp
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
         return Lexer.Result(
           .integerLiteral,
-          error: LexerError(.invalidBinaryDigitInIntegerLiteral, byteOffset: errorOffset)
+          error: (.invalidBinaryDigitInIntegerLiteral, errorPos)
         )
       }
 
@@ -1268,11 +1274,11 @@ extension Lexer.Cursor {
       // something else, then this is the end of the token.
       let tmp = self
       if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
-        let errorOffset = tokenStart.distance(to: tmp)
+        let errorPos = tmp
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
         return Lexer.Result(
           .integerLiteral,
-          error: LexerError(.invalidDecimalDigitInIntegerLiteral, byteOffset: errorOffset)
+          error: (.invalidDecimalDigitInIntegerLiteral, errorPos)
         )
       }
 
@@ -1305,20 +1311,23 @@ extension Lexer.Cursor {
           errorKind = .expectedDigitInFloatLiteral
         }
 
-        let errorOffset = tokenStart.distance(to: tmp)
+        let errorPos = tmp
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
-        return Lexer.Result(.floatingLiteral, error: LexerError(errorKind, byteOffset: errorOffset))
+        return Lexer.Result(
+          .floatingLiteral,
+          error: (errorKind, errorPos)
+        )
       }
 
       self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") })
 
       let tmp = self
       if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
-        let errorOffset = tokenStart.distance(to: tmp)
+        let errorPos = tmp
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
         return Lexer.Result(
           .floatingLiteral,
-          error: LexerError(.invalidFloatingPointExponentDigit, byteOffset: errorOffset)
+          error: (.invalidFloatingPointExponentDigit, errorPos)
         )
       }
     }
@@ -1339,11 +1348,11 @@ extension Lexer.Cursor {
       return Lexer.Result(.integerLiteral)
     }
     guard let peeked = self.peek(), Unicode.Scalar(peeked).isHexDigit else {
-      let errorOffset = tokStart.distance(to: self)
+      let errorPos = self
       self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
       return Lexer.Result(
         .integerLiteral,
-        error: LexerError(.invalidHexDigitInIntegerLiteral, byteOffset: errorOffset)
+        error: (.invalidHexDigitInIntegerLiteral, errorPos)
       )
     }
 
@@ -1352,11 +1361,11 @@ extension Lexer.Cursor {
     if self.isAtEndOfFile || self.is(notAt: ".", "p", "P") {
       let tmp = self
       if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
-        let errorOffset = tokStart.distance(to: tmp)
+        let errorPos = tmp
         self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
         return Lexer.Result(
           .integerLiteral,
-          error: LexerError(.invalidHexDigitInIntegerLiteral, byteOffset: errorOffset)
+          error: (.invalidHexDigitInIntegerLiteral, errorPos)
         )
       } else {
         return Lexer.Result(.integerLiteral)
@@ -1385,7 +1394,7 @@ extension Lexer.Cursor {
         }
         return Lexer.Result(
           .integerLiteral,
-          error: LexerError(.expectedBinaryExponentInHexFloatLiteral, byteOffset: tokStart.distance(to: self))
+          error: (.expectedBinaryExponentInHexFloatLiteral, self)
         )
       }
     } else {
@@ -1424,20 +1433,23 @@ extension Lexer.Cursor {
       } else {
         errorKind = .expectedDigitInFloatLiteral
       }
-      let errorOffset = tokStart.distance(to: tmp)
+      let errorPos = tmp
       self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
-      return Lexer.Result(.floatingLiteral, error: LexerError(errorKind, byteOffset: errorOffset))
+      return Lexer.Result(
+        .floatingLiteral,
+        error: (errorKind, errorPos)
+      )
     }
 
     self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") })
 
     let tmp = self
     if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
-      let errorOffset = tokStart.distance(to: tmp)
+      let errorPos = tmp
       self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
       return Lexer.Result(
         .floatingLiteral,
-        error: LexerError(.invalidFloatingPointExponentDigit, byteOffset: errorOffset)
+        error: (.invalidFloatingPointExponentDigit, errorPos)
       )
     }
     return Lexer.Result(.floatingLiteral)
diff --git a/Sources/SwiftParser/Parser.swift b/Sources/SwiftParser/Parser.swift
@@ -512,7 +512,7 @@ extension Parser {
 
     let endIndex = current.textRange.lowerBound.advanced(by: prefix.count)
     var lexerError = current.error
-    if let error = lexerError, error.byteOffset > prefix.count {
+    if let error = lexerError, error.byteOffset > prefix.count + current.leadingTriviaByteLength {
       // The lexer error isn't in the prefix. Drop it.
       lexerError = nil
     }
diff --git a/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift b/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift
@@ -98,9 +98,9 @@ public extension SwiftSyntax.LexerError {
-  /// `tokenText` is the entire text of the token in which the `LexerError`
+  /// `wholeText` is the entire text of the token in which the `LexerError`
   /// occurred, including trivia.
   @_spi(RawSyntax)
-  func diagnostic(tokenText: SyntaxText) -> DiagnosticMessage {
+  func diagnostic(wholeText: SyntaxText) -> DiagnosticMessage {
     var scalarAtErrorOffset: UnicodeScalar {
-      Unicode.Scalar(tokenText[Int(self.byteOffset)])
+      Unicode.Scalar(wholeText[Int(self.byteOffset)])
     }
 
     switch self.kind {
@@ -130,6 +130,8 @@ public extension SwiftSyntax.LexerError {
   }
 
   func diagnostic(in token: TokenSyntax) -> DiagnosticMessage {
-    return self.diagnostic(tokenText: token.tokenView.rawText)
+    return token.tokenView.wholeText { wholeText in  // `byteOffset` is relative to the leading trivia start, so index into the whole text
+      return self.diagnostic(wholeText: wholeText)
+    }
   }
 }
diff --git a/Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift b/Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift
@@ -338,7 +338,7 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
       handleMissingToken(token)
     } else {
       if let lexerError = token.lexerError {
-        self.addDiagnostic(token, position: token.positionAfterSkippingLeadingTrivia.advanced(by: Int(lexerError.byteOffset)), lexerError.diagnostic(in: token))
+        self.addDiagnostic(token, position: token.position.advanced(by: Int(lexerError.byteOffset)), lexerError.diagnostic(in: token))
       }
     }
 
diff --git a/Sources/SwiftSyntax/LexerError.swift b/Sources/SwiftSyntax/LexerError.swift
@@ -33,8 +33,8 @@ public struct LexerError: Hashable {
 
   public let kind: Kind
 
-  /// The offset at which the error is, in bytes relative to the token's content
-  /// start (i.e. relative to the tokens `positionAfterSkippingLeadingTrivia`)
+  /// The offset at which the error is, in bytes relative to the token's leading
+  /// trivia start (i.e. relative to the token's `position`)
   public let byteOffset: UInt16
 
   public init(_ kind: Kind, byteOffset: UInt16) {
@@ -43,6 +43,7 @@ public struct LexerError: Hashable {
   }
 
   public init(_ kind: Kind, byteOffset: Int) {
+    assert(byteOffset >= 0)
     // `type(of: self.byteOffset).max` gets optimized to a constant
     if byteOffset > type(of: self.byteOffset).max {
       self.kind = .lexerErrorOffsetOverflow
diff --git a/Sources/SwiftSyntax/Raw/RawSyntaxTokenView.swift b/Sources/SwiftSyntax/Raw/RawSyntaxTokenView.swift
@@ -65,6 +65,29 @@ public struct RawSyntaxTokenView {
     }
   }
 
+  @_spi(RawSyntax)
+  public func wholeText<T>(_ body: (SyntaxText) -> T) -> T {  // Calls `body` with the token's text including trivia and returns its result.
+    switch raw.rawData.payload {
+    case .parsedToken(let dat):
+      return body(dat.wholeText)  // presumably already spans leading trivia, text and trailing trivia — confirm against the parsed-token payload
+    case .materializedToken(let dat):  // Trivia pieces and token text are read separately here, so concatenate them into a temporary array.
+      var wholeText: [UInt8] = []
+      wholeText.reserveCapacity(leadingTriviaByteLength + textByteLength + trailingTriviaByteLength)
+      for leadingTriviaPiece in dat.leadingTrivia {
+        leadingTriviaPiece.withSyntaxText { wholeText.append(contentsOf: $0) }
+      }
+      wholeText.append(contentsOf: self.rawText)
+      for trailingTriviaPiece in dat.trailingTrivia {
+        trailingTriviaPiece.withSyntaxText { wholeText.append(contentsOf: $0) }
+      }
+      return wholeText.withUnsafeBufferPointer { buffer in  // The buffer, and thus the `SyntaxText`, is only valid while `body` runs.
+        return body(SyntaxText(buffer: buffer))
+      }
+    case .layout(_):  // Only token nodes have text; reaching this case is a programmer error.
+      preconditionFailure("'wholeText' is not available for non-token node")
+    }
+  }
+
   /// The UTF-8 byte length of the leading trivia.
   @_spi(RawSyntax)
   public var leadingTriviaByteLength: Int {
diff --git a/Tests/SwiftParserTest/Assertions.swift b/Tests/SwiftParserTest/Assertions.swift
@@ -143,15 +143,15 @@ private func AssertTokens(
       )
     case (let actualError?, let expectedError?):
       AssertStringsEqualWithDiff(
-        actualError.diagnostic(tokenText: actualLexeme.tokenText).message,
+        actualError.diagnostic(wholeText: actualLexeme.wholeText).message,
         expectedError,
         file: expectedLexeme.file,
         line: expectedLexeme.line
       )
       if let location = markerLocations[expectedLexeme.errorLocationMarker] {
         XCTAssertEqual(
           Int(actualError.byteOffset),
-          location - lexemeStartOffset - actualLexeme.leadingTriviaByteLength,
+          location - lexemeStartOffset,
           "Expected location did not match",
           file: expectedLexeme.file,
           line: expectedLexeme.line

Original file line number	Diff line number	Diff line change
`@@ -512,7 +512,7 @@ extension Parser {`
`512`	`512`
`513`	`513`	`let endIndex = current.textRange.lowerBound.advanced(by: prefix.count)`
`514`	`514`	`var lexerError = current.error`
`515`		`- if let error = lexerError, error.byteOffset > prefix.count {`
	`515`	`+ if let error = lexerError, error.byteOffset > prefix.count + current.leadingTriviaByteLength {`
`516`	`516`	`// The lexer error isn't in the prefix. Drop it.`
`517`	`517`	`lexerError = nil`
`518`	`518`	`}`
Original file line number	Diff line number	Diff line change
`@@ -98,9 +98,9 @@ public extension SwiftSyntax.LexerError {`
`98`	`98`	/// `tokenText` is the entire text of the token in which the `LexerError`
`99`	`99`	`/// occurred, including trivia.`
`100`	`100`	`@_spi(RawSyntax)`
`101`		`- func diagnostic(tokenText: SyntaxText) -> DiagnosticMessage {`
	`101`	`+ func diagnostic(wholeText: SyntaxText) -> DiagnosticMessage {`
`102`	`102`	`var scalarAtErrorOffset: UnicodeScalar {`
`103`		`- Unicode.Scalar(tokenText[Int(self.byteOffset)])`
	`103`	`+ Unicode.Scalar(wholeText[Int(self.byteOffset)])`
`104`	`104`	`}`
`105`	`105`
`106`	`106`	`switch self.kind {`
`@@ -130,6 +130,8 @@ public extension SwiftSyntax.LexerError {`
`130`	`130`	`}`
`131`	`131`
`132`	`132`	`func diagnostic(in token: TokenSyntax) -> DiagnosticMessage {`
`133`		`- return self.diagnostic(tokenText: token.tokenView.rawText)`
	`133`	`+ return token.tokenView.wholeText { wholeText in`
	`134`	`+ return self.diagnostic(wholeText: wholeText)`
	`135`	`+ }`
`134`	`136`	`}`
`135`	`137`	`}`
Original file line number	Diff line number	Diff line change
`@@ -338,7 +338,7 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {`
`338`	`338`	`handleMissingToken(token)`
`339`	`339`	`} else {`
`340`	`340`	`if let lexerError = token.lexerError {`
`341`		`- self.addDiagnostic(token, position: token.positionAfterSkippingLeadingTrivia.advanced(by: Int(lexerError.byteOffset)), lexerError.diagnostic(in: token))`
	`341`	`+ self.addDiagnostic(token, position: token.position.advanced(by: Int(lexerError.byteOffset)), lexerError.diagnostic(in: token))`
`342`	`342`	`}`
`343`	`343`	`}`
`344`	`344`