Skip to content

Commit 5886def

Browse files
committed
[Parser] Escaped character in string literal cleanup
Make both 'EscapedCharacterLex' and 'CharacterLex' carry a 'Unicode.Scalar' as the result of lexing an escaped character. The result did not need to be a 'UInt32' or a 'Character'; using 'Unicode.Scalar' throughout avoids unnecessary conversions.
1 parent d36f0c1 commit 5886def

File tree

2 files changed

+32
-31
lines changed

2 files changed

+32
-31
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1545,8 +1545,8 @@ extension Lexer.Cursor {
15451545
case success(Unicode.Scalar)
15461546

15471547
/// An escaped character, e.g. `\n` or `\u{1234}`. It has been validated that
1548-
/// this is a valid character
1549-
case validatedEscapeSequence(Character)
1548+
/// this is a valid unicode scalar.
1549+
case validatedEscapeSequence(Unicode.Scalar)
15501550

15511551
/// The end of a string literal has been reached.
15521552
case endOfString
@@ -1608,13 +1608,8 @@ extension Lexer.Cursor {
16081608
return .success(Unicode.Scalar("\\"))
16091609
}
16101610
switch self.lexEscapedCharacter(isMultilineString: stringLiteralKind == .multiLine) {
1611-
case .success(let escapedCharacterCode):
1612-
// Check to see if the encoding is valid.
1613-
if let validatedScalar = Unicode.Scalar(escapedCharacterCode) {
1614-
return .validatedEscapeSequence(Character(validatedScalar))
1615-
} else {
1616-
return .error(.invalidEscapeSequenceInStringLiteral)
1617-
}
1611+
case .success(let codePoint):
1612+
return .validatedEscapeSequence(codePoint)
16181613
case .error(let kind):
16191614
return .error(kind)
16201615
}
@@ -1635,7 +1630,7 @@ extension Lexer.Cursor {
16351630
enum EscapedCharacterLex {
16361631
// Successfully lexed an escape sequence that represents the Unicode character
16371632
// at the given codepoint
1638-
case success(UInt32)
1633+
case success(Unicode.Scalar)
16391634
case error(TokenDiagnostic.Kind)
16401635
}
16411636

@@ -1649,13 +1644,13 @@ extension Lexer.Cursor {
16491644
// Escape processing. We already ate the "\".
16501645
switch self.peek() {
16511646
// Simple single-character escapes.
1652-
case "0": _ = self.advance(); return .success(UInt32(UInt8(ascii: "\0")))
1653-
case "n": _ = self.advance(); return .success(UInt32(UInt8(ascii: "\n")))
1654-
case "r": _ = self.advance(); return .success(UInt32(UInt8(ascii: "\r")))
1655-
case "t": _ = self.advance(); return .success(UInt32(UInt8(ascii: "\t")))
1656-
case #"""#: _ = self.advance(); return .success(UInt32(UInt8(ascii: #"""#)))
1657-
case "'": _ = self.advance(); return .success(UInt32(UInt8(ascii: "'")))
1658-
case "\\": _ = self.advance(); return .success(UInt32(UInt8(ascii: "\\")))
1647+
case "0": _ = self.advance(); return .success("\0")
1648+
case "n": _ = self.advance(); return .success("\n")
1649+
case "r": _ = self.advance(); return .success("\r")
1650+
case "t": _ = self.advance(); return .success("\t")
1651+
case #"""#: _ = self.advance(); return .success(#"""#)
1652+
case "'": _ = self.advance(); return .success("'")
1653+
case "\\": _ = self.advance(); return .success("\\")
16591654

16601655
case "u": // e.g. \u{1234}
16611656
_ = self.advance()
@@ -1667,7 +1662,7 @@ extension Lexer.Cursor {
16671662
return self.lexUnicodeEscape()
16681663
case "\n", "\r":
16691664
if isMultilineString && self.maybeConsumeNewlineEscape() {
1670-
return .success(UInt32(UInt8(ascii: "\n")))
1665+
return .success("\n")
16711666
}
16721667
return .error(.invalidEscapeSequenceInStringLiteral)
16731668
case nil:
@@ -1692,24 +1687,30 @@ extension Lexer.Cursor {
16921687
precondition(quoteConsumed)
16931688

16941689
let digitStart = self
1695-
var numDigits = 0
1696-
while self.advance(if: { $0.isHexDigit }) {
1697-
numDigits += 1
1698-
}
1690+
self.advance(while: { $0.isHexDigit })
1691+
1692+
let digitText = SyntaxText(
1693+
baseAddress: digitStart.pointer,
1694+
count: digitStart.distance(to: self)
1695+
)
16991696

17001697
guard self.advance(matching: "}") else {
17011698
return .error(.expectedClosingBraceInUnicodeEscape)
17021699
}
17031700

1704-
if numDigits == 0 || numDigits > 8 {
1701+
guard 1 <= digitText.count && digitText.count <= 8 else {
17051702
return .error(.invalidNumberOfHexDigitsInUnicodeEscape)
17061703
}
17071704

1708-
if let codePoint = UInt32(String(decoding: digitStart.input[0..<numDigits], as: UTF8.self), radix: 16) {
1709-
return .success(codePoint)
1710-
} else {
1705+
guard
1706+
// FIXME: Implement 'UInt32(_: SyntaxText, radix:)'.
1707+
let codePoint = UInt32(String(syntaxText: digitText), radix: 16),
1708+
let scalar = Unicode.Scalar.init(codePoint)
1709+
else {
17111710
return .error(.invalidEscapeSequenceInStringLiteral)
17121711
}
1712+
1713+
return .success(scalar)
17131714
}
17141715

17151716
private mutating func maybeConsumeNewlineEscape() -> Bool {

Sources/SwiftParser/StringLiteralRepresentedLiteralValue.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@ extension StringSegmentSyntax {
7171
) {
7272
precondition(!hasError, "appendUnescapedLiteralValue relies on properly parsed literals")
7373

74-
var text = content.text
75-
text.withUTF8 { buffer in
74+
let rawText = content.rawText
75+
76+
rawText.withBuffer { buffer in
7677
var cursor = Lexer.Cursor(input: buffer, previous: 0)
7778

7879
// Put the cursor in the string literal lexing state. This is just
@@ -88,10 +89,9 @@ extension StringSegmentSyntax {
8889
)
8990

9091
switch lex {
91-
case .success(let scalar):
92+
case .success(let scalar),
93+
.validatedEscapeSequence(let scalar):
9294
output.append(Character(scalar))
93-
case .validatedEscapeSequence(let character):
94-
output.append(character)
9595
case .endOfString, .error:
9696
// We get an error at the end of the string because
9797
// `lexCharacterInStringLiteral` expects the closing quote.

0 commit comments

Comments
 (0)