Skip to content

Commit b14d2d2

Browse files
author
Igor Palaguta
committed
Fix performance
1 parent 1ab3c90 commit b14d2d2

File tree

2 files changed

+71
-45
lines changed

2 files changed

+71
-45
lines changed

Sources/GraphQL/Language/Lexer.swift

Lines changed: 62 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -33,14 +33,14 @@ func advanceLexer(lexer: Lexer) throws -> Token {
3333
lexer.lastToken = lexer.token
3434
var token = lexer.lastToken
3535

36-
if token.kind != .eof {
36+
if token.kind != .eof {
3737
repeat {
3838
token.next = try readToken(lexer: lexer, prev: token)
3939
token = token.next!
4040
} while token.kind == .comment
4141

4242
lexer.token = token
43-
}
43+
}
4444

4545
return token
4646
}
@@ -105,6 +105,17 @@ func getTokenDesc(_ token: Token) -> String {
105105
}
106106

107107
extension String {
108+
func offset(of index: Index) -> Int {
109+
return utf8.distance(from: startIndex, to: index)
110+
}
111+
112+
func charCode(at index: Index) -> UInt8? {
113+
guard index < utf8.endIndex else {
114+
return nil
115+
}
116+
return utf8[index]
117+
}
118+
108119
func charCode(at position: Int) -> UInt8? {
109120
guard position < utf8.count else {
110121
return nil
@@ -121,7 +132,7 @@ extension String {
121132
}
122133

123134
func character(_ code: UInt8) -> Character {
124-
return Character(UnicodeScalar(code))
135+
return Character(UnicodeScalar(code))
125136
}
126137

127138
/**
@@ -220,16 +231,16 @@ func readToken(lexer: Lexer, prev: Token) throws -> Token {
220231
)
221232
// .
222233
case 46:
223-
if body.charCode(at: position + 1) == 46 && body.charCode(at: position + 2) == 46 {
224-
return Token(
225-
kind: .spread,
226-
start: position,
227-
end: position + 3,
228-
line: line,
229-
column: col,
230-
prev: prev
231-
)
232-
}
234+
if body.charCode(at: position + 1) == 46 && body.charCode(at: position + 2) == 46 {
235+
return Token(
236+
kind: .spread,
237+
start: position,
238+
end: position + 3,
239+
line: line,
240+
column: col,
241+
prev: prev
242+
)
243+
}
233244
// :
234245
case 58:
235246
return Token(
@@ -535,14 +546,13 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int {
535546
*/
536547
func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token {
537548
let body = source.body
538-
let bodyLength = body.utf8.count
539-
var position = start + 1
540-
var chunkStart = position
549+
var positionIndex = body.utf8.index(body.utf8.startIndex, offsetBy: start + 1)
550+
var chunkStartIndex = positionIndex
541551
var currentCode: UInt8? = 0
542552
var value = ""
543553

544-
while position < bodyLength {
545-
currentCode = body.charCode(at: position)
554+
while positionIndex < body.utf8.endIndex {
555+
currentCode = body.charCode(at: positionIndex)
546556

547557
// not LineTerminator not Quote (")
548558
guard let code = currentCode, code != 0x000A && code != 0x000D && code != 34 else {
@@ -553,16 +563,17 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
553563
if code < 0x0020 && code != 0x0009 {
554564
throw syntaxError(
555565
source: source,
556-
position: position,
566+
position: body.offset(of: positionIndex),
557567
description: "Invalid character within String: \(character(code))."
558568
)
559569
}
560570

561-
position += 1
571+
let startIterationIndex = positionIndex
572+
positionIndex = body.utf8.index(after: positionIndex)
562573

563574
if code == 92 { // \
564-
value += body.slice(start: chunkStart, end: position - 1)
565-
currentCode = body.charCode(at: position)
575+
value += String(body.utf8[chunkStartIndex..<startIterationIndex])!
576+
currentCode = body.charCode(at: positionIndex)
566577

567578
if let code = currentCode {
568579
switch code {
@@ -575,53 +586,59 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
575586
case 114: value += "\r"
576587
case 116: value += "\t"
577588
case 117: // u
589+
let aIndex = body.utf8.index(after: positionIndex)
590+
let bIndex = body.utf8.index(after: aIndex)
591+
let cIndex = body.utf8.index(after: bIndex)
592+
let dIndex = body.utf8.index(after: cIndex)
593+
578594
let charCode = uniCharCode(
579-
a: body.charCode(at: position + 1)!,
580-
b: body.charCode(at: position + 2)!,
581-
c: body.charCode(at: position + 3)!,
582-
d: body.charCode(at: position + 4)!
595+
a: body.utf8[aIndex],
596+
b: body.utf8[bIndex],
597+
c: body.utf8[cIndex],
598+
d: body.utf8[dIndex]
583599
)
584600

585601
if charCode < 0 {
586602
throw syntaxError(
587603
source: source,
588-
position: position,
604+
position: body.offset(of: positionIndex),
589605
description:
590606
"Invalid character escape sequence: " +
591-
"\\u\(body.slice(start: position + 1, end: position + 5))."
607+
"\\u\(body.utf8[aIndex...dIndex])."
592608
)
593609
}
594610

595611
value += String(Character(UnicodeScalar(UInt32(charCode))!))
596-
position += 4
612+
613+
positionIndex = dIndex
597614
default:
598615
throw syntaxError(
599616
source: source,
600-
position: position,
617+
position: body.offset(of: positionIndex),
601618
description: "Invalid character escape sequence: \\\(character(code))."
602619
)
603620
}
604621
}
605622

606-
position += 1
607-
chunkStart = position
623+
positionIndex = body.utf8.index(after: positionIndex)
624+
chunkStartIndex = positionIndex
608625
}
609626
}
610627

611628
if currentCode != 34 { // quote (")
612629
throw syntaxError(
613630
source: source,
614-
position: position,
631+
position: body.offset(of: positionIndex),
615632
description: "Unterminated string."
616633
)
617634
}
618635

619-
value += body.slice(start: chunkStart, end: position)
636+
value += String(body.utf8[chunkStartIndex..<positionIndex])!
620637

621638
return Token(
622639
kind: .string,
623640
start: start,
624-
end: position + 1,
641+
end: body.offset(of: positionIndex) + 1,
625642
line: line,
626643
column: col,
627644
value: value,
@@ -640,7 +657,7 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
640657
* which means the result of ORing the char2hex() will also be negative.
641658
*/
642659
func uniCharCode(a: UInt8, b: UInt8, c: UInt8, d: UInt8) -> Int {
643-
return char2hex(a) << 12 | char2hex(b) << 8 | char2hex(c) << 4 | char2hex(d)
660+
return char2hex(a) << 12 | char2hex(b) << 8 | char2hex(c) << 4 | char2hex(d)
644661
}
645662

646663
/**
@@ -654,9 +671,9 @@ func uniCharCode(a: UInt8, b: UInt8, c: UInt8, d: UInt8) -> Int {
654671
func char2hex(_ a: UInt8) -> Int {
655672
let a = Int(a)
656673
return a >= 48 && a <= 57 ? a - 48 : // 0-9
657-
a >= 65 && a <= 70 ? a - 55 : // A-F
658-
a >= 97 && a <= 102 ? a - 87 : // a-f
659-
-1
674+
a >= 65 && a <= 70 ? a - 55 : // A-F
675+
a >= 97 && a <= 102 ? a - 87 : // a-f
676+
-1
660677
}
661678

662679
/**
@@ -670,12 +687,12 @@ func readName(source: Source, position: Int, line: Int, col: Int, prev: Token) -
670687
var end = position + 1
671688

672689
while end != bodyLength,
673-
let code = body.charCode(at: end),
674-
(code == 95 || // _
675-
code >= 48 && code <= 57 || // 0-9
676-
code >= 65 && code <= 90 || // A-Z
677-
code >= 97 && code <= 122) { // a-z
678-
end += 1
690+
let code = body.charCode(at: end),
691+
(code == 95 || // _
692+
code >= 48 && code <= 57 || // 0-9
693+
code >= 65 && code <= 90 || // A-Z
694+
code >= 97 && code <= 122) { // a-z
695+
end += 1
679696
}
680697

681698
return Token(

Tests/GraphQLTests/LanguageTests/LexerTests.swift

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,15 @@ class LexerTests : XCTestCase {
199199
XCTAssertEqual(token, expected)
200200
}
201201

202+
func testLongStrings() throws {
203+
measure {
204+
let token = try! lexOne("\"\(String(repeating: "123456", count: 10_000))\"")
205+
206+
XCTAssertEqual(token.start, 0)
207+
XCTAssertEqual(token.end, 60_002)
208+
}
209+
}
210+
202211
func testStringErrors() throws {
203212
XCTAssertThrowsError(try lexOne("\""))
204213
// "Syntax Error GraphQL (1:2) Unterminated string"

0 commit comments

Comments
 (0)