@@ -33,14 +33,14 @@ func advanceLexer(lexer: Lexer) throws -> Token {
33
33
lexer. lastToken = lexer. token
34
34
var token = lexer. lastToken
35
35
36
- if token. kind != . eof {
36
+ if token. kind != . eof {
37
37
repeat {
38
38
token. next = try readToken ( lexer: lexer, prev: token)
39
39
token = token. next!
40
40
} while token. kind == . comment
41
41
42
42
lexer. token = token
43
- }
43
+ }
44
44
45
45
return token
46
46
}
@@ -105,6 +105,17 @@ func getTokenDesc(_ token: Token) -> String {
105
105
}
106
106
107
107
extension String {
108
/**
 * Converts a string index into an integer offset.
 *
 * The offset is counted in UTF-8 code units (bytes), not in characters, so
 * a multi-byte character advances the offset by more than one.
 *
 * - parameter index: an index into this string
 * - returns: the number of UTF-8 code units between `startIndex` and `index`
 */
func offset(of index: Index) -> Int {
    return utf8.distance(from: startIndex, to: index)
}
111
+
112
/**
 * Reads the UTF-8 code unit located at a string index.
 *
 * - parameter index: an index into this string
 * - returns: the code unit at `index`, or `nil` when `index` is at or past
 *   the end of the UTF-8 view
 */
func charCode(at index: Index) -> UInt8? {
    // Subscripting at endIndex would trap, so report "no code unit" instead.
    guard index < utf8.endIndex else {
        return nil
    }
    return utf8[index]
}
118
+
108
119
func charCode( at position: Int ) -> UInt8 ? {
109
120
guard position < utf8. count else {
110
121
return nil
@@ -121,7 +132,7 @@ extension String {
121
132
}
122
133
123
134
func character(_ code: UInt8) -> Character {
    // A single byte always maps to a valid Unicode scalar (U+0000...U+00FF),
    // so this conversion cannot fail.
    return Character(UnicodeScalar(code))
}
126
137
127
138
/**
@@ -220,16 +231,16 @@ func readToken(lexer: Lexer, prev: Token) throws -> Token {
220
231
)
221
232
// .
222
233
case 46 :
223
- if body. charCode ( at: position + 1 ) == 46 && body. charCode ( at: position + 2 ) == 46 {
224
- return Token (
225
- kind: . spread,
226
- start: position,
227
- end: position + 3 ,
228
- line: line,
229
- column: col,
230
- prev: prev
231
- )
232
- }
234
+ if body. charCode ( at: position + 1 ) == 46 && body. charCode ( at: position + 2 ) == 46 {
235
+ return Token (
236
+ kind: . spread,
237
+ start: position,
238
+ end: position + 3 ,
239
+ line: line,
240
+ column: col,
241
+ prev: prev
242
+ )
243
+ }
233
244
// :
234
245
case 58 :
235
246
return Token (
@@ -535,14 +546,13 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int {
535
546
*/
536
547
func readString( source: Source , start: Int , line: Int , col: Int , prev: Token ) throws -> Token {
537
548
let body = source. body
538
- let bodyLength = body. utf8. count
539
- var position = start + 1
540
- var chunkStart = position
549
+ var positionIndex = body. utf8. index ( body. utf8. startIndex, offsetBy: start + 1 )
550
+ var chunkStartIndex = positionIndex
541
551
var currentCode : UInt8 ? = 0
542
552
var value = " "
543
553
544
- while position < bodyLength {
545
- currentCode = body. charCode ( at: position )
554
+ while positionIndex < body . utf8 . endIndex {
555
+ currentCode = body. charCode ( at: positionIndex )
546
556
547
557
// not LineTerminator not Quote (")
548
558
guard let code = currentCode, code != 0x000A && code != 0x000D && code != 34 else {
@@ -553,16 +563,17 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
553
563
if code < 0x0020 && code != 0x0009 {
554
564
throw syntaxError (
555
565
source: source,
556
- position: position ,
566
+ position: body . offset ( of : positionIndex ) ,
557
567
description: " Invalid character within String: \( character ( code) ) . "
558
568
)
559
569
}
560
570
561
- position += 1
571
+ let startIterationIndex = positionIndex
572
+ positionIndex = body. utf8. index ( after: positionIndex)
562
573
563
574
if code == 92 { // \
564
- value += body. slice ( start : chunkStart , end : position - 1 )
565
- currentCode = body. charCode ( at: position )
575
+ value += String ( body. utf8 [ chunkStartIndex ..< startIterationIndex ] ) !
576
+ currentCode = body. charCode ( at: positionIndex )
566
577
567
578
if let code = currentCode {
568
579
switch code {
@@ -575,53 +586,59 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
575
586
case 114 : value += " \r "
576
587
case 116 : value += " \t "
577
588
case 117 : // u
589
+ let aIndex = body. utf8. index ( after: positionIndex)
590
+ let bIndex = body. utf8. index ( after: aIndex)
591
+ let cIndex = body. utf8. index ( after: bIndex)
592
+ let dIndex = body. utf8. index ( after: cIndex)
593
+
578
594
let charCode = uniCharCode (
579
- a: body. charCode ( at : position + 1 ) ! ,
580
- b: body. charCode ( at : position + 2 ) ! ,
581
- c: body. charCode ( at : position + 3 ) ! ,
582
- d: body. charCode ( at : position + 4 ) !
595
+ a: body. utf8 [ aIndex ] ,
596
+ b: body. utf8 [ bIndex ] ,
597
+ c: body. utf8 [ cIndex ] ,
598
+ d: body. utf8 [ dIndex ]
583
599
)
584
600
585
601
if charCode < 0 {
586
602
throw syntaxError (
587
603
source: source,
588
- position: position ,
604
+ position: body . offset ( of : positionIndex ) ,
589
605
description:
590
606
" Invalid character escape sequence: " +
591
- " \\ u \( body. slice ( start : position + 1 , end : position + 5 ) ) . "
607
+ " \\ u \( body. utf8 [ aIndex ... dIndex ] ) . "
592
608
)
593
609
}
594
610
595
611
value += String ( Character ( UnicodeScalar ( UInt32 ( charCode) ) !) )
596
- position += 4
612
+
613
+ positionIndex = dIndex
597
614
default :
598
615
throw syntaxError (
599
616
source: source,
600
- position: position ,
617
+ position: body . offset ( of : positionIndex ) ,
601
618
description: " Invalid character escape sequence: \\ \( character ( code) ) . "
602
619
)
603
620
}
604
621
}
605
622
606
- position += 1
607
- chunkStart = position
623
+ positionIndex = body . utf8 . index ( after : positionIndex )
624
+ chunkStartIndex = positionIndex
608
625
}
609
626
}
610
627
611
628
if currentCode != 34 { // quote (")
612
629
throw syntaxError (
613
630
source: source,
614
- position: position ,
631
+ position: body . offset ( of : positionIndex ) ,
615
632
description: " Unterminated string. "
616
633
)
617
634
}
618
635
619
- value += body. slice ( start : chunkStart , end : position )
636
+ value += String ( body. utf8 [ chunkStartIndex ..< positionIndex ] ) !
620
637
621
638
return Token (
622
639
kind: . string,
623
640
start: start,
624
- end: position + 1 ,
641
+ end: body . offset ( of : positionIndex ) + 1 ,
625
642
line: line,
626
643
column: col,
627
644
value: value,
@@ -640,7 +657,7 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
640
657
* which means the result of ORing the char2hex() will also be negative.
641
658
*/
642
659
func uniCharCode(a: UInt8, b: UInt8, c: UInt8, d: UInt8) -> Int {
    // Each argument is one hex digit of a four-digit escape, most significant
    // first; every digit fills one 4-bit nibble of the result.
    // char2hex yields -1 for a non-hex byte. Shifting -1 left keeps the sign
    // bit set and ORing cannot clear it, so any invalid digit makes the whole
    // result negative, which callers treat as "invalid escape".
    return char2hex(a) << 12 | char2hex(b) << 8 | char2hex(c) << 4 | char2hex(d)
}
645
662
646
663
/**
@@ -654,9 +671,9 @@ func uniCharCode(a: UInt8, b: UInt8, c: UInt8, d: UInt8) -> Int {
654
671
func char2hex(_ a: UInt8) -> Int {
    // Maps one ASCII byte to the value of the hexadecimal digit it spells.
    // Bytes that are not hex digits map to -1.
    switch a {
    case 48...57: // 0-9
        return Int(a) - 48
    case 65...70: // A-F
        return Int(a) - 55
    case 97...102: // a-f
        return Int(a) - 87
    default:
        return -1
    }
}
661
678
662
679
/**
@@ -670,12 +687,12 @@ func readName(source: Source, position: Int, line: Int, col: Int, prev: Token) -
670
687
var end = position + 1
671
688
672
689
while end != bodyLength,
673
- let code = body. charCode ( at: end) ,
674
- ( code == 95 || // _
675
- code >= 48 && code <= 57 || // 0-9
676
- code >= 65 && code <= 90 || // A-Z
677
- code >= 97 && code <= 122 ) { // a-z
678
- end += 1
690
+ let code = body. charCode ( at: end) ,
691
+ ( code == 95 || // _
692
+ code >= 48 && code <= 57 || // 0-9
693
+ code >= 65 && code <= 90 || // A-Z
694
+ code >= 97 && code <= 122 ) { // a-z
695
+ end += 1
679
696
}
680
697
681
698
return Token (
0 commit comments