Skip to content

Commit 49854bb

Browse files
authored
Merge pull request #216 from jrcmramos-bumble/replace-scanner-with-substrings
Replace Foundation scanner implementation
2 parents e523c14 + 445b733 commit 49854bb

File tree

3 files changed

+156
-67
lines changed

3 files changed

+156
-67
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright (c) 2019 Spotify AB.
2+
//
3+
// Licensed to the Apache Software Foundation (ASF) under one
4+
// or more contributor license agreements. See the NOTICE file
5+
// distributed with this work for additional information
6+
// regarding copyright ownership. The ASF licenses this file
7+
// to you under the Apache License, Version 2.0 (the
8+
// "License"); you may not use this file except in compliance
9+
// with the License. You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing,
14+
// software distributed under the License is distributed on an
15+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
// KIND, either express or implied. See the License for the
17+
// specific language governing permissions and limitations
18+
// under the License.
19+
20+
import Foundation
21+
22+
extension String.Index {
    /// Creates an index from an integer offset, using whichever initializer
    /// the active compiler provides.
    ///
    /// - Parameters:
    ///   - offset: Position to convert. With Swift 5.0 or newer it is passed
    ///     to `init(utf16Offset:in:)`; older compilers pass it to
    ///     `init(encodedOffset:)`.
    ///   - string: The string the index refers to. It is only consulted on
    ///     the Swift 5.0+ path.
    init(compilerSafeOffset offset: Int, in string: String) {
        #if swift(>=5.0)
        self.init(utf16Offset: offset, in: string)
        #else
        self.init(encodedOffset: offset)
        #endif
    }
}

Sources/XCLogParser/lexer/Lexer.swift

Lines changed: 43 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public final class Lexer {
2323

2424
static let SLFHeader = "SLF"
2525

26-
let typeDelimiters: CharacterSet
26+
let typeDelimiters: Set<Character>
2727
let filePath: String
2828
var classNames = [String]()
2929
var userDirToRedact: String? {
@@ -38,7 +38,7 @@ public final class Lexer {
3838

3939
public init(filePath: String) {
4040
self.filePath = filePath
41-
self.typeDelimiters = CharacterSet(charactersIn: TokenType.all())
41+
self.typeDelimiters = Set(TokenType.all())
4242
self.redactor = LexRedactor()
4343
}
4444

@@ -53,15 +53,18 @@ public final class Lexer {
5353
redacted: Bool,
5454
withoutBuildSpecificInformation: Bool) throws -> [Token] {
5555
let scanner = Scanner(string: contents)
56+
5657
guard scanSLFHeader(scanner: scanner) else {
5758
throw XCLogParserError.invalidLogHeader(filePath)
5859
}
60+
5961
var tokens = [Token]()
6062
while !scanner.isAtEnd {
63+
6164
guard let logTokens = scanSLFType(scanner: scanner,
6265
redacted: redacted,
6366
withoutBuildSpecificInformation: withoutBuildSpecificInformation),
64-
logTokens.isEmpty == false else {
67+
logTokens.isEmpty == false else {
6568
print(tokens)
6669
throw XCLogParserError.invalidLine(scanner.approximateLine)
6770
}
@@ -71,20 +74,15 @@ public final class Lexer {
7174
}
7275

7376
/// Tries to consume the `Lexer.SLFHeader` marker through the scanner.
///
/// - Parameter scanner: The scanner wrapping the log contents.
/// - Returns: Whatever `scanner.scan(string:)` reports for the header marker.
private func scanSLFHeader(scanner: Scanner) -> Bool {
    let headerWasFound = scanner.scan(string: Lexer.SLFHeader)
    return headerWasFound
}
8179

82-
private func scanSLFType(scanner: Scanner, redacted: Bool, withoutBuildSpecificInformation: Bool) -> [Token]? {
80+
private func scanSLFType(scanner: Scanner,
81+
redacted: Bool,
82+
withoutBuildSpecificInformation: Bool) -> [Token]? {
83+
let payload = self.scanPayload(scanner: scanner)
8384

84-
guard let payload = scanPayload(scanner: scanner) else {
85-
return nil
86-
}
87-
guard let tokenTypes = scanTypeDelimiter(scanner: scanner), tokenTypes.count > 0 else {
85+
guard let tokenTypes = self.scanTypeDelimiter(scanner: scanner), tokenTypes.count > 0 else {
8886
return nil
8987
}
9088

@@ -97,44 +95,30 @@ public final class Lexer {
9795
}
9896
}
9997

100-
private func scanPayload(scanner: Scanner) -> String? {
101-
var payload: String = ""
102-
#if os(Linux)
103-
var char: String?
104-
#else
105-
var char: NSString?
106-
#endif
98+
/// Reads the run of lowercase hexadecimal characters at the scanner's
/// current position.
///
/// - Parameter scanner: The scanner wrapping the log contents.
/// - Returns: The characters that were scanned, or an empty string when the
///   scanner reports no result.
private func scanPayload(scanner: Scanner) -> String {
    let hexDigits: Set<Character> = Set("abcdef0123456789")

    guard let payload = scanner.scanCharacters(from: hexDigits) else {
        return ""
    }
    return payload
}
114103

115104
/// Reads the run of type-delimiter characters at the scanner's current
/// position and maps it to token types.
///
/// - Parameter scanner: The scanner wrapping the log contents.
/// - Returns: `nil` when the scanner yields no result; `[.string]` when the
///   run has more than one character and starts with the string delimiter;
///   otherwise one `TokenType` for every scanned character that has a
///   matching raw value.
private func scanTypeDelimiter(scanner: Scanner) -> [TokenType]? {
    guard let scanned = scanner.scanCharacters(from: self.typeDelimiters) else {
        return nil
    }

    // When the run opens with the string delimiter, the delimiters that follow
    // it are part of the string's contents: give them back to the scanner
    // and report only the string token.
    let trailingCount = scanned.count - 1
    if trailingCount > 0,
        let leading = scanned.first,
        String(leading) == TokenType.string.rawValue {
        scanner.moveOffset(by: -trailingCount)
        return [TokenType.string]
    }

    // Sometimes one or more nil lists (-) directly follow the type delimiter;
    // in that case the delimiter is returned together with one or more
    // `Token.null`.
    return scanned.compactMap { candidate -> TokenType? in
        TokenType(rawValue: String(candidate))
    }
}
139123

140124
private func scanToken(scanner: Scanner,
@@ -252,19 +236,12 @@ public final class Lexer {
252236
scanner: Scanner,
253237
redacted: Bool,
254238
withoutBuildSpecificInformation: Bool) -> String? {
255-
guard let value = Int(length) else {
239+
guard let value = Int(length), let scannedResult = scanner.scan(count: value) else {
256240
print("error parsing string")
257241
return nil
258242
}
259-
#if swift(>=5.0)
260-
let start = String.Index(utf16Offset: scanner.scanLocation, in: scanner.string)
261-
let end = String.Index(utf16Offset: scanner.scanLocation + value, in: scanner.string)
262-
#else
263-
let start = String.Index(encodedOffset: scanner.scanLocation)
264-
let end = String.Index(encodedOffset: scanner.scanLocation + value)
265-
#endif
266-
scanner.scanLocation += value
267-
var result = String(scanner.string[start..<end])
243+
244+
var result = scannedResult
268245
if redacted {
269246
result = redactor.redactUserDir(string: result)
270247
}
@@ -285,19 +262,18 @@ public final class Lexer {
285262
}
286263
}
287264

288-
extension Scanner {
265+
private extension Scanner {
    /// A short excerpt of the text starting at the scanner's current offset,
    /// used as the payload of `XCLogParserError.invalidLine`.
    ///
    /// The excerpt spans at most 21 UTF-16 code units; when fewer remain it
    /// runs to the end of the string. All arithmetic is done in UTF-16 code
    /// units because that is the unit `offset` is expressed in.
    var approximateLine: String {
        let excerptLength = 21
        let totalLength = self.string.utf16.count

        // Clamp both ends so the range is always valid, even when the offset
        // sits at (or was moved past) the end of the contents.
        let startOffset = min(max(self.offset, 0), totalLength)
        let endOffset = min(startOffset + excerptLength, totalLength)

        let start = String.Index(compilerSafeOffset: startOffset, in: self.string)
        let end = String.Index(compilerSafeOffset: endOffset, in: self.string)

        return String(self.string[start..<end])
    }
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Copyright (c) 2019 Spotify AB.
2+
//
3+
// Licensed to the Apache Software Foundation (ASF) under one
4+
// or more contributor license agreements. See the NOTICE file
5+
// distributed with this work for additional information
6+
// regarding copyright ownership. The ASF licenses this file
7+
// to you under the Apache License, Version 2.0 (the
8+
// "License"); you may not use this file except in compliance
9+
// with the License. You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing,
14+
// software distributed under the License is distributed on an
15+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
// KIND, either express or implied. See the License for the
17+
// specific language governing permissions and limitations
18+
// under the License.
19+
20+
import Foundation
21+
22+
/// A minimal forward scanner over a `String`.
///
/// The current position is stored as a UTF-16 code-unit offset (it is turned
/// into a `String.Index` through `String.Index(compilerSafeOffset:in:)`), so
/// every change to `offset` is measured in UTF-16 code units, never in
/// `Character`s. The two differ for grapheme clusters such as `"\r\n"` or
/// characters outside the Basic Multilingual Plane.
final class Scanner {

    /// The complete text being scanned.
    let string: String

    /// Current position, as a UTF-16 code-unit offset from the start of `string`.
    private(set) var offset: Int

    /// End index of `string`, computed once on first access.
    private(set) lazy var stringEndIndex: String.Index = self.string.endIndex

    /// Length of `string` in UTF-16 code units, computed once on first access
    /// so bounds checks do not re-measure the string on every scan.
    private lazy var utf16Count: Int = self.string.utf16.count

    /// `true` once the current position has reached the end of `string`.
    var isAtEnd: Bool {
        return self.offset >= self.utf16Count
    }

    /// Creates a scanner positioned at the start of `string`.
    init(string: String) {
        self.string = string
        self.offset = 0
    }

    /// Consumes the next `count` UTF-16 code units.
    ///
    /// - Parameter count: Number of UTF-16 code units to read.
    /// - Returns: The consumed text, or `nil` (leaving the position untouched)
    ///   when `count` is negative or exceeds what is left to scan.
    func scan(count: Int) -> String? {
        // Compare against the remaining length instead of computing
        // `offset + count`, which could overflow for a huge `count`.
        guard count >= 0, count <= self.utf16Count - self.offset else { return nil }

        let endOffset = self.offset + count
        let start = String.Index(compilerSafeOffset: self.offset, in: self.string)
        let end = String.Index(compilerSafeOffset: endOffset, in: self.string)

        // No `Character`-count validation here: the slice is delimited in
        // UTF-16 units, and its grapheme count legitimately differs from
        // `count` for text such as "\r\n".
        self.offset = endOffset

        return String(self.string[start..<end])
    }

    /// Consumes `value` if the text at the current position starts with it.
    ///
    /// - Parameter value: The expected text.
    /// - Returns: `true` when `value` was found and consumed; `false`
    ///   (leaving the position untouched) otherwise.
    func scan(string value: String) -> Bool {
        let start = String.Index(compilerSafeOffset: self.offset, in: self.string)

        guard self.string[start...].starts(with: value) else { return false }

        self.offset += value.utf16.count
        return true
    }

    /// Consumes the longest run of characters contained in `allowedCharacters`,
    /// starting at the current position.
    ///
    /// - Parameter allowedCharacters: The characters that may be consumed.
    /// - Returns: The consumed run. It is an empty string, not `nil`, when the
    ///   next character is not allowed or the scanner is at the end.
    func scanCharacters(from allowedCharacters: Set<Character>) -> String? {
        let start = String.Index(compilerSafeOffset: self.offset, in: self.string)
        let run = self.string[start...].prefix(while: { allowedCharacters.contains($0) })

        // Advance by the run's UTF-16 length so `offset` stays in its own unit.
        self.offset += run.utf16.count

        return String(run)
    }

    /// Moves the current position by `value` UTF-16 code units (negative values
    /// rewind). The result is clamped to the bounds of `string` so a later
    /// index conversion can never be handed an out-of-range offset.
    func moveOffset(by value: Int) {
        self.offset = min(max(self.offset + value, 0), self.utf16Count)
    }
}

0 commit comments

Comments
 (0)