Skip to content

Commit d95f6d2

Browse files
committed
Tokenize: remove startpos and endpos from RawToken
The line and column are only necessary for diagnostics, so we can defer the work of computing/tracking these until diagnostics are emitted. This speeds up tokenization with RawToken by about 20%.
1 parent cd76123 commit d95f6d2

File tree

3 files changed

+12
-13
lines changed

3 files changed

+12
-13
lines changed

Tokenize/src/lexer.jl

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,13 @@ function readchar(l::Lexer{I}) where {I <: IO}
221221
return l.chars[1]
222222
end
223223

224+
function readchar(l::Lexer{I,RawToken}) where {I <: IO}
225+
c = readchar(l.io)
226+
l.chars = (l.chars[2], l.chars[3], l.chars[4], c)
227+
l.charspos = (l.charspos[2], l.charspos[3], l.charspos[4], position(l.io))
228+
return l.chars[1]
229+
end
230+
224231
readon(l::Lexer{I,RawToken}) where {I <: IO} = l.chars[1]
225232
function readon(l::Lexer{I,Token}) where {I <: IO}
226233
if l.charstore.size != 0
@@ -308,9 +315,7 @@ function emit(l::Lexer{IO_t,RawToken}, kind::Kind, err::TokenError = Tokens.NO_E
308315
end
309316
end
310317

311-
tok = RawToken(kind, (l.token_start_row, l.token_start_col),
312-
(l.current_row, l.current_col - 1),
313-
startpos(l), position(l) - 1, err, l.dotop, suffix)
318+
tok = RawToken(kind, startpos(l), position(l) - 1, err, l.dotop, suffix)
314319

315320
l.dotop = false
316321
l.last_token = kind

Tokenize/src/token.jl

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -76,19 +76,16 @@ Token() = Token(ERROR, (0,0), (0,0), 0, 0, "", UNKNOWN, false, false)
7676
struct RawToken <: AbstractToken
7777
kind::Kind
7878
# Offsets into a string or buffer
79-
startpos::Tuple{Int, Int} # row, col where token starts /end, col is a string index
80-
endpos::Tuple{Int, Int}
8179
startbyte::Int # The byte where the token start in the buffer
8280
endbyte::Int # The byte where the token ended in the buffer
8381
token_error::TokenError
8482
dotop::Bool
8583
suffix::Bool
8684
end
87-
function RawToken(kind::Kind, startposition::Tuple{Int, Int}, endposition::Tuple{Int, Int},
88-
startbyte::Int, endbyte::Int)
89-
RawToken(kind, startposition, endposition, startbyte, endbyte, NO_ERR, false, false)
85+
function RawToken(kind::Kind, startbyte::Int, endbyte::Int)
86+
RawToken(kind, startbyte, endbyte, NO_ERR, false, false)
9087
end
91-
RawToken() = RawToken(ERROR, (0,0), (0,0), 0, 0, UNKNOWN, false, false)
88+
RawToken() = RawToken(ERROR, 0, 0, UNKNOWN, false, false)
9289

9390

9491
const _EMPTY_TOKEN = Token()
@@ -177,9 +174,7 @@ end
177174
Base.print(io::IO, t::Token) = print(io, untokenize(t))
178175

179176
function Base.show(io::IO, t::RawToken)
180-
start_r, start_c = startpos(t)
181-
end_r, end_c = endpos(t)
182-
print(io, rpad(string(start_r, ",", start_c, "-", end_r, ",", end_c), 17, " "))
177+
print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " "))
183178
print(io, rpad(kind(t), 15, " "))
184179
end
185180

Tokenize/src/utilities.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,6 @@ eof(io::IO) = Base.eof(io)
198198
eof(c::Char) = c === EOF_CHAR
199199

200200
readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char)
201-
takechar(io::IO) = (readchar(io); io)
202201

203202
# Checks whether a Char is an operator, which can not be juxtaposed with another
204203
# Char to be an operator (i.e <=), and can be prefixed by a dot (.)

0 commit comments

Comments (0)