@@ -182,6 +182,7 @@ mutable struct ParseStream
182
182
lexer:: Tokenize.Lexers.Lexer{IOBuffer}
183
183
# Lookahead buffer for already lexed tokens
184
184
lookahead:: Vector{SyntaxToken}
185
+ lookahead_index:: Int
185
186
# Pool of stream positions for use as working space in parsing
186
187
position_pool:: Vector{Vector{ParseStreamPosition}}
187
188
# Parser output as an ordered sequence of ranges, parent nodes after children.
@@ -207,9 +208,12 @@ mutable struct ParseStream
207
208
# numbers. This means we're inexact for old dev versions but that seems
208
209
# like an acceptable tradeoff.
209
210
ver = (version. major, version. minor)
210
- new (text_buf, text_root, lexer,
211
- Vector {Vector{ParseStreamPosition}} (),
211
+ new (text_buf,
212
+ text_root,
213
+ lexer,
212
214
Vector {SyntaxToken} (),
215
+ 1 ,
216
+ Vector {Vector{ParseStreamPosition}} (),
213
217
Vector {TaggedRange} (),
214
218
Vector {Diagnostic} (),
215
219
next_byte,
@@ -284,19 +288,19 @@ end
284
288
# Stream input interface - the peek_* family of functions
285
289
286
290
# Buffer several tokens ahead
287
- function _buffer_lookahead_tokens (stream :: ParseStream )
291
+ function _buffer_lookahead_tokens (lexer, lookahead )
288
292
had_whitespace = false
289
293
token_count = 0
290
294
while true
291
- raw = Tokenize. Lexers. next_token (stream . lexer)
295
+ raw = Tokenize. Lexers. next_token (lexer)
292
296
k = TzTokens. exactkind (raw)
293
297
was_whitespace = k in (K " Whitespace" , K " Comment" , K " NewlineWs" )
294
298
had_whitespace |= was_whitespace
295
299
f = EMPTY_FLAGS
296
300
had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG)
297
301
raw. dotop && (f |= DOTOP_FLAG)
298
302
raw. suffix && (f |= SUFFIXED_FLAG)
299
- push! (stream . lookahead, SyntaxToken (SyntaxHead (k, f), raw. startbyte + 1 , raw. endbyte + 1 ))
303
+ push! (lookahead, SyntaxToken (SyntaxHead (k, f), raw. startbyte + 1 , raw. endbyte + 1 ))
300
304
token_count += 1
301
305
if k == K " EndMarker"
302
306
break
320
324
# zero whitespace tokens before the next token. The following code is an
321
325
# unrolled optimized version for that fast path. Empirically it seems we
322
326
# only hit the slow path about 5% of the time here.
323
- i = 1
327
+ i = stream . lookahead_index
324
328
if n == 1 && i+ 1 <= length (stream. lookahead)
325
329
if skip_newlines
326
330
k = kind (stream. lookahead[i])
@@ -349,15 +353,20 @@ end
349
353
end
350
354
351
355
@noinline function __lookahead_index (stream, n, skip_newlines)
352
- i = 1
356
+ i = stream . lookahead_index
353
357
while true
354
358
if i > length (stream. lookahead)
355
- _buffer_lookahead_tokens (stream)
359
+ n_to_delete = stream. lookahead_index- 1
360
+ if n_to_delete > 0.9 * length (stream. lookahead)
361
+ Base. _deletebeg! (stream. lookahead, n_to_delete)
362
+ i -= n_to_delete
363
+ stream. lookahead_index = 1
364
+ end
365
+ _buffer_lookahead_tokens (stream. lexer, stream. lookahead)
356
366
end
357
367
k = kind (stream. lookahead[i])
358
- is_skipped = k ∈ (K " Whitespace" , K " Comment" ) ||
359
- (k == K " NewlineWs" && skip_newlines)
360
- if ! is_skipped
368
+ if ! ((k == K " Whitespace" || k == K " Comment" ) ||
369
+ (k == K " NewlineWs" && skip_newlines))
361
370
if n == 1
362
371
return i
363
372
end
@@ -398,7 +407,7 @@ function peek_token(stream::ParseStream, n::Integer=1;
398
407
end
399
408
i = _lookahead_index (stream, n, skip_newlines)
400
409
if ! skip_whitespace
401
- i = 1
410
+ i = stream . lookahead_index
402
411
end
403
412
return stream. lookahead[i]
404
413
end
@@ -445,13 +454,13 @@ end
445
454
#
446
455
# Though note bump() really does both input and output
447
456
448
- # Bump the next `n` tokens
457
+ # Bump every token from the current lookahead position up to and including index `n` of the lookahead buffer
449
458
# flags and remap_kind are applied to any non-trivia tokens
450
- function _bump_n (stream:: ParseStream , n:: Integer , flags, remap_kind= K " None" )
451
- if n <= 0
459
+ function _bump_until_n (stream:: ParseStream , n:: Integer , flags, remap_kind= K " None" )
460
+ if n < stream . lookahead_index
452
461
return
453
462
end
454
- for i = 1 : n
463
+ for i in stream . lookahead_index : n
455
464
tok = stream. lookahead[i]
456
465
k = kind (tok)
457
466
if k == K " EndMarker"
@@ -465,7 +474,7 @@ function _bump_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None")
465
474
last_byte (tok), lastindex (stream. ranges)+ 1 )
466
475
push! (stream. ranges, range)
467
476
end
468
- Base . _deletebeg! ( stream. lookahead, n)
477
+ stream. lookahead_index = n + 1
469
478
stream. next_byte = last_byte (last (stream. ranges)) + 1
470
479
# Defuse the time bomb
471
480
stream. peek_count = 0
@@ -480,7 +489,7 @@ Shift the current token from the input to the output, adding the given flags.
480
489
function bump (stream:: ParseStream , flags= EMPTY_FLAGS; skip_newlines= false ,
481
490
error= nothing , remap_kind:: Kind = K " None" )
482
491
emark = position (stream)
483
- _bump_n (stream, _lookahead_index (stream, 1 , skip_newlines), flags, remap_kind)
492
+ _bump_until_n (stream, _lookahead_index (stream, 1 , skip_newlines), flags, remap_kind)
484
493
if ! isnothing (error)
485
494
emit (stream, emark, K " error" , flags, error= error)
486
495
end
@@ -496,7 +505,7 @@ Bump comments and whitespace tokens preceding the next token
496
505
function bump_trivia (stream:: ParseStream , flags= EMPTY_FLAGS;
497
506
skip_newlines= true , error= nothing )
498
507
emark = position (stream)
499
- _bump_n (stream, _lookahead_index (stream, 1 , skip_newlines) - 1 , EMPTY_FLAGS)
508
+ _bump_until_n (stream, _lookahead_index (stream, 1 , skip_newlines) - 1 , EMPTY_FLAGS)
500
509
if ! isnothing (error)
501
510
emit (stream, emark, K " error" , flags, error= error)
502
511
end
@@ -523,11 +532,12 @@ lexing ambiguities. There's no special whitespace handling — bump any
523
532
whitespace if necessary with bump_trivia.
524
533
"""
525
534
function bump_glue (stream:: ParseStream , kind, flags, num_tokens)
535
+ i = stream. lookahead_index
526
536
span = TaggedRange (SyntaxHead (kind, flags), K " None" ,
527
- first_byte (stream. lookahead[1 ]),
528
- last_byte (stream. lookahead[num_tokens]),
537
+ first_byte (stream. lookahead[i ]),
538
+ last_byte (stream. lookahead[i - 1 + num_tokens]),
529
539
lastindex (stream. ranges) + 1 )
530
- Base . _deletebeg! ( stream. lookahead, num_tokens)
540
+ stream. lookahead_index += num_tokens
531
541
push! (stream. ranges, span)
532
542
stream. next_byte = last_byte (last (stream. ranges)) + 1
533
543
stream. peek_count = 0
@@ -553,7 +563,8 @@ TODO: Are these the only cases? Can we replace this general utility with a
553
563
simpler one which only splits preceding dots?
554
564
"""
555
565
function bump_split (stream:: ParseStream , split_spec... )
556
- tok = popfirst! (stream. lookahead)
566
+ tok = stream. lookahead[stream. lookahead_index]
567
+ stream. lookahead_index += 1
557
568
fbyte = first_byte (tok)
558
569
for (i, (nbyte, k, f)) in enumerate (split_spec)
559
570
lbyte = (i == length (split_spec)) ? last_byte (tok) : fbyte + nbyte - 1
@@ -655,8 +666,9 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...)
655
666
if whitespace
656
667
# It's the whitespace which is the error. Find the range of the current
657
668
# whitespace.
658
- begin_tok_i = 1
659
- end_tok_i = is_whitespace (stream. lookahead[i]) ? i : max (1 , i- 1 )
669
+ begin_tok_i = stream. lookahead_index
670
+ end_tok_i = is_whitespace (stream. lookahead[i]) ?
671
+ i : max (stream. lookahead_index, i- 1 )
660
672
end
661
673
fbyte = first_byte (stream. lookahead[begin_tok_i])
662
674
lbyte = last_byte (stream. lookahead[end_tok_i])
0 commit comments