11# frozen_string_literal: true
22
3+ require "set"
34require "strscan"
45
56module Prism
@@ -8,16 +9,17 @@ class Parser
89 # Accepts a list of prism tokens and converts them into the expected
910 # format for the parser gem.
1011 class Lexer
12+ # These token types are always skipped during conversion.
13+ TYPES_ALWAYS_SKIP = %i[ IGNORED_NEWLINE __END__ EOF ] . to_set
14+ private_constant :TYPES_ALWAYS_SKIP
15+
1116 # The direct translation of types between the two lexers.
1217 TYPES = {
1318 # These tokens should never appear in the output of the lexer.
14- EOF : nil ,
1519 MISSING : nil ,
1620 NOT_PROVIDED : nil ,
17- IGNORED_NEWLINE : nil ,
1821 EMBDOC_END : nil ,
1922 EMBDOC_LINE : nil ,
20- __END__ : nil ,
2123
2224 # These tokens have more or less direct mappings.
2325 AMPERSAND : :tAMPER2 ,
@@ -193,18 +195,18 @@ class Lexer
193195 #
194196 # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, `kDO` is still returned
195197 # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
196- LAMBDA_TOKEN_TYPES = [ :kDO_LAMBDA , :tLAMBDA , :tLAMBEG ]
198+ LAMBDA_TOKEN_TYPES = [ :kDO_LAMBDA , :tLAMBDA , :tLAMBEG ] . to_set
197199
198200 # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
199201 # The following token types are listed as those classified as `tLPAREN`.
200202 LPAREN_CONVERSION_TOKEN_TYPES = [
201203 :kBREAK , :kCASE , :tDIVIDE , :kFOR , :kIF , :kNEXT , :kRETURN , :kUNTIL , :kWHILE , :tAMPER , :tANDOP , :tBANG , :tCOMMA , :tDOT2 , :tDOT3 ,
202204 :tEQL , :tLPAREN , :tLPAREN2 , :tLPAREN_ARG , :tLSHFT , :tNL , :tOP_ASGN , :tOROP , :tPIPE , :tSEMI , :tSTRING_DBEG , :tUMINUS , :tUPLUS
203- ]
205+ ] . to_set
204206
205207 # Types of tokens that are allowed to continue a method call with comments in-between.
206208 # For these, the parser gem doesn't emit a newline token after the last comment.
207- COMMENT_CONTINUATION_TYPES = [ :COMMENT , :AMPERSAND_DOT , :DOT ]
209+ COMMENT_CONTINUATION_TYPES = [ :COMMENT , :AMPERSAND_DOT , :DOT ] . to_set
208210 private_constant :COMMENT_CONTINUATION_TYPES
209211
210212 # Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -251,7 +253,7 @@ def to_a
251253 while index < length
252254 token , state = lexed [ index ]
253255 index += 1
254- next if %i[ IGNORED_NEWLINE __END__ EOF ] . include? ( token . type )
256+ next if TYPES_ALWAYS_SKIP . include? ( token . type )
255257
256258 type = TYPES . fetch ( token . type )
257259 value = token . value
@@ -342,7 +344,7 @@ def to_a
342344 when :tSTRING_BEG
343345 next_token = lexed [ index ] [ 0 ]
344346 next_next_token = lexed [ index + 1 ] [ 0 ]
345- basic_quotes = [ " \" " , "'" ] . include? ( value )
347+ basic_quotes = value == '"' || value == "'"
346348
347349 if basic_quotes && next_token &.type == :STRING_END
348350 next_location = token . location . join ( next_token . location )
0 commit comments