22# :markup: markdown
33
44require "delegate"
5- require "ripper"
65
76module Prism
87 # This class is responsible for lexing the source using prism and then
@@ -199,6 +198,58 @@ def deconstruct_keys(keys)
199198 "__END__" : :on___end__
200199 } . freeze
201200
# Pretty much a 1:1 copy of Ripper::Lexer::State. We list all of the
# available states so that to_s can be reimplemented without using Ripper.
class State
  # Ripper-internal bitflags, keyed by flag value. Each name in the list is
  # assigned the next power of two (BEG => 1, END => 2, ...), and 0 is
  # registered as :NONE.
  ALL = %i[
    BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
  ].map.with_index.to_h { |name, i| [2 ** i, name] }
  ALL[0] = :NONE
  ALL.freeze

  # Expose every flag as a constant on this class (State::BEG, State::END,
  # ..., State::NONE) whose value is the integer flag.
  ALL.each { |value, name| const_set(name, value) }

  # :stopdoc:

  attr_reader :to_int, :to_s

  # Wraps the integer bitset +i+. The string form is computed once up front
  # and the instance is frozen afterwards.
  def initialize(i)
    @to_int = i
    @to_s = state_name(i)
    freeze
  end

  # Array-like access: 0 or :to_int returns the integer bitset, 1 or :to_s
  # returns the string form, and anything else returns nil.
  def [](index)
    case index
    when 0, :to_int
      @to_int
    when 1, :to_s
      @to_s
    end
  end

  alias to_i to_int
  alias inspect to_s

  def pretty_print(q)
    q.text(to_s)
  end

  # Equal to itself (via super) or to anything the integer bitset is equal
  # to, so a State can be compared directly against the flag constants.
  def ==(i)
    super || to_int == i
  end

  # Bitwise operators return a new State wrapping the combined bitset.
  def &(i)
    self.class.new(to_int & i)
  end

  def |(i)
    self.class.new(to_int | i)
  end

  # Bit predicates are forwarded to the underlying integer.
  def allbits?(i)
    to_int.allbits?(i)
  end

  def anybits?(i)
    to_int.anybits?(i)
  end

  def nobits?(i)
    to_int.nobits?(i)
  end

  # :startdoc:

  private

  # Convert the state flags into the format exposed by ripper: the names of
  # all set flags joined by "|", in ascending flag order. When no flag is
  # set at all, the name registered for 0 ("NONE") is returned instead of an
  # empty string.
  def state_name(bits)
    names = ALL.filter_map { |flag, name| name if bits & flag != 0 }
    names.empty? ? ALL.fetch(0).to_s : names.join("|")
  end
end
252+
202253 # When we produce tokens, we produce the same arrays that Ripper does.
203254 # However, we add a couple of convenience methods onto them to make them a
204255 # little easier to work with. We delegate all other methods to the array.
@@ -249,8 +300,8 @@ def ==(other) # :nodoc:
class IdentToken < Token
  # Two tokens match when everything except the trailing element is equal
  # and the other token's state (index 3) is either exactly LABEL|END or has
  # at least one of the ARG / CMDARG bits set.
  def ==(other) # :nodoc:
    return false unless self[0...-1] == other[0...-1]

    other_state = other[3]
    return true if other_state == (State::LABEL | State::END)

    (other_state & (State::ARG | State::CMDARG)) != 0
  end
end
@@ -261,8 +312,8 @@ class IgnoredNewlineToken < Token
261312 def ==( other ) # :nodoc:
262313 return false unless self [ 0 ...-1 ] == other [ 0 ...-1 ]
263314
264- if self [ 3 ] == Ripper :: EXPR_ARG | Ripper :: EXPR_LABELED
265- other [ 3 ] & Ripper :: EXPR_ARG | Ripper :: EXPR_LABELED != 0
315+ if self [ 3 ] == State :: ARG | State :: LABELED
316+ other [ 3 ] & State :: ARG | State :: LABELED != 0
266317 else
267318 self [ 3 ] == other [ 3 ]
268319 end
@@ -280,8 +331,8 @@ def ==(other) # :nodoc:
class ParamToken < Token
  # Two tokens match when everything except the trailing element is equal
  # and the other token's state (index 3) is either END or END|LABEL.
  def ==(other) # :nodoc:
    return false unless self[0...-1] == other[0...-1]

    other_state = other[3]
    accepted = [State::END, State::END | State::LABEL]
    accepted.any? { |expected| other_state == expected }
  end
end
@@ -615,6 +666,11 @@ def self.build(opening)
615666
616667 private_constant :Heredoc
617668
669+ # In previous versions of Ruby, Ripper wouldn't flush the bom before the
670+ # first token, so we had to have a hack in place to account for that.
671+ BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
672+ private_constant :BOM_FLUSHED
673+
618674 attr_reader :source , :options
619675
620676 def initialize ( source , **options )
@@ -630,13 +686,9 @@ def result
630686
631687 result = Prism . lex ( source , **options )
632688 result_value = result . value
633- previous_state = nil #: Ripper::Lexer:: State?
689+ previous_state = nil #: State?
634690 last_heredoc_end = nil #: Integer?
635691
636- # In previous versions of Ruby, Ripper wouldn't flush the bom before the
637- # first token, so we had to have a hack in place to account for that. This
638- # checks for that behavior.
639- bom_flushed = Ripper . lex ( "\xEF \xBB \xBF # test" ) [ 0 ] [ 0 ] [ 1 ] == 0
640692 bom = source . byteslice ( 0 ..2 ) == "\xEF \xBB \xBF "
641693
642694 result_value . each_with_index do |( token , lex_state ) , index |
@@ -651,7 +703,7 @@ def result
651703 if bom && lineno == 1
652704 column -= 3
653705
654- if index == 0 && column == 0 && !bom_flushed
706+ if index == 0 && column == 0 && !BOM_FLUSHED
655707 flushed =
656708 case token . type
657709 when :BACK_REFERENCE , :INSTANCE_VARIABLE , :CLASS_VARIABLE ,
@@ -675,7 +727,7 @@ def result
675727
676728 event = RIPPER . fetch ( token . type )
677729 value = token . value
678- lex_state = Ripper :: Lexer :: State . new ( lex_state )
730+ lex_state = State . new ( lex_state )
679731
680732 token =
681733 case event
@@ -689,7 +741,7 @@ def result
689741 last_heredoc_end = token . location . end_offset
690742 IgnoreStateToken . new ( [ [ lineno , column ] , event , value , lex_state ] )
691743 when :on_ident
692- if lex_state == Ripper :: EXPR_END
744+ if lex_state == State :: END
693745 # If we have an identifier that follows a method name like:
694746 #
695747 # def foo bar
@@ -699,7 +751,7 @@ def result
699751 # yet. We do this more accurately, so we need to allow comparing
700752 # against both END and END|LABEL.
701753 ParamToken . new ( [ [ lineno , column ] , event , value , lex_state ] )
702- elsif lex_state == Ripper :: EXPR_END | Ripper :: EXPR_LABEL
754+ elsif lex_state == State :: END | State :: LABEL
703755 # In the event that we're comparing identifiers, we're going to
704756 # allow a little divergence. Ripper doesn't account for local
705757 # variables introduced through named captures in regexes, and we
@@ -739,7 +791,7 @@ def result
739791 counter += { on_embexpr_beg : -1 , on_embexpr_end : 1 } [ current_event ] || 0
740792 end
741793
742- Ripper :: Lexer :: State . new ( result_value [ current_index ] [ 1 ] )
794+ State . new ( result_value [ current_index ] [ 1 ] )
743795 else
744796 previous_state
745797 end
0 commit comments