ruby · st0012 · Nov 23, 2025 · Nov 23, 2025 · kou · Nov 23, 2025
diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb
@@ -1,27 +1,59 @@
 # frozen_string_literal: true
-require 'ripper'
+require 'prism'
 
 ##
-# Wrapper for Ripper lex states
+# Wrapper for Prism lex with Ripper-compatible API
 
 class RDoc::Parser::RipperStateLex
-  # :stopdoc:
-
+  # A single lexer token: 1-based source line (+line_no+), column (+char_no+),
+  # event name such as +:on_ident+ (+kind+), the token source (+text+) and the
+  # lexer state after the token (+state+).
   Token = Struct.new(:line_no, :char_no, :kind, :text, :state)
 
-  EXPR_END   = Ripper::EXPR_END
-  EXPR_ENDFN = Ripper::EXPR_ENDFN
-  EXPR_ARG   = Ripper::EXPR_ARG
-  EXPR_FNAME = Ripper::EXPR_FNAME
+  # Lexer states from Ripper
+  #
+  # These are hard-coded integers instead of references to Ripper::EXPR_*.
+  # NOTE(review): presumably they must stay equal to Ripper's lexer state
+  # bits - confirm against the Ripper constants when changing them.
+  EXPR_END   = 0x2    # 2 - Expression ends
+  EXPR_ENDFN = 0x8    # 8 - Function definition ends
+  EXPR_ARG   = 0x10   # 16 - Inside argument list
+  EXPR_FNAME = 0x80   # 128 - Inside function name
+  EXPR_LABEL = 0x400  # 1024 - Label in hash literal
+
+  # Operator texts that get_op_tk re-tags as +:on_ident+ when the token state
+  # equals EXPR_ARG.
+  REDEFINABLE_OPERATORS = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~].freeze
 
-  class InnerStateLex < Ripper::Filter
-    def initialize(code)
-      super(code)
+  # Returns tokens parsed from +code+.
+  #
+  # Builds one lexer for +code+ and collects every token handed back by
+  # #get_squashed_tk until it returns +nil+. A StopIteration raised while
+  # squashing is rescued, and the tokens collected up to that point are
+  # returned.
+  def self.parse(code)
+    lex = new(code)
+    tokens = []
+    begin
+      while tk = lex.get_squashed_tk
+        tokens.push tk
+      end
+    rescue StopIteration
     end
+    tokens
+  end
+
+  # Returns +true+ if lex state will be +END+ after +token+.
+  #
+  # NOTE(review): the value returned is the result of
+  # <tt>token[:state] & EXPR_END</tt>, not a strict boolean. With the Integer
+  # states stored by #initialize a cleared bit yields +0+, which Ruby treats
+  # as truthy - confirm how callers use the result before relying on it.
+  def self.end?(token)
+    (token[:state] & EXPR_END)
+  end
 
-    def on_default(event, tok, data)
-      data << Token.new(lineno, column, event, tok, state)
+  # New lexer for +code+.
+  #
+  # Lexes +code+ once with Prism.lex_compat and stores the resulting Token
+  # structs in <tt>@tokens</tt>, after re-inserting +:on_sp+ tokens and
+  # normalizing newline tokens that follow +end+.
+  def initialize(code)
+    # @buf receives squashed heredoc tokens (see get_heredoc_tk);
+    # @heredoc_queue holds heredoc openers until the following line end is reached.
+    @buf = []
+    @heredoc_queue = []
+    # Use Prism.lex_compat for Ripper-compatible tokenization
+    lex_result = Prism.lex_compat(code)
+    prism_tokens = lex_result.value.map do |(pos, kind, text, state)|
+      line_no, char_no = pos
+      # Convert Ripper::Lexer::State to integer to avoid Ripper dependency
+      # (anything that does not respond to to_i is stored unchanged)
+      state_int = state.respond_to?(:to_i) ? state.to_i : state
+      Token.new(line_no, char_no, kind, text, state_int)
     end
+
+    # Prism.lex_compat omits :on_sp tokens, so we need to insert them for proper
+    # syntax highlighting and token stream reconstruction
+    tokens_with_spaces = insert_space_tokens(prism_tokens, code)
+
+    # Fix Prism incompatibility: Prism returns :on_ignored_nl after `def foo; end`
+    # but parsers expect :on_nl for proper token collection in single-line methods
+    @tokens = normalize_ignored_nl_for_single_line_methods(tokens_with_spaces)
   end
 
   def get_squashed_tk
@@ -32,38 +64,32 @@ def get_squashed_tk
     end
     return nil if tk.nil?
     case tk[:kind]
-    when :on_symbeg then
+    when :on_symbeg
       tk = get_symbol_tk(tk)
-    when :on_tstring_beg then
+    when :on_tstring_beg
       tk = get_string_tk(tk)
-    when :on_backtick then
+    when :on_backtick
       if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0
         tk[:kind] = :on_ident
-        tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
+        tk[:state] = EXPR_ARG
       else
         tk = get_string_tk(tk)
       end
-    when :on_regexp_beg then
+    when :on_regexp_beg
       tk = get_regexp_tk(tk)
-    when :on_embdoc_beg then
+    when :on_embdoc_beg
       tk = get_embdoc_tk(tk)
-    when :on_heredoc_beg then
+    when :on_heredoc_beg
       @heredoc_queue << retrieve_heredoc_info(tk)
-    when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then
+    when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end
       if !@heredoc_queue.empty?
         get_heredoc_tk(*@heredoc_queue.shift)
       elsif tk[:text].nil? # :on_ignored_nl sometimes gives nil
         tk[:text] = ''
       end
-    when :on_words_beg then
-      tk = get_words_tk(tk)
-    when :on_qwords_beg then
+    when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg
       tk = get_words_tk(tk)
-    when :on_symbols_beg then
-      tk = get_words_tk(tk)
-    when :on_qsymbols_beg then
-      tk = get_words_tk(tk)
-    when :on_op then
+    when :on_op
       if '&.' == tk[:text]
         tk[:kind] = :on_period
       else
@@ -73,7 +99,9 @@ def get_squashed_tk
     tk
   end
 
-  private def get_symbol_tk(tk)
+  private
+
+  def get_symbol_tk(tk)
     is_symbol = true
     symbol_tk = Token.new(tk.line_no, tk.char_no, :on_symbol)
     if ":'" == tk[:text] or ':"' == tk[:text] or tk[:text].start_with?('%s')
@@ -82,31 +110,10 @@ def get_squashed_tk
       symbol_tk[:state] = tk1[:state]
     else
       case (tk1 = get_squashed_tk)[:kind]
-      when :on_ident
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
       when :on_tstring_content
         symbol_tk[:text] = ":#{tk1[:text]}"
         symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end
-      when :on_tstring_end
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_op
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_ivar
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_cvar
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_gvar
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_const
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_kw
+      # One arm for every token kind that is simply prefixed with ":".
+      # :on_gvar is part of the list: the per-kind arms this line replaces
+      # handled global variables (e.g. :$stdout) the same way.
+      when :on_ident, :on_tstring_end, :on_op, :on_ivar, :on_cvar, :on_gvar, :on_const, :on_kw
         symbol_tk[:text] = ":#{tk1[:text]}"
         symbol_tk[:state] = tk1[:state]
       else
@@ -120,7 +127,7 @@ def get_squashed_tk
     tk
   end
 
-  private def get_string_tk(tk)
+  def get_string_tk(tk)
     string = tk[:text]
     state = nil
     kind = :on_tstring
@@ -139,15 +146,15 @@ def get_squashed_tk
         break
       else
         string = string + inner_str_tk[:text]
-        if :on_embexpr_beg == inner_str_tk[:kind] then
+        if :on_embexpr_beg == inner_str_tk[:kind]
           kind = :on_dstring if :on_tstring == kind
         end
       end
     end
     Token.new(tk.line_no, tk.char_no, kind, string, state)
   end
 
-  private def get_regexp_tk(tk)
+  def get_regexp_tk(tk)
     string = tk[:text]
     state = nil
     loop do
@@ -165,7 +172,7 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, :on_regexp, string, state)
   end
 
-  private def get_embdoc_tk(tk)
+  def get_embdoc_tk(tk)
     string = tk[:text]
     until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do
       string = string + embdoc_tk[:text]
@@ -174,16 +181,16 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, :on_embdoc, string, embdoc_tk.state)
   end
 
-  private def get_heredoc_tk(heredoc_name, indent)
-    string = ''
+  def get_heredoc_tk(heredoc_name, indent)
+    string = +''
     start_tk = nil
     prev_tk = nil
     until heredoc_end?(heredoc_name, indent, tk = @tokens.shift) do
       start_tk = tk unless start_tk
       if (prev_tk.nil? or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no]
-        string = string + (' ' * tk[:char_no])
+        string << (' ' * tk[:char_no])
       end
-      string = string + tk[:text]
+      string << tk[:text]
       prev_tk = tk
     end
     start_tk = tk unless start_tk
@@ -193,15 +200,15 @@ def get_squashed_tk
     @buf.unshift heredoc_tk
   end
 
-  private def retrieve_heredoc_info(tk)
+  # Splits a heredoc opener token (e.g. <tt><<~'EOS'</tt>) into the pieces
+  # needed to find its terminator.
+  #
+  # Returns <tt>[name, indent]</tt>: +name+ is the opener text with the leading
+  # <tt><<</tt>, an optional <tt>-</tt> or <tt>~</tt> and any matching quote
+  # characters removed; +indent+ is +0+ (truthy) when the opener uses
+  # <tt><<-</tt> or <tt><<~</tt> and +nil+ otherwise.
+  def retrieve_heredoc_info(tk)
     name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
     indent = tk[:text] =~ /\A<<[-~]/
     [name, indent]
   end
 
-  private def heredoc_end?(name, indent, tk)
+  def heredoc_end?(name, indent, tk)
     result = false
-    if :on_heredoc_end == tk[:kind] then
+    if :on_heredoc_end == tk[:kind]
       tk_name = tk[:text].chomp
       tk_name.lstrip! if indent
       if name == tk_name
@@ -211,8 +218,8 @@ def get_squashed_tk
     result
   end
 
-  private def get_words_tk(tk)
-    string = ''
+  def get_words_tk(tk)
+    string = +''
     start_token = tk[:text]
     start_quote = tk[:text].rstrip[-1]
     line_no = tk[:line_no]
@@ -232,36 +239,31 @@ def get_squashed_tk
       if tk.nil?
         end_token = end_quote
         break
-      elsif :on_tstring_content == tk[:kind] then
-        string += tk[:text]
-      elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind] then
-        if end_quote == tk[:text].strip then
+      elsif :on_tstring_content == tk[:kind]
+        string << tk[:text]
+      elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind]
+        if end_quote == tk[:text].strip
           end_token = tk[:text]
           break
         else
-          string += tk[:text]
+          string << tk[:text]
         end
       else
-        string += tk[:text]
+        string << tk[:text]
       end
     end
     text = "#{start_token}#{string}#{end_token}"
     Token.new(line_no, char_no, :on_dstring, text, state)
   end
 
-  private def get_op_tk(tk)
-    redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~]
-    if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then
-      tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
+  def get_op_tk(tk)
+    if REDEFINABLE_OPERATORS.include?(tk[:text]) and tk[:state] == EXPR_ARG
+      tk[:state] = EXPR_ARG
       tk[:kind] = :on_ident
-    elsif tk[:text] =~ /^[-+]$/ then
+    elsif tk[:text] =~ /\A[-+]\z/
       tk_ahead = get_squashed_tk
       case tk_ahead[:kind]
-      when :on_int, :on_float, :on_rational, :on_imaginary then
-        tk[:text] += tk_ahead[:text]
-        tk[:kind] = tk_ahead[:kind]
-        tk[:state] = tk_ahead[:state]
-      when :on_heredoc_beg, :on_tstring, :on_dstring # frozen/non-frozen string literal
+      when :on_int, :on_float, :on_rational, :on_imaginary, :on_heredoc_beg, :on_tstring, :on_dstring
         tk[:text] += tk_ahead[:text]
         tk[:kind] = tk_ahead[:kind]
         tk[:state] = tk_ahead[:state]
@@ -272,31 +274,66 @@ def get_squashed_tk
     tk
   end
 
-  # :startdoc:
-
-  # New lexer for +code+.
-  def initialize(code)
-    @buf = []
-    @heredoc_queue = []
-    @inner_lex = InnerStateLex.new(code)
-    @tokens = @inner_lex.parse([])
-  end
-
-  # Returns tokens parsed from +code+.
-  def self.parse(code)
-    lex = self.new(code)
-    tokens = []
-    begin
-      while tk = lex.get_squashed_tk
-        tokens.push tk
+  # Rewrites +:on_ignored_nl+ to +:on_nl+ when the newline follows an +end+
+  # keyword on the same line (e.g. after <tt>def foo; end</tt>).
+  #
+  # +tokens+ is the token array after insert_space_tokens has run, so an
+  # +:on_sp+ token may sit between +end+ and the newline when the line has
+  # trailing blanks. Such +:on_sp+ tokens are skipped when looking for the
+  # preceding token; comparing only adjacent pairs would miss that case.
+  #
+  # Only the keyword text is checked, so this applies to any +end+, not just
+  # the one closing a single-line method. Token kinds are updated in place and
+  # the same array is returned.
+  def normalize_ignored_nl_for_single_line_methods(tokens)
+    prev_token = nil # last token seen that is not :on_sp
+    tokens.each do |token|
+      if token.kind == :on_ignored_nl && prev_token &&
+         prev_token.kind == :on_kw && prev_token.text == 'end' &&
+         prev_token.line_no == token.line_no
+        token[:kind] = :on_nl
       end
-    rescue StopIteration
+      prev_token = token unless token.kind == :on_sp
     end
     tokens
   end
 
-  # Returns +true+ if lex state will be +END+ after +token+.
-  def self.end?(token)
-    (token[:state] & EXPR_END)
+  # Re-creates the +:on_sp+ tokens that Prism.lex_compat leaves out.
+  #
+  # +tokens+ is the array of Token structs built in #initialize and +code+ is
+  # the source text they were lexed from. Returns +tokens+ itself when it is
+  # empty, otherwise a new array holding every original token plus an
+  # +:on_sp+ token for:
+  #
+  # * whitespace-only text before the first token of a line,
+  # * any gap between two consecutive tokens on the same line, and
+  # * a trailing backslash-newline continuation (with the blanks before it).
+  #
+  # Columns are treated as byte offsets, so slicing uses String#byteslice and
+  # token widths use String#bytesize; character-based indexing would misplace
+  # every gap that follows a multibyte character on the same line.
+  # NOTE(review): assumes Prism.lex_compat reports Ripper-style byte columns -
+  # confirm. Tokens whose text spans several lines are not given special
+  # treatment here.
+  def insert_space_tokens(tokens, code)
+    return tokens if tokens.empty?
+
+    lines = code.lines
+    result = []
+    prev_token = nil
+
+    tokens.each_with_index do |token, i|
+      line_text = lines[token.line_no - 1]
+
+      # Check for leading spaces at the start of a line
+      # (when current token is not on the same line as previous token and doesn't start at column 0)
+      if line_text && prev_token && prev_token.line_no < token.line_no && token.char_no > 0
+        leading_spaces = line_text.byteslice(0, token.char_no)
+        # valid_encoding? keeps match? from raising on a broken byte sequence
+        if leading_spaces && leading_spaces.valid_encoding? && leading_spaces.match?(/\A\s+\z/)
+          result << Token.new(token.line_no, 0, :on_sp, leading_spaces, prev_token.state)
+        end
+      end
+
+      result << token
+
+      next_token = tokens[i + 1]
+      # Byte column just past this token; to_s guards against a nil text
+      current_end_col = token.char_no + token.text.to_s.bytesize
+
+      # Insert space tokens for gaps between tokens on the same line
+      if next_token && next_token.line_no == token.line_no && current_end_col < next_token.char_no
+        space_text = line_text&.byteslice(current_end_col...next_token.char_no)
+        if space_text && !space_text.empty?
+          result << Token.new(token.line_no, current_end_col, :on_sp, space_text, token.state)
+        end
+      # Handle backslash-newline line continuations for proper display
+      elsif next_token && next_token.line_no > token.line_no
+        # byteslice returns nil when the start column lies past the end of the line
+        rest_of_line = line_text&.byteslice(current_end_col..-1)
+        if rest_of_line&.valid_encoding? && rest_of_line.match?(/\A\s*\\\n?\z/)
+          # Insert space tokens for whitespace and backslash-newline
+          result << Token.new(token.line_no, current_end_col, :on_sp, rest_of_line, token.state)
+        end
+      end
+
+      prev_token = token
+    end
+
+    result
   end
 end