ruby · kddnewton · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
diff --git a/Steepfile b/Steepfile
@@ -10,6 +10,7 @@ target :lib do
   # TODO: Type-checking these files is still WIP
   ignore "lib/prism/desugar_compiler.rb"
   ignore "lib/prism/lex_compat.rb"
+  ignore "lib/prism/lex_ripper.rb"
   ignore "lib/prism/serialize.rb"
   ignore "lib/prism/ffi.rb"
   ignore "lib/prism/translation"

diff --git a/lib/prism.rb b/lib/prism.rb
@@ -20,7 +20,7 @@ module Prism
   autoload :DSL, "prism/dsl"
   autoload :InspectVisitor, "prism/inspect_visitor"
   autoload :LexCompat, "prism/lex_compat"
-  autoload :LexRipper, "prism/lex_compat"
+  autoload :LexRipper, "prism/lex_ripper"
   autoload :MutationCompiler, "prism/mutation_compiler"
   autoload :Pack, "prism/pack"
   autoload :Pattern, "prism/pattern"

diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
@@ -2,7 +2,6 @@
 # :markup: markdown
 
 require "delegate"
-require "ripper"
 
 module Prism
   # This class is responsible for lexing the source using prism and then
@@ -199,6 +198,58 @@ def deconstruct_keys(keys)
       "__END__": :on___end__
     }.freeze
 
+    # Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states
+    # to reimplement to_s without using Ripper.
+    class State
+      # Ripper-internal bitflags.
+      ALL = %i[
+        BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
+      ].map.with_index.to_h { |name, i| [2 ** i, name] }
+      ALL[0] = :NONE
+      ALL.freeze
+      ALL.each { |value, name| const_set(name, value) }
+
+      # :stopdoc:
+
+      attr_reader :to_int, :to_s
+
+      def initialize(i)
+        @to_int = i
+        @to_s = state_name(i)
+        freeze
+      end
+
+      def [](index)
+        case index
+        when 0, :to_int
+          @to_int
+        when 1, :to_s
+          @to_s
+        else
+          nil
+        end
+      end
+
+      alias to_i to_int
+      alias inspect to_s
+      def pretty_print(q) q.text(to_s) end
+      def ==(i) super or to_int == i end
+      def &(i) self.class.new(to_int & i) end
+      def |(i) self.class.new(to_int | i) end
+      def allbits?(i) to_int.allbits?(i) end
+      def anybits?(i) to_int.anybits?(i) end
+      def nobits?(i) to_int.nobits?(i) end
+
+      # :startdoc:
+
+      private
+
+      # Convert the state flags into the format exposed by ripper.
+      def state_name(bits)
+        ALL.filter_map { |flag, name| name if bits & flag != 0  }.join("|")
+      end
+    end
+
     # When we produce tokens, we produce the same arrays that Ripper does.
     # However, we add a couple of convenience methods onto them to make them a
     # little easier to work with. We delegate all other methods to the array.
@@ -249,8 +300,8 @@ def ==(other) # :nodoc:
     class IdentToken < Token
       def ==(other) # :nodoc:
         (self[0...-1] == other[0...-1]) && (
-          (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
-          (other[3] & Ripper::EXPR_ARG_ANY != 0)
+          (other[3] == State::LABEL | State::END) ||
+          (other[3] & (State::ARG | State::CMDARG) != 0)
         )
       end
     end
@@ -261,8 +312,8 @@ class IgnoredNewlineToken < Token
       def ==(other) # :nodoc:
         return false unless self[0...-1] == other[0...-1]
 
-        if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
-          other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
+        if self[3] == State::ARG | State::LABELED
+          other[3] & State::ARG | State::LABELED != 0
         else
           self[3] == other[3]
         end
@@ -280,8 +331,8 @@ def ==(other) # :nodoc:
     class ParamToken < Token
       def ==(other) # :nodoc:
         (self[0...-1] == other[0...-1]) && (
-          (other[3] == Ripper::EXPR_END) ||
-          (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
+          (other[3] == State::END) ||
+          (other[3] == State::END | State::LABEL)
         )
       end
     end
@@ -615,6 +666,11 @@ def self.build(opening)
 
     private_constant :Heredoc
 
+    # In previous versions of Ruby, Ripper wouldn't flush the bom before the
+    # first token, so we had to have a hack in place to account for that.
+    BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
+    private_constant :BOM_FLUSHED
+
     attr_reader :source, :options
 
     def initialize(source, **options)
@@ -630,13 +686,9 @@ def result
 
       result = Prism.lex(source, **options)
       result_value = result.value
-      previous_state = nil #: Ripper::Lexer::State?
+      previous_state = nil #: State?
       last_heredoc_end = nil #: Integer?
 
-      # In previous versions of Ruby, Ripper wouldn't flush the bom before the
-      # first token, so we had to have a hack in place to account for that. This
-      # checks for that behavior.
-      bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
       bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
 
       result_value.each_with_index do |(token, lex_state), index|
@@ -651,7 +703,7 @@ def result
         if bom && lineno == 1
           column -= 3
 
-          if index == 0 && column == 0 && !bom_flushed
+          if index == 0 && column == 0 && !BOM_FLUSHED
             flushed =
               case token.type
               when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
@@ -675,7 +727,7 @@ def result
 
         event = RIPPER.fetch(token.type)
         value = token.value
-        lex_state = Ripper::Lexer::State.new(lex_state)
+        lex_state = State.new(lex_state)
 
         token =
           case event
@@ -689,7 +741,7 @@ def result
             last_heredoc_end = token.location.end_offset
             IgnoreStateToken.new([[lineno, column], event, value, lex_state])
           when :on_ident
-            if lex_state == Ripper::EXPR_END
+            if lex_state == State::END
               # If we have an identifier that follows a method name like:
               #
               #     def foo bar
@@ -699,7 +751,7 @@ def result
               # yet. We do this more accurately, so we need to allow comparing
               # against both END and END|LABEL.
               ParamToken.new([[lineno, column], event, value, lex_state])
-            elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
+            elsif lex_state == State::END | State::LABEL
               # In the event that we're comparing identifiers, we're going to
               # allow a little divergence. Ripper doesn't account for local
               # variables introduced through named captures in regexes, and we
@@ -739,7 +791,7 @@ def result
                   counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
                 end
 
-                Ripper::Lexer::State.new(result_value[current_index][1])
+                State.new(result_value[current_index][1])
               else
                 previous_state
               end
@@ -867,62 +919,4 @@ def result
   end
 
   private_constant :LexCompat
-
-  # This is a class that wraps the Ripper lexer to produce almost exactly the
-  # same tokens.
-  class LexRipper # :nodoc:
-    attr_reader :source
-
-    def initialize(source)
-      @source = source
-    end
-
-    def result
-      previous = [] #: [[Integer, Integer], Symbol, String, untyped] | []
-      results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]
-
-      lex(source).each do |token|
-        case token[1]
-        when :on_sp
-          # skip
-        when :on_tstring_content
-          if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
-            previous[2] << token[2]
-          else
-            results << token
-            previous = token
-          end
-        when :on_words_sep
-          if previous[1] == :on_words_sep
-            previous[2] << token[2]
-          else
-            results << token
-            previous = token
-          end
-        else
-          results << token
-          previous = token
-        end
-      end
-
-      results
-    end
-
-    private
-
-    if Ripper.method(:lex).parameters.assoc(:keyrest)
-      def lex(source)
-        Ripper.lex(source, raise_errors: true)
-      end
-    else
-      def lex(source)
-        ripper = Ripper::Lexer.new(source)
-        ripper.lex.tap do |result|
-          raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
-        end
-      end
-    end
-  end
-
-  private_constant :LexRipper
 end
diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+require "ripper"
+
+module Prism
+  # This is a class that wraps the Ripper lexer to produce almost exactly the
+  # same tokens.
+  class LexRipper # :nodoc:
+    attr_reader :source
+
+    def initialize(source)
+      @source = source
+    end
+
+    def result
+      previous = [] #: [[Integer, Integer], Symbol, String, untyped] | []
+      results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]
+
+      lex(source).each do |token|
+        case token[1]
+        when :on_sp
+          # skip
+        when :on_tstring_content
+          if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
+            previous[2] << token[2]
+          else
+            results << token
+            previous = token
+          end
+        when :on_words_sep
+          if previous[1] == :on_words_sep
+            previous[2] << token[2]
+          else
+            results << token
+            previous = token
+          end
+        else
+          results << token
+          previous = token
+        end
+      end
+
+      results
+    end
+
+    private
+
+    if Ripper.method(:lex).parameters.assoc(:keyrest)
+      def lex(source)
+        Ripper.lex(source, raise_errors: true)
+      end
+    else
+      def lex(source)
+        ripper = Ripper::Lexer.new(source)
+        ripper.lex.tap do |result|
+          raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
+        end
+      end
+    end
+  end
+
+  private_constant :LexRipper
+end
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
@@ -1,8 +1,6 @@
 # frozen_string_literal: true
 # :markup: markdown
 
-require "ripper"
-
 module Prism
   module Translation
     # This class provides a compatibility layer between prism and Ripper. It

diff --git a/prism.gemspec b/prism.gemspec
@@ -77,6 +77,7 @@ Gem::Specification.new do |spec|
     "lib/prism/ffi.rb",
     "lib/prism/inspect_visitor.rb",
     "lib/prism/lex_compat.rb",
+    "lib/prism/lex_ripper.rb",
     "lib/prism/mutation_compiler.rb",
     "lib/prism/node_ext.rb",
     "lib/prism/node.rb",

diff --git a/rakelib/lex.rake b/rakelib/lex.rake
@@ -126,7 +126,6 @@ TARGETS.each do |name, target|
   desc "Lex #{repo} and compare with lex_compat"
   task "lex:#{name}" => [dirpath, :compile] do
     $:.unshift(File.expand_path("../lib", __dir__))
-    require "ripper"
     require "prism"
 
     plain_text = ENV.fetch("CI", false)
@@ -169,7 +168,6 @@ end
 desc "Lex files and compare with lex_compat"
 task lex: :compile do
   $:.unshift(File.expand_path("../lib", __dir__))
-  require "ripper"
   require "prism"
 
   plain_text = ENV.fetch("CI", false)
@@ -201,7 +199,6 @@ desc "Lex against the most recent version of various rubygems"
 task "lex:rubygems": [:compile, "tmp/failing"] do
   $:.unshift(File.expand_path("../lib", __dir__))
   require "net/http"
-  require "ripper"
   require "rubygems/package"
   require "tmpdir"
   require "prism"
@@ -333,7 +330,6 @@ desc "Lex against the top 100 rubygems"
 task "lex:topgems": ["download:topgems", :compile] do
   $:.unshift(File.expand_path("../lib", __dir__))
   require "net/http"
-  require "ripper"
   require "rubygems/package"
   require "tmpdir"
   require "prism"

diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake
@@ -20,6 +20,7 @@ namespace :typecheck do
     File.write("sorbet/typed_overrides.yml", ERB.new(<<~YAML, trim_mode: "-").result_with_hash(locals))
       false:
         - ./lib/prism/lex_compat.rb
+        - ./lib/prism/lex_ripper.rb
         - ./lib/prism/node_ext.rb
         - ./lib/prism/parse_result.rb
         - ./lib/prism/visitor.rb

diff --git a/test/prism/magic_comment_test.rb b/test/prism/magic_comment_test.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require_relative "test_helper"
+require "ripper"
 
 module Prism
   class MagicCommentTest < TestCase