Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Steepfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ target :lib do
# TODO: Type-checking these files is still WIP
ignore "lib/prism/desugar_compiler.rb"
ignore "lib/prism/lex_compat.rb"
ignore "lib/prism/lex_ripper.rb"
ignore "lib/prism/serialize.rb"
ignore "lib/prism/ffi.rb"
ignore "lib/prism/translation"
Expand Down
2 changes: 1 addition & 1 deletion lib/prism.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module Prism
autoload :DSL, "prism/dsl"
autoload :InspectVisitor, "prism/inspect_visitor"
autoload :LexCompat, "prism/lex_compat"
autoload :LexRipper, "prism/lex_compat"
autoload :LexRipper, "prism/lex_ripper"
autoload :MutationCompiler, "prism/mutation_compiler"
autoload :Pack, "prism/pack"
autoload :Pattern, "prism/pattern"
Expand Down
144 changes: 69 additions & 75 deletions lib/prism/lex_compat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# :markup: markdown

require "delegate"
require "ripper"

module Prism
# This class is responsible for lexing the source using prism and then
Expand Down Expand Up @@ -199,6 +198,58 @@ def deconstruct_keys(keys)
"__END__": :on___end__
}.freeze

# Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states
# to reimplement to_s without using Ripper.
class State
# Ripper-internal bitflags.
ALL = %i[
BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
].map.with_index.to_h { |name, i| [2 ** i, name] }
ALL[0] = :NONE
ALL.freeze
ALL.each { |value, name| const_set(name, value) }

# :stopdoc:

attr_reader :to_int, :to_s

def initialize(i)
@to_int = i
@to_s = state_name(i)
freeze
end

def [](index)
case index
when 0, :to_int
@to_int
when 1, :to_s
@to_s
else
nil
end
end

alias to_i to_int
alias inspect to_s
def pretty_print(q) q.text(to_s) end
def ==(i) super or to_int == i end
def &(i) self.class.new(to_int & i) end
def |(i) self.class.new(to_int | i) end
def allbits?(i) to_int.allbits?(i) end
def anybits?(i) to_int.anybits?(i) end
def nobits?(i) to_int.nobits?(i) end

# :startdoc:

private

# Convert the state flags into the format exposed by ripper.
def state_name(bits)
ALL.filter_map { |flag, name| name if bits & flag != 0 }.join("|")
end
end

# When we produce tokens, we produce the same arrays that Ripper does.
# However, we add a couple of convenience methods onto them to make them a
# little easier to work with. We delegate all other methods to the array.
Expand Down Expand Up @@ -249,8 +300,8 @@ def ==(other) # :nodoc:
class IdentToken < Token
def ==(other) # :nodoc:
(self[0...-1] == other[0...-1]) && (
(other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
(other[3] & Ripper::EXPR_ARG_ANY != 0)
(other[3] == State::LABEL | State::END) ||
(other[3] & (State::ARG | State::CMDARG) != 0)
)
end
end
Expand All @@ -261,8 +312,8 @@ class IgnoredNewlineToken < Token
def ==(other) # :nodoc:
return false unless self[0...-1] == other[0...-1]

if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
if self[3] == State::ARG | State::LABELED
other[3] & State::ARG | State::LABELED != 0
else
self[3] == other[3]
end
Expand All @@ -280,8 +331,8 @@ def ==(other) # :nodoc:
class ParamToken < Token
def ==(other) # :nodoc:
(self[0...-1] == other[0...-1]) && (
(other[3] == Ripper::EXPR_END) ||
(other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
(other[3] == State::END) ||
(other[3] == State::END | State::LABEL)
)
end
end
Expand Down Expand Up @@ -615,6 +666,11 @@ def self.build(opening)

private_constant :Heredoc

# In previous versions of Ruby, Ripper wouldn't flush the bom before the
# first token, so we had to have a hack in place to account for that.
BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
private_constant :BOM_FLUSHED

attr_reader :source, :options

def initialize(source, **options)
Expand All @@ -630,13 +686,9 @@ def result

result = Prism.lex(source, **options)
result_value = result.value
previous_state = nil #: Ripper::Lexer::State?
previous_state = nil #: State?
last_heredoc_end = nil #: Integer?

# In previous versions of Ruby, Ripper wouldn't flush the bom before the
# first token, so we had to have a hack in place to account for that. This
# checks for that behavior.
bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"

result_value.each_with_index do |(token, lex_state), index|
Expand All @@ -651,7 +703,7 @@ def result
if bom && lineno == 1
column -= 3

if index == 0 && column == 0 && !bom_flushed
if index == 0 && column == 0 && !BOM_FLUSHED
flushed =
case token.type
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
Expand All @@ -675,7 +727,7 @@ def result

event = RIPPER.fetch(token.type)
value = token.value
lex_state = Ripper::Lexer::State.new(lex_state)
lex_state = State.new(lex_state)

token =
case event
Expand All @@ -689,7 +741,7 @@ def result
last_heredoc_end = token.location.end_offset
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
when :on_ident
if lex_state == Ripper::EXPR_END
if lex_state == State::END
# If we have an identifier that follows a method name like:
#
# def foo bar
Expand All @@ -699,7 +751,7 @@ def result
# yet. We do this more accurately, so we need to allow comparing
# against both END and END|LABEL.
ParamToken.new([[lineno, column], event, value, lex_state])
elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
elsif lex_state == State::END | State::LABEL
# In the event that we're comparing identifiers, we're going to
# allow a little divergence. Ripper doesn't account for local
# variables introduced through named captures in regexes, and we
Expand Down Expand Up @@ -739,7 +791,7 @@ def result
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
end

Ripper::Lexer::State.new(result_value[current_index][1])
State.new(result_value[current_index][1])
else
previous_state
end
Expand Down Expand Up @@ -867,62 +919,4 @@ def result
end

private_constant :LexCompat

# This is a class that wraps the Ripper lexer to produce almost exactly the
# same tokens.
class LexRipper # :nodoc:
attr_reader :source

def initialize(source)
@source = source
end

def result
previous = [] #: [[Integer, Integer], Symbol, String, untyped] | []
results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]

lex(source).each do |token|
case token[1]
when :on_sp
# skip
when :on_tstring_content
if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
previous[2] << token[2]
else
results << token
previous = token
end
when :on_words_sep
if previous[1] == :on_words_sep
previous[2] << token[2]
else
results << token
previous = token
end
else
results << token
previous = token
end
end

results
end

private

if Ripper.method(:lex).parameters.assoc(:keyrest)
def lex(source)
Ripper.lex(source, raise_errors: true)
end
else
def lex(source)
ripper = Ripper::Lexer.new(source)
ripper.lex.tap do |result|
raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
end
end
end
end

private_constant :LexRipper
end
64 changes: 64 additions & 0 deletions lib/prism/lex_ripper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# frozen_string_literal: true
# :markup: markdown

require "ripper"

module Prism
# This is a class that wraps the Ripper lexer to produce almost exactly the
# same tokens.
class LexRipper # :nodoc:
attr_reader :source

def initialize(source)
@source = source
end

def result
previous = [] #: [[Integer, Integer], Symbol, String, untyped] | []
results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]

lex(source).each do |token|
case token[1]
when :on_sp
# skip
when :on_tstring_content
if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
previous[2] << token[2]
else
results << token
previous = token
end
when :on_words_sep
if previous[1] == :on_words_sep
previous[2] << token[2]
else
results << token
previous = token
end
else
results << token
previous = token
end
end

results
end

private

if Ripper.method(:lex).parameters.assoc(:keyrest)
def lex(source)
Ripper.lex(source, raise_errors: true)
end
else
def lex(source)
ripper = Ripper::Lexer.new(source)
ripper.lex.tap do |result|
raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
end
end
end
end

private_constant :LexRipper
end
2 changes: 0 additions & 2 deletions lib/prism/translation/ripper.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# frozen_string_literal: true
# :markup: markdown

require "ripper"

module Prism
module Translation
# This class provides a compatibility layer between prism and Ripper. It
Expand Down
1 change: 1 addition & 0 deletions prism.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ Gem::Specification.new do |spec|
"lib/prism/ffi.rb",
"lib/prism/inspect_visitor.rb",
"lib/prism/lex_compat.rb",
"lib/prism/lex_ripper.rb",
"lib/prism/mutation_compiler.rb",
"lib/prism/node_ext.rb",
"lib/prism/node.rb",
Expand Down
4 changes: 0 additions & 4 deletions rakelib/lex.rake
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ TARGETS.each do |name, target|
desc "Lex #{repo} and compare with lex_compat"
task "lex:#{name}" => [dirpath, :compile] do
$:.unshift(File.expand_path("../lib", __dir__))
require "ripper"
require "prism"

plain_text = ENV.fetch("CI", false)
Expand Down Expand Up @@ -169,7 +168,6 @@ end
desc "Lex files and compare with lex_compat"
task lex: :compile do
$:.unshift(File.expand_path("../lib", __dir__))
require "ripper"
require "prism"

plain_text = ENV.fetch("CI", false)
Expand Down Expand Up @@ -201,7 +199,6 @@ desc "Lex against the most recent version of various rubygems"
task "lex:rubygems": [:compile, "tmp/failing"] do
$:.unshift(File.expand_path("../lib", __dir__))
require "net/http"
require "ripper"
require "rubygems/package"
require "tmpdir"
require "prism"
Expand Down Expand Up @@ -333,7 +330,6 @@ desc "Lex against the top 100 rubygems"
task "lex:topgems": ["download:topgems", :compile] do
$:.unshift(File.expand_path("../lib", __dir__))
require "net/http"
require "ripper"
require "rubygems/package"
require "tmpdir"
require "prism"
Expand Down
1 change: 1 addition & 0 deletions rakelib/typecheck.rake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace :typecheck do
File.write("sorbet/typed_overrides.yml", ERB.new(<<~YAML, trim_mode: "-").result_with_hash(locals))
false:
- ./lib/prism/lex_compat.rb
- ./lib/prism/lex_ripper.rb
- ./lib/prism/node_ext.rb
- ./lib/prism/parse_result.rb
- ./lib/prism/visitor.rb
Expand Down
1 change: 1 addition & 0 deletions test/prism/magic_comment_test.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

require_relative "test_helper"
require "ripper"

module Prism
class MagicCommentTest < TestCase
Expand Down
Loading