Skip to content

Commit 7841970

Browse files
committed
Decouple ripper translator from ripper library
Ripper exposes Ripper::Lexer::State in its output, which is a bit of a problem. To make this work, I basically copy-pasted the implementation. I'm unsure if that is acceptable, so I added a test to make sure that these values never go out of sync. I don't imagine them changing often; prism maps them 1:1 for its own usage. This also fixed the shim by accident: `Ripper.lex` resolved to `Translation::Ripper.lex` when it should have been the original. Removing the need for the original resolves that issue.
1 parent fb9caf1 commit 7841970

File tree

2 files changed

+81
-17
lines changed

2 files changed

+81
-17
lines changed

lib/prism/lex_compat.rb

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# :markup: markdown
33

44
require "delegate"
5-
require "ripper"
65

76
module Prism
87
# This class is responsible for lexing the source using prism and then
@@ -199,6 +198,58 @@ def deconstruct_keys(keys)
199198
"__END__": :on___end__
200199
}.freeze
201200

201+
# Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states
202+
# to reimplement to_s without using Ripper.
203+
class State
  # Ripper-internal bitflags, keyed by flag value. Each listed name gets one
  # bit, in order (BEG = 1, END = 2, ...); 0 is registered as :NONE. Every
  # entry is also exposed as a constant (State::BEG, State::NONE, ...).
  ALL = %i[
    BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
  ].map.with_index.to_h { |name, i| [2 ** i, name] }
  ALL[0] = :NONE
  ALL.freeze
  ALL.each { |value, name| const_set(name, value) }

  # :stopdoc:

  # to_int is the raw integer bitset, to_s is its "A|B|C" rendering.
  attr_reader :to_int, :to_s

  # Wrap the integer bitset +i+. The string form is computed once up front
  # and the instance is frozen.
  def initialize(i)
    @to_int = i
    @to_s = state_name(i)
    freeze
  end

  # Array-like / hash-like access: 0 or :to_int gives the integer, 1 or :to_s
  # gives the string, anything else gives nil.
  def [](index)
    case index
    when 0, :to_int
      @to_int
    when 1, :to_s
      @to_s
    else
      nil
    end
  end

  alias to_i to_int
  alias inspect to_s
  def pretty_print(q) q.text(to_s) end

  # Equal to another object if it is the same object or if our integer value
  # equals it, so a State can be compared directly against a flag constant.
  def ==(i) super || to_int == i end

  # Bitwise operators return a new State wrapping the combined bits.
  def &(i) self.class.new(to_int & i) end
  def |(i) self.class.new(to_int | i) end

  def allbits?(i) to_int.allbits?(i) end
  def anybits?(i) to_int.anybits?(i) end
  def nobits?(i) to_int.nobits?(i) end

  # :startdoc:

  private

  # Convert the state flags into the format exposed by ripper: the names of
  # all set flags joined with "|". When no known flag is set, the NONE name is
  # used instead of an empty string (the 0 entry can never match the bit test
  # below, so it has to be handled explicitly).
  def state_name(bits)
    names = ALL.filter_map { |flag, name| name if bits & flag != 0 }
    names.empty? ? ALL[0].to_s : names.join("|")
  end
end
252+
202253
# When we produce tokens, we produce the same arrays that Ripper does.
203254
# However, we add a couple of convenience methods onto them to make them a
204255
# little easier to work with. We delegate all other methods to the array.
@@ -249,8 +300,8 @@ def ==(other) # :nodoc:
249300
class IdentToken < Token
250301
def ==(other) # :nodoc:
251302
(self[0...-1] == other[0...-1]) && (
252-
(other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
253-
(other[3] & Ripper::EXPR_ARG_ANY != 0)
303+
(other[3] == State::LABEL | State::END) ||
304+
(other[3] & (State::ARG | State::CMDARG) != 0)
254305
)
255306
end
256307
end
@@ -261,8 +312,8 @@ class IgnoredNewlineToken < Token
261312
def ==(other) # :nodoc:
262313
return false unless self[0...-1] == other[0...-1]
263314

264-
if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
265-
other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
315+
if self[3] == State::ARG | State::LABELED
316+
other[3] & State::ARG | State::LABELED != 0
266317
else
267318
self[3] == other[3]
268319
end
@@ -280,8 +331,8 @@ def ==(other) # :nodoc:
280331
class ParamToken < Token
281332
def ==(other) # :nodoc:
282333
(self[0...-1] == other[0...-1]) && (
283-
(other[3] == Ripper::EXPR_END) ||
284-
(other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
334+
(other[3] == State::END) ||
335+
(other[3] == State::END | State::LABEL)
285336
)
286337
end
287338
end
@@ -615,6 +666,11 @@ def self.build(opening)
615666

616667
private_constant :Heredoc
617668

669+
# In previous versions of Ruby, Ripper wouldn't flush the bom before the
670+
# first token, so we had to have a hack in place to account for that.
671+
BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
672+
private_constant :BOM_FLUSHED
673+
618674
attr_reader :source, :options
619675

620676
def initialize(source, **options)
@@ -630,13 +686,9 @@ def result
630686

631687
result = Prism.lex(source, **options)
632688
result_value = result.value
633-
previous_state = nil #: Ripper::Lexer::State?
689+
previous_state = nil #: State?
634690
last_heredoc_end = nil #: Integer?
635691

636-
# In previous versions of Ruby, Ripper wouldn't flush the bom before the
637-
# first token, so we had to have a hack in place to account for that. This
638-
# checks for that behavior.
639-
bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
640692
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
641693

642694
result_value.each_with_index do |(token, lex_state), index|
@@ -651,7 +703,7 @@ def result
651703
if bom && lineno == 1
652704
column -= 3
653705

654-
if index == 0 && column == 0 && !bom_flushed
706+
if index == 0 && column == 0 && !BOM_FLUSHED
655707
flushed =
656708
case token.type
657709
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
@@ -675,7 +727,7 @@ def result
675727

676728
event = RIPPER.fetch(token.type)
677729
value = token.value
678-
lex_state = Ripper::Lexer::State.new(lex_state)
730+
lex_state = State.new(lex_state)
679731

680732
token =
681733
case event
@@ -689,7 +741,7 @@ def result
689741
last_heredoc_end = token.location.end_offset
690742
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
691743
when :on_ident
692-
if lex_state == Ripper::EXPR_END
744+
if lex_state == State::END
693745
# If we have an identifier that follows a method name like:
694746
#
695747
# def foo bar
@@ -699,7 +751,7 @@ def result
699751
# yet. We do this more accurately, so we need to allow comparing
700752
# against both END and END|LABEL.
701753
ParamToken.new([[lineno, column], event, value, lex_state])
702-
elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
754+
elsif lex_state == State::END | State::LABEL
703755
# In the event that we're comparing identifiers, we're going to
704756
# allow a little divergence. Ripper doesn't account for local
705757
# variables introduced through named captures in regexes, and we
@@ -739,7 +791,7 @@ def result
739791
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
740792
end
741793

742-
Ripper::Lexer::State.new(result_value[current_index][1])
794+
State.new(result_value[current_index][1])
743795
else
744796
previous_state
745797
end

test/prism/ruby/ripper_test.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,18 @@ class RipperTest < TestCase
6363
define_method(fixture.test_name) { assert_ripper(fixture.read) }
6464
end
6565

66+
# Check that the hardcoded values don't change without us noticing.
67+
def test_internals
  # EXPR_* constants that Ripper defines but that have no entry of their own
  # in LexCompat::State::ALL.
  excluded = %i[EXPR_VALUE EXPR_BEG_ANY EXPR_ARG_ANY EXPR_END_ANY]
  ripper_names = Ripper.constants.select { |name| name.start_with?("EXPR_") } - excluded
  table = LexCompat::State::ALL

  # Same number of states on both sides...
  assert_equal(ripper_names.size, table.size)

  # ...and each Ripper value maps to the same name, minus the EXPR_ prefix.
  ripper_names.each do |ripper_name|
    short_name = ripper_name.to_s.delete_prefix("EXPR_").to_sym
    assert_equal(short_name, table[Ripper.const_get(ripper_name)])
  end
end
77+
6678
private
6779

6880
def assert_ripper(source)

0 commit comments

Comments
 (0)