@@ -226,7 +226,7 @@ def state
226226 end
227227
228228 # Tokens where state should be ignored
229- # used for :on_comment, :on_heredoc_end, :on_embexpr_end
229+ # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
230230 class IgnoreStateToken < Token
231231 def ==( other ) # :nodoc:
232232 self [ 0 ...-1 ] == other [ 0 ...-1 ]
@@ -611,10 +611,10 @@ def self.build(opening)
611611 BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
612612 private_constant :BOM_FLUSHED
613613
614- attr_reader :source , : options
614+ attr_reader :options
615615
# Build a lexer over the given string of Ruby code. Any extra keyword
# options are stored and forwarded verbatim to Prism.lex later on.
def initialize(code, **options)
  @options = options
  @code = code
end
620620
@@ -624,12 +624,13 @@ def result
624624 state = :default
625625 heredoc_stack = [ [ ] ] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
626626
627- result = Prism . lex ( source , **options )
627+ result = Prism . lex ( @code , **options )
628+ source = result . source
628629 result_value = result . value
629630 previous_state = nil #: State?
630631 last_heredoc_end = nil #: Integer?
631632
632- bom = source . byteslice ( 0 ..2 ) == "\xEF \xBB \xBF "
633+ bom = source . slice ( 0 ..2 ) == "\xEF \xBB \xBF "
633634
634635 result_value . each_with_index do |( token , lex_state ) , index |
635636 lineno = token . location . start_line
@@ -763,7 +764,7 @@ def result
763764 end_offset += 3
764765 end
765766
766- tokens << Token . new ( [ [ lineno , 0 ] , :on_nl , source . byteslice ( start_offset ...end_offset ) , lex_state ] )
767+ tokens << Token . new ( [ [ lineno , 0 ] , :on_nl , source . slice ( start_offset ...end_offset ) , lex_state ] )
767768 end
768769 end
769770
@@ -857,7 +858,88 @@ def result
857858 # We sort by location to compare against Ripper's output
858859 tokens . sort_by! ( &:location )
859860
860- Result . new ( tokens , result . comments , result . magic_comments , result . data_loc , result . errors , result . warnings , Source . for ( source ) )
861+ # Add :on_sp tokens
862+ tokens = add_on_sp_tokens ( tokens , source , result . data_loc , bom )
863+
864+ Result . new ( tokens , result . comments , result . magic_comments , result . data_loc , result . errors , result . warnings , source )
865+ end
866+
# Interleave :on_sp tokens into the (already location-sorted) token list so
# the output matches Ripper, which reports whitespace; Prism's lex does not.
#
# tokens   - sorted Array of Token objects built from Prism's lex result
# source   - object used to translate byte offsets to line/column
#            coordinates (line_to_byte_offset / line / column / slice)
# data_loc - location of __END__ content, or nil when there is none
# bom      - true when the source starts with a UTF-8 byte order mark
#
# Returns a new Array containing the original tokens with IgnoreStateToken
# :on_sp entries inserted wherever there is a byte gap between tokens.
def add_on_sp_tokens(tokens, source, data_loc, bom)
  new_tokens = []

  # Each :on_sp carries the lexer state of the token that preceded it;
  # before any token has been seen we start from EXPR_BEG.
  prev_token_state = Translation::Ripper::Lexer::State.new(Translation::Ripper::EXPR_BEG)
  prev_token_end = 0

  tokens.each do |token|
    first_token = new_tokens.empty?
    line, column = token.location
    start_offset = source.line_to_byte_offset(line) + column

    # Any byte gap between the previous token's end and this token's start
    # is whitespace that Ripper would have emitted as :on_sp.
    if start_offset > prev_token_end
      sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
      sp_line = source.line(prev_token_end)
      sp_column = source.column(prev_token_end)
      continuation_index = sp_value.byteindex("\\")

      # ripper emits up to three :on_sp tokens when line continuations are used
      if continuation_index
        # The continuation is the backslash plus its line terminator
        # ("\\\n" or "\\\r\n"); split the gap around it.
        next_whitespace_index = continuation_index + 1
        next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
        next_whitespace_index += 1
        first_whitespace = sp_value[0...continuation_index]
        continuation = sp_value[continuation_index...next_whitespace_index]
        second_whitespace = sp_value[next_whitespace_index..]

        # Whitespace before the backslash, if any.
        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column],
          :on_sp,
          first_whitespace,
          prev_token_state
        ]) unless first_whitespace.empty?

        # The backslash-newline itself is always emitted.
        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column + continuation_index],
          :on_sp,
          continuation,
          prev_token_state
        ])

        # Whitespace at the start of the continued line, if any.
        new_tokens << IgnoreStateToken.new([
          [sp_line + 1, 0],
          :on_sp,
          second_whitespace,
          prev_token_state
        ]) unless second_whitespace.empty?
      else
        # Plain whitespace gap: a single :on_sp token covers it.
        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column],
          :on_sp,
          sp_value,
          prev_token_state
        ])
      end
    end

    new_tokens << token
    prev_token_state = token.state
    prev_token_end = start_offset + token.value.bytesize
    # The first token's reported offset excludes the 3-byte BOM, so account
    # for it when computing where that token actually ends in the source.
    prev_token_end += 3 if first_token && bom
  end

  unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
    end_offset = source.source.bytesize
    end_offset -= 3 if bom
    # Trailing whitespace after the final token also becomes :on_sp.
    if prev_token_end < end_offset
      new_tokens << IgnoreStateToken.new([
        [source.line(prev_token_end), source.column(prev_token_end)],
        :on_sp,
        source.slice(prev_token_end, end_offset - prev_token_end),
        prev_token_state
      ])
    end
  end

  new_tokens
end
862944 end
863945
0 commit comments