diff --git a/lib/irb.rb b/lib/irb.rb index fd0bfe35c..39e4d74be 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -5,6 +5,7 @@ # by Keiju ISHITSUKA(keiju@ruby-lang.org) # +require "prism" require "ripper" require "reline" @@ -314,15 +315,13 @@ def configure_io end if @context.io.respond_to?(:dynamic_prompt) @context.io.dynamic_prompt do |lines| - tokens = RubyLex.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, local_variables: @context.local_variables) - line_results = IRB::NestingParser.parse_by_line(tokens) + code = lines.map{ |l| l + "\n" }.join + tokens = RubyLex.ripper_lex_without_warning(code, local_variables: @context.local_variables) + parse_lex_result = Prism.parse_lex(code, scopes: [@context.local_variables]) + line_results = IRB::NestingParser.parse_by_line(parse_lex_result) tokens_until_line = [] - line_results.map.with_index do |(line_tokens, _prev_opens, next_opens, _min_depth), line_num_offset| - line_tokens.each do |token, _s| - # Avoid appending duplicated token. Tokens that include "n" like multiline - # tstring_content can exist in multiple lines. - tokens_until_line << token if token != tokens_until_line.last - end + line_results.map.with_index do |(_prev_opens, next_opens, _min_depth), line_num_offset| + tokens_until_line << tokens.shift while !tokens.empty? 
# frozen_string_literal: true

require 'prism'

module IRB
  # Computes, for every line of a piece of Ruby code, which syntactic
  # constructs (keywords, brackets, string literals, heredocs, ...) are still
  # open at the end of that line. The information is derived from a Prism
  # parse result rather than from Ripper tokens.
  module NestingParser
    # One open construct.
    #   pos   - [line, column] of the opening, as reported by Prism locations
    #   event - Ripper-style event name (:on_kw, :on_lparen, :on_heredoc_beg, ...)
    #   tok   - text of the opening token ('if', '(', '<<~A', ...)
    NestingElem = Struct.new(:pos, :event, :tok)

    # Walks a Prism AST and records, per source line, every position where a
    # construct opens or closes. #nestings replays those events to produce the
    # stack of open constructs at the end of each line.
    class NestingVisitor < Prism::Visitor
      def initialize
        # @lines[i]    - open/close events on line i + 1, as [column, priority, elem]
        #                (opens) or [column, priority] (closes)
        # @heredocs[i] - heredocs whose opening tag is on line i + 1
        @lines = []
        @heredocs = []
      end

      # Returns an array with one entry per recorded line; each entry is the
      # list of NestingElem still open at the end of that line (outermost first).
      def nestings
        line_count = [@lines.size, @heredocs.size].max
        nesting = []
        Array.new(line_count) do |line_index|
          # Replay events left to right. At the same column a close (-1) sorts
          # before an open (+1), so `when`/`else`/`rescue` replace the
          # construct they terminate.
          @lines[line_index]&.sort_by { |col, pri| [col, pri] }&.each do |_col, _pri, elem|
            if elem
              nesting << elem
            else
              nesting.pop
            end
          end
          # A heredoc body starts after its line ends, so heredocs are pushed
          # after the line's other events. They are pushed right to left so the
          # leftmost heredoc, whose body comes first, ends up on top of the stack.
          @heredocs[line_index]&.sort_by { |elem| elem.pos[1] }&.reverse_each do |elem|
            nesting << elem
          end
          nesting.dup
        end
      end

      # Records a heredoc opening on the line where its `<<ID` tag appears.
      def heredoc_open(node)
        elem = NestingElem.new([node.location.start_line, node.location.start_column], :on_heredoc_beg, node.opening)
        (@heredocs[node.location.start_line - 1] ||= []) << elem
      end

      # Records that +elem+ opens at +line+ / +column+.
      def open(line, column, elem)
        (@lines[line - 1] ||= []) << [column, +1, elem]
      end

      # Records that the innermost open construct closes at +line+ / +column+.
      def close(line, column)
        (@lines[line - 1] ||= []) << [column, -1]
      end

      # True unless the node begins with its own keyword, i.e. true for the
      # modifier forms (`expr if cond`, `expr while cond`, ...) and when
      # +keyword_loc+ is nil.
      def modifier_node?(node, keyword_loc)
        !(keyword_loc && node.location.start_line == keyword_loc.start_line && node.location.start_column == keyword_loc.start_column)
      end

      # Opens a construct of the given event +type+ and token text at the
      # start of +location+.
      def open_location(location, type, tok)
        open(location.start_line, location.start_column, NestingElem.new([location.start_line, location.start_column], type, tok))
      end

      # Closes the innermost construct at the end of +location+.
      def close_location(location)
        close(location.end_line, location.end_column)
      end

      # Closes the innermost construct at the start of +location+.
      def close_location_start(location)
        close(location.start_line, location.start_column)
      end

      # Closes at the node's `end` keyword, if the source actually contains one.
      def close_end_keyword_loc(node)
        close_location(node.end_keyword_loc) if node.end_keyword == 'end'
      end

      # Closes at the node's closing token, unless it is absent or empty.
      def close_closing_loc(node)
        close_location(node.closing_loc) unless node.closing.nil? || node.closing.empty?
      end

      def visit_for_node(node)
        super
        open_location(node.location, :on_kw, 'for')
        close_end_keyword_loc(node)
      end

      def visit_while_node(node)
        super
        return if modifier_node?(node, node.keyword_loc)

        open_location(node.location, :on_kw, 'while')
        close_closing_loc(node)
      end

      def visit_until_node(node)
        super
        return if modifier_node?(node, node.keyword_loc)

        open_location(node.location, :on_kw, 'until')
        close_closing_loc(node)
      end

      def visit_if_node(node)
        super
        # No keyword at all means a ternary; neither it nor a modifier `if` nests.
        return if !node.if_keyword || modifier_node?(node, node.if_keyword_loc)

        open_location(node.location, :on_kw, node.if_keyword)
        if node.subsequent
          # The following elsif/else takes over from here.
          close_location_start(node.subsequent.location)
        else
          close_end_keyword_loc(node)
        end
      end

      def visit_unless_node(node)
        super
        return if modifier_node?(node, node.keyword_loc)

        open_location(node.location, :on_kw, 'unless')
        if node.else_clause
          close_location_start(node.else_clause.location)
        else
          close_end_keyword_loc(node)
        end
      end

      def visit_case_node(node)
        super
        open_location(node.location, :on_kw, 'case')
        if node.else_clause
          close_location_start(node.else_clause.location)
        else
          close_end_keyword_loc(node)
        end
      end
      alias visit_case_match_node visit_case_node

      # `when` replaces whatever is open before it (`case` or a previous `when`).
      def visit_when_node(node)
        super
        close_location_start(node.location)
        open_location(node.location, :on_kw, 'when')
      end

      # `in` replaces whatever is open before it (`case` or a previous `in`).
      def visit_in_node(node)
        super
        close_location_start(node.location)
        open_location(node.location, :on_kw, 'in')
      end

      def visit_else_node(node)
        super
        # Only a literal `else` keyword opens a nesting.
        if node.else_keyword == 'else'
          open_location(node.location, :on_kw, 'else')
          close_end_keyword_loc(node)
        end
      end

      def visit_ensure_node(node)
        super
        return if modifier_node?(node, node.ensure_keyword_loc)

        close_location_start(node.location)
        open_location(node.location, :on_kw, 'ensure')
      end

      def visit_rescue_node(node)
        super
        return if modifier_node?(node, node.keyword_loc)

        close_location_start(node.location)
        open_location(node.location, :on_kw, 'rescue')
      end

      def visit_begin_node(node)
        super
        # Nodes without a `begin` keyword do not open anything themselves.
        if node.begin_keyword
          open_location(node.location, :on_kw, 'begin')
          close_end_keyword_loc(node)
        end
      end

      def visit_block_node(node)
        super
        open_location(node.location, node.opening == '{' ? :on_lbrace : :on_kw, node.opening)
        close_closing_loc(node)
      end

      def visit_array_node(node)
        super
        type =
          case node.opening
          when nil
            # `x = 1, 2` doesn't have opening
            nil
          when '['
            # Same event name that visit_call_node uses for `[`.
            :on_lbracket
          when /\A%W/
            :on_words_beg
          when /\A%w/
            :on_qwords_beg
          when /\A%I/
            :on_symbols_beg
          when /\A%i/
            :on_qsymbols_beg
          end

        if type
          open_location(node.location, type, node.opening)
          close_closing_loc(node)
        end
      end

      def visit_hash_node(node)
        super
        open_location(node.location, :on_lbrace, '{')
        close_closing_loc(node)
      end

      # Shared handling for string-like literals, which may be written either
      # as a heredoc or with ordinary delimiters. +type+ is the event used for
      # the non-heredoc form.
      def heredoc_string_like(node, type)
        closing = node.closing
        if node.opening&.start_with?('<<')
          heredoc_open(node)
          # Heredoc closing contains trailing newline. We need to exclude it
          close_location_start(node.closing_loc) unless closing.nil? || closing.empty?
        elsif node.opening
          open_location(node.location, type, node.opening)
          if closing && closing != ''
            # Closing of `"#{\n` is "\n". We need to treat it as not-closed.
            close_location_start(node.closing_loc) if node.opening.match?(/\n\z/) || closing != "\n"
          end
        end
      end

      def visit_embedded_statements_node(node)
        super
        open_location(node.location, :on_embexpr_beg, '#{')
        close_closing_loc(node)
      end

      def visit_interpolated_string_node(node)
        super
        heredoc_string_like(node, :on_tstring_beg)
      end
      alias visit_string_node visit_interpolated_string_node

      def visit_interpolated_x_string_node(node)
        super
        heredoc_string_like(node, :on_backtick)
      end
      alias visit_x_string_node visit_interpolated_x_string_node

      def visit_symbol_node(node)
        super
        unless node.opening.nil? || node.opening.empty? || node.opening == ':'
          # :"sym" or %s[sym]
          open_location(node.location, :on_symbeg, node.opening)
          close_closing_loc(node)
        end
      end
      alias visit_interpolated_symbol_node visit_symbol_node

      def visit_regular_expression_node(node)
        super
        open_location(node.location, :on_regexp_beg, node.opening)
        close_closing_loc(node)
      end
      alias visit_interpolated_regular_expression_node visit_regular_expression_node
      # A regexp literal used directly as a condition (`if /re/`) is a separate
      # node type with the same opening/closing fields.
      # NOTE(review): relies on Prism's MatchLastLineNode layout - confirm.
      alias visit_match_last_line_node visit_regular_expression_node
      alias visit_interpolated_match_last_line_node visit_regular_expression_node

      def visit_parentheses_node(node)
        super
        open_location(node.location, :on_lparen, '(')
        close_closing_loc(node)
      end

      def visit_call_node(node)
        super
        type =
          case node.opening
          when '('
            :on_lparen
          when '['
            :on_lbracket
          end

        if type
          open_location(node.opening_loc, type, node.opening)
          close_closing_loc(node)
        end
      end

      def visit_block_parameters_node(node)
        super
        if node.opening == '('
          open_location(node.location, :on_lparen, '(')
          close_closing_loc(node)
        end
      end

      def visit_lambda_node(node)
        super
        open_location(node.opening_loc, :on_tlambeg, node.opening)
        close_closing_loc(node)
      end

      def visit_super_node(node)
        super
        if node.lparen
          open_location(node.lparen_loc, :on_lparen, '(')
          close_location(node.rparen_loc) if node.rparen == ')'
        end
      end
      alias visit_yield_node visit_super_node
      alias visit_defined_node visit_super_node

      def visit_def_node(node)
        super
        open_location(node.location, :on_kw, 'def')
        if node.lparen == '('
          open_location(node.lparen_loc, :on_lparen, '(')
          close_location(node.rparen_loc) if node.rparen == ')'
        end
        if node.equal
          # Endless method (`def f = expr`): `def` stops nesting right after `=`.
          close_location(node.equal_loc)
        else
          close_end_keyword_loc(node)
        end
      end

      def visit_class_node(node)
        super
        open_location(node.location, :on_kw, 'class')
        close_end_keyword_loc(node)
      end
      alias visit_singleton_class_node visit_class_node

      def visit_module_node(node)
        super
        open_location(node.location, :on_kw, 'module')
        close_end_keyword_loc(node)
      end
    end

    class << self
      # Returns the list of nestings still open at the end of the code.
      # Returns an empty list when the code has no lines at all.
      def open_nestings(parse_lex_result)
        last_line = parse_by_line(parse_lex_result).last
        last_line ? last_line[1] : []
      end

      # Calculates nesting information [prev_opens, next_opens, min_depth] for each line.
      # Example code
      #   ["hello
      #   world"+(
      # First line
      #   prev_opens: []
      #   next_opens: [lbracket, tstring_beg]
      #   min_depth:  0 (minimum at beginning of line)
      # Second line
      #   prev_opens: [lbracket, tstring_beg]
      #   next_opens: [lbracket, lparen]
      #   min_depth:  1 (minimum just after tstring_end)
      #
      # +parse_lex_result+ is the value returned by Prism.parse_lex.
      def parse_by_line(parse_lex_result)
        visitor = NestingVisitor.new
        node, tokens = parse_lex_result.value
        node.accept(visitor)

        # =begin/=end comments are not part of the AST, so they are picked up
        # from the token stream instead.
        tokens.each do |token,|
          case token.type
          when :EMBDOC_BEGIN
            visitor.open_location(token.location, :on_embdoc_beg, '=begin')
          when :EMBDOC_END
            visitor.close_location_start(token.location)
          end
        end

        nestings = visitor.nestings
        last_nesting = nestings.last || []

        # Lines after the last recorded event keep the final nesting.
        line_count = parse_lex_result.source.source.lines.size
        Array.new(line_count) do |i|
          prev_opens = i == 0 ? [] : nestings[i - 1] || last_nesting
          opens = nestings[i] || last_nesting
          # Depth is approximated by the length of the prefix shared between the
          # nesting before and after the line.
          min_depth = prev_opens.zip(opens).take_while { |before, after| before == after }.size
          [prev_opens, opens, min_depth]
        end
      end
    end
  end
end
= next_opens = line_results.last[2] + prev_opens = next_opens = line_results.last[1] min_depth = next_opens.size end @@ -405,7 +406,7 @@ def process_indent_level(tokens, lines, line_index, is_newline) elsif prev_open_token&.event == :on_heredoc_beg tok = prev_open_token.tok if prev_opens.size <= next_opens.size - if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token + if is_newline && lines[line_index].empty? && line_results[line_index - 1][0].last != next_open_token # First line in heredoc tok.match?(/^<<[-~]/) ? base_indent + indent : indent elsif tok.match?(/^<<~/) @@ -485,7 +486,7 @@ def check_termination_in_prev_line(code, local_variables:) if first_token && first_token.state != Ripper::EXPR_DOT tokens_without_last_line = tokens[0..index] code_without_last_line = tokens_without_last_line.map(&:tok).join - opens_without_last_line = NestingParser.open_tokens(tokens_without_last_line) + opens_without_last_line = NestingParser.open_nestings(Prism.parse_lex(code_without_last_line, scopes: [local_variables])) if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line, local_variables: local_variables) return last_line_tokens.map(&:tok).join end diff --git a/test/irb/test_irb.rb b/test/irb/test_irb.rb index d687ca9af..cb83bd995 100644 --- a/test/irb/test_irb.rb +++ b/test/irb/test_irb.rb @@ -604,18 +604,17 @@ def test_pasted_code_keep_base_indent_spaces_with_heredoc [%q( [1), 10, 12, 3], [%q( ]+[["a), 10, 14, 4], [%q(b" + <<~A + <<-B + <