diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 74751d2fe5..05354ab29d 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -200,6 +200,11 @@ class Lexer :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS ] + # Types of tokens that are allowed to continue a method call with comments in-between. + # For these, the parser gem doesn't emit a newline token after the last comment. + COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT] + private_constant :COMMENT_CONTINUATION_TYPES + # Heredocs are complex and require us to keep track of a bit of info to refer to later HeredocData = Struct.new(:identifier, :common_whitespace, keyword_init: true) @@ -233,8 +238,13 @@ def to_a index = 0 length = lexed.length - heredoc_stack = Array.new - quote_stack = Array.new + heredoc_stack = [] + quote_stack = [] + + # The parser gem emits the newline tokens for comments out of order. This saves + # that token location to emit at a later time to properly line everything up. + # https://github.com/whitequark/parser/issues/1025 + comment_newline_location = nil while index < length token, state = lexed[index] @@ -257,23 +267,46 @@ def to_a value.delete_prefix!("?") when :tCOMMENT if token.type == :EMBDOC_BEGIN - start_index = index while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1) value += next_token.value index += 1 end - if start_index != index - value += next_token.value - location = range(token.location.start_offset, lexed[index][0].location.end_offset) - index += 1 - end + value += next_token.value + location = range(token.location.start_offset, lexed[index][0].location.end_offset) + index += 1 else value.chomp! location = range(token.location.start_offset, token.location.end_offset - 1) + + prev_token = lexed[index - 2][0] + next_token = lexed[index][0] + + is_inline_comment = prev_token.location.start_line == token.location.start_line + if is_inline_comment && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type) + tokens << [:tCOMMENT, [value, location]] + + nl_location = range(token.location.end_offset - 1, token.location.end_offset) + tokens << [:tNL, [nil, nl_location]] + next + elsif is_inline_comment && next_token&.type == :COMMENT + comment_newline_location = range(token.location.end_offset - 1, token.location.end_offset) + elsif comment_newline_location && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type) + tokens << [:tCOMMENT, [value, location]] + tokens << [:tNL, [nil, comment_newline_location]] + comment_newline_location = nil + next + end end when :tNL + next_token = next_token = lexed[index][0] + # Newlines after comments are emitted out of order. + if next_token&.type == :COMMENT + comment_newline_location = location + next + end + value = nil when :tFLOAT value = parse_float(value) diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake index 7a43368555..497282d6f0 100644 --- a/rakelib/typecheck.rake +++ b/rakelib/typecheck.rake @@ -37,6 +37,7 @@ namespace :typecheck do - ./lib/prism/node_ext.rb - ./lib/prism/parse_result.rb - ./lib/prism/visitor.rb + - ./lib/prism/translation/parser/lexer.rb - ./lib/prism/translation/ripper.rb - ./lib/prism/translation/ripper/sexp.rb - ./lib/prism/translation/ruby_parser.rb diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 0a0baa52f4..7ec712a4f0 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -81,29 +81,21 @@ class ParserTest < TestCase # These files are failing to translate their lexer output into the lexer # output expected by the parser gem, so we'll skip them for now. skip_tokens = [ - "comments.txt", "dash_heredocs.txt", "dos_endings.txt", "embdoc_no_newline_at_end.txt", - "heredoc_with_comment.txt", "heredocs_with_ignored_newlines.txt", - "indented_file_end.txt", "methods.txt", "strings.txt", "tilde_heredocs.txt", "seattlerb/backticks_interpolation_line.txt", "seattlerb/bug169.txt", "seattlerb/case_in.txt", - "seattlerb/class_comments.txt", "seattlerb/difficult4__leading_dots2.txt", "seattlerb/difficult6__7.txt", "seattlerb/difficult6__8.txt", "seattlerb/dsym_esc_to_sym.txt", "seattlerb/heredoc_unicode.txt", - "seattlerb/module_comments.txt", - "seattlerb/parse_line_block_inline_comment_leading_newlines.txt", - "seattlerb/parse_line_block_inline_comment.txt", - "seattlerb/parse_line_block_inline_multiline_comment.txt", "seattlerb/parse_line_heredoc.txt", "seattlerb/pct_w_heredoc_interp_nested.txt", "seattlerb/read_escape_unicode_curlies.txt", @@ -117,7 +109,6 @@ class ParserTest < TestCase "whitequark/beginless_erange_after_newline.txt", "whitequark/beginless_irange_after_newline.txt", "whitequark/bug_ascii_8bit_in_literal.txt", - "whitequark/bug_def_no_paren_eql_begin.txt", "whitequark/forward_arg_with_open_args.txt", "whitequark/kwarg_no_paren.txt", "whitequark/lbrace_arg_after_command_args.txt",