ruby · kddnewton · Jan 11, 2025 · Jan 11, 2025 · Jan 11, 2025
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
@@ -200,6 +200,11 @@ class Lexer
           :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
         ]
 
+        # Types of tokens that are allowed to continue a method call with comments in-between.
+        # For these, the parser gem doesn't emit a newline token after the last comment.
+        COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].freeze
+        private_constant :COMMENT_CONTINUATION_TYPES
+
         # Heredocs are complex and require us to keep track of a bit of info to refer to later
         HeredocData = Struct.new(:identifier, :common_whitespace, keyword_init: true)
 
@@ -233,8 +238,13 @@ def to_a
           index = 0
           length = lexed.length
 
-          heredoc_stack = Array.new
-          quote_stack = Array.new
+          heredoc_stack = []
+          quote_stack = []
+
+          # The parser gem emits the newline tokens for comments out of order. This saves
+          # that token location to emit at a later time to properly line everything up.
+          # https://github.com/whitequark/parser/issues/1025
+          comment_newline_location = nil
 
           while index < length
             token, state = lexed[index]
@@ -257,23 +267,46 @@ def to_a
               value.delete_prefix!("?")
             when :tCOMMENT
               if token.type == :EMBDOC_BEGIN
-                start_index = index
 
                 while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
                   value += next_token.value
                   index += 1
                 end
 
-                if start_index != index
-                  value += next_token.value
-                  location = range(token.location.start_offset, lexed[index][0].location.end_offset)
-                  index += 1
-                end
+                value += next_token.value
+                location = range(token.location.start_offset, lexed[index][0].location.end_offset)
+                index += 1
               else
                 value.chomp!
                 location = range(token.location.start_offset, token.location.end_offset - 1)
+
+                # Only look back when a previous token exists: a negative index would wrap around to the end of `lexed`.
+                prev_token, = lexed[index - 2] if index >= 2
+                next_token, = lexed[index]
+                is_inline_comment = prev_token&.location&.start_line == token.location.start_line
+                if is_inline_comment && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+                  tokens << [:tCOMMENT, [value, location]]
+
+                  nl_location = range(token.location.end_offset - 1, token.location.end_offset)
+                  tokens << [:tNL, [nil, nl_location]]
+                  next
+                elsif is_inline_comment && next_token&.type == :COMMENT
+                  comment_newline_location = range(token.location.end_offset - 1, token.location.end_offset)
+                elsif comment_newline_location && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+                  tokens << [:tCOMMENT, [value, location]]
+                  tokens << [:tNL, [nil, comment_newline_location]]
+                  comment_newline_location = nil
+                  next
+                end
               end
             when :tNL
+              # Newlines after comments are emitted out of order: remember this location and skip the token for now.
+              next_token, = lexed[index]
+              if next_token&.type == :COMMENT
+                comment_newline_location = location
+                next
+              end
+
               value = nil
             when :tFLOAT
               value = parse_float(value)

diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake
@@ -37,6 +37,7 @@ namespace :typecheck do
         - ./lib/prism/node_ext.rb
         - ./lib/prism/parse_result.rb
         - ./lib/prism/visitor.rb
+        - ./lib/prism/translation/parser/lexer.rb
         - ./lib/prism/translation/ripper.rb
         - ./lib/prism/translation/ripper/sexp.rb
         - ./lib/prism/translation/ruby_parser.rb

diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb
@@ -81,29 +81,21 @@ class ParserTest < TestCase
     # These files are failing to translate their lexer output into the lexer
     # output expected by the parser gem, so we'll skip them for now.
     skip_tokens = [
-      "comments.txt",
       "dash_heredocs.txt",
       "dos_endings.txt",
       "embdoc_no_newline_at_end.txt",
-      "heredoc_with_comment.txt",
       "heredocs_with_ignored_newlines.txt",
-      "indented_file_end.txt",
       "methods.txt",
       "strings.txt",
       "tilde_heredocs.txt",
       "seattlerb/backticks_interpolation_line.txt",
       "seattlerb/bug169.txt",
       "seattlerb/case_in.txt",
-      "seattlerb/class_comments.txt",
       "seattlerb/difficult4__leading_dots2.txt",
       "seattlerb/difficult6__7.txt",
       "seattlerb/difficult6__8.txt",
       "seattlerb/dsym_esc_to_sym.txt",
       "seattlerb/heredoc_unicode.txt",
-      "seattlerb/module_comments.txt",
-      "seattlerb/parse_line_block_inline_comment_leading_newlines.txt",
-      "seattlerb/parse_line_block_inline_comment.txt",
-      "seattlerb/parse_line_block_inline_multiline_comment.txt",
       "seattlerb/parse_line_heredoc.txt",
       "seattlerb/pct_w_heredoc_interp_nested.txt",
       "seattlerb/read_escape_unicode_curlies.txt",
@@ -117,7 +109,6 @@ class ParserTest < TestCase
       "whitequark/beginless_erange_after_newline.txt",
       "whitequark/beginless_irange_after_newline.txt",
       "whitequark/bug_ascii_8bit_in_literal.txt",
-      "whitequark/bug_def_no_paren_eql_begin.txt",
       "whitequark/forward_arg_with_open_args.txt",
       "whitequark/kwarg_no_paren.txt",
       "whitequark/lbrace_arg_after_command_args.txt",