Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 41 additions & 8 deletions lib/prism/translation/parser/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,11 @@ class Lexer
:tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
]

# Types of tokens that are allowed to continue a method call with comments in-between.
# For these, the parser gem doesn't emit a newline token after the last comment.
# The list is only consulted via `include?`, so it is frozen to guard against
# accidental mutation of the shared constant.
COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].freeze
private_constant :COMMENT_CONTINUATION_TYPES

# Heredocs are complex and require us to keep track of a bit of info to refer to later
HeredocData = Struct.new(:identifier, :common_whitespace, keyword_init: true)

Expand Down Expand Up @@ -233,8 +238,13 @@ def to_a
index = 0
length = lexed.length

heredoc_stack = Array.new
quote_stack = Array.new
heredoc_stack = []
quote_stack = []

# The parser gem emits the newline tokens for comments out of order. This saves
# that token location to emit at a later time to properly line everything up.
# https://github.com/whitequark/parser/issues/1025
comment_newline_location = nil

while index < length
token, state = lexed[index]
Expand All @@ -257,23 +267,46 @@ def to_a
value.delete_prefix!("?")
when :tCOMMENT
if token.type == :EMBDOC_BEGIN
start_index = index

while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
value += next_token.value
index += 1
end

if start_index != index
value += next_token.value
location = range(token.location.start_offset, lexed[index][0].location.end_offset)
index += 1
end
value += next_token.value
location = range(token.location.start_offset, lexed[index][0].location.end_offset)
index += 1
else
value.chomp!
location = range(token.location.start_offset, token.location.end_offset - 1)

prev_token = lexed[index - 2][0]
next_token = lexed[index][0]

is_inline_comment = prev_token.location.start_line == token.location.start_line
if is_inline_comment && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
tokens << [:tCOMMENT, [value, location]]

nl_location = range(token.location.end_offset - 1, token.location.end_offset)
tokens << [:tNL, [nil, nl_location]]
next
elsif is_inline_comment && next_token&.type == :COMMENT
comment_newline_location = range(token.location.end_offset - 1, token.location.end_offset)
elsif comment_newline_location && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
tokens << [:tCOMMENT, [value, location]]
tokens << [:tNL, [nil, comment_newline_location]]
comment_newline_location = nil
next
end
end
when :tNL
next_token = next_token = lexed[index][0]
# Newlines after comments are emitted out of order.
if next_token&.type == :COMMENT
comment_newline_location = location
next
end

value = nil
when :tFLOAT
value = parse_float(value)
Expand Down
1 change: 1 addition & 0 deletions rakelib/typecheck.rake
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ namespace :typecheck do
- ./lib/prism/node_ext.rb
- ./lib/prism/parse_result.rb
- ./lib/prism/visitor.rb
- ./lib/prism/translation/parser/lexer.rb
- ./lib/prism/translation/ripper.rb
- ./lib/prism/translation/ripper/sexp.rb
- ./lib/prism/translation/ruby_parser.rb
Expand Down
9 changes: 0 additions & 9 deletions test/prism/ruby/parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,29 +81,21 @@ class ParserTest < TestCase
# These files are failing to translate their lexer output into the lexer
# output expected by the parser gem, so we'll skip them for now.
skip_tokens = [
"comments.txt",
"dash_heredocs.txt",
"dos_endings.txt",
"embdoc_no_newline_at_end.txt",
"heredoc_with_comment.txt",
"heredocs_with_ignored_newlines.txt",
"indented_file_end.txt",
"methods.txt",
"strings.txt",
"tilde_heredocs.txt",
"seattlerb/backticks_interpolation_line.txt",
"seattlerb/bug169.txt",
"seattlerb/case_in.txt",
"seattlerb/class_comments.txt",
"seattlerb/difficult4__leading_dots2.txt",
"seattlerb/difficult6__7.txt",
"seattlerb/difficult6__8.txt",
"seattlerb/dsym_esc_to_sym.txt",
"seattlerb/heredoc_unicode.txt",
"seattlerb/module_comments.txt",
"seattlerb/parse_line_block_inline_comment_leading_newlines.txt",
"seattlerb/parse_line_block_inline_comment.txt",
"seattlerb/parse_line_block_inline_multiline_comment.txt",
"seattlerb/parse_line_heredoc.txt",
"seattlerb/pct_w_heredoc_interp_nested.txt",
"seattlerb/read_escape_unicode_curlies.txt",
Expand All @@ -117,7 +109,6 @@ class ParserTest < TestCase
"whitequark/beginless_erange_after_newline.txt",
"whitequark/beginless_irange_after_newline.txt",
"whitequark/bug_ascii_8bit_in_literal.txt",
"whitequark/bug_def_no_paren_eql_begin.txt",
"whitequark/forward_arg_with_open_args.txt",
"whitequark/kwarg_no_paren.txt",
"whitequark/lbrace_arg_after_command_args.txt",
Expand Down
Loading