From 101962526df8ef0792a8158ebe03ca4b20a86110 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sun, 12 Jan 2025 13:49:44 +0100 Subject: [PATCH] Fix parser translator tSPACE tokens for percent arrays Tests worked around this but the incompatibility is not hard to fix. This fixes 17 token incompatibilities in tests here that were previously passing --- lib/prism/translation/parser/lexer.rb | 10 +++++++++- test/prism/ruby/parser_test.rb | 7 ------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 49fdd2aea8..3af4015605 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -457,7 +457,15 @@ def to_a location = range(token.location.start_offset, token.location.start_offset + 1) end - quote_stack.pop + if percent_array?(quote_stack.pop) + prev_token = lexed[index - 2][0] if index - 2 >= 0 + empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type) + ends_with_whitespace = prev_token&.type == :WORDS_SEP + # parser always emits a space token after content in a percent array, even if no actual whitespace is present. 
+ if !empty && !ends_with_whitespace + tokens << [:tSPACE, [nil, range(token.location.start_offset, token.location.start_offset)]] + end + end when :tSYMBEG if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END next_location = token.location.join(next_token.location) diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 2e9211e70d..1542bc6562 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -219,13 +219,6 @@ def assert_equal_tokens(expected_tokens, actual_tokens) expected_index += 1 actual_index += 1 - # The parser gem always has a space before a string end in list - # literals, but we don't. So we'll skip over the space. - if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END - expected_index += 1 - next - end - # There are a lot of tokens that have very specific meaning according # to the context of the parser. We don't expose that information in # prism, so we need to normalize these tokens a bit.