diff --git a/src/prism.c b/src/prism.c index ab8de96975..72a22ac489 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9580,28 +9580,6 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte pm_buffer_append_byte(buffer, byte); } -/** - * Write each byte of the given escaped character into the buffer. - */ -static inline void -escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) { - size_t width; - if (parser->encoding_changed) { - width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - } else { - width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end); - } - - // TODO: If the character is invalid in the given encoding, then we'll just - // push one byte into the buffer. This should actually be an error. - width = (width == 0) ? 1 : width; - - for (size_t index = 0; index < width; index++) { - escape_write_byte_encoded(parser, buffer, *parser->current.end); - parser->current.end++; - } -} - /** * The regular expression engine doesn't support the same escape sequences as * Ruby does. So first we have to read the escape sequence, and then we have to @@ -9626,6 +9604,28 @@ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular escape_write_byte_encoded(parser, buffer, byte); } +/** + * Write the next character as an escaped byte if it is exactly one byte wide; otherwise add an error. 
+ */ +static inline void +escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) { + size_t width; + if (parser->encoding_changed) { + width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + } else { + width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end); + } + + if (width == 1) { + escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags)); + } else { + // Assume the next character wasn't meant to be part of this escape + // sequence since it is invalid. Add an error and move on. + parser->current.end += width; + pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL); + } +} + /** * Warn about using a space or a tab character in an escape, as opposed to using * \\s or \\t. Note that we can't quite copy the source because the warning @@ -10050,7 +10050,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre } default: { if (parser->current.end < parser->end) { - escape_write_escape_encoded(parser, buffer); + escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags); } else { pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER); } diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb index f9e5a60e45..299eddadfe 100644 --- a/test/prism/unescape_test.rb +++ b/test/prism/unescape_test.rb @@ -204,6 +204,9 @@ def assert_context(context) # \C-a \C-b \C-c ... assert_unescape(context, "C-#{chr}") + # \C-\a \C-\b \C-\c ... + assert_unescape(context, "C-\\#{chr}") + # \ca \cb \cc ... assert_unescape(context, "c#{chr}")