Skip to content

Commit 4d8c9c1

Browse files
kddnewton and matzbot
authored and committed
[ruby/prism] Handle escaped characters after controls
Fixes [Bug #20986] ruby/prism@fd0c563e9e
1 parent c859e15 commit 4d8c9c1

File tree

2 files changed

+26
-23
lines changed

2 files changed

+26
-23
lines changed

prism/prism.c

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9580,28 +9580,6 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
95809580
pm_buffer_append_byte(buffer, byte);
95819581
}
95829582

9583-
/**
9584-
* Write each byte of the given escaped character into the buffer.
9585-
*/
9586-
static inline void
9587-
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9588-
size_t width;
9589-
if (parser->encoding_changed) {
9590-
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9591-
} else {
9592-
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9593-
}
9594-
9595-
// TODO: If the character is invalid in the given encoding, then we'll just
9596-
// push one byte into the buffer. This should actually be an error.
9597-
width = (width == 0) ? 1 : width;
9598-
9599-
for (size_t index = 0; index < width; index++) {
9600-
escape_write_byte_encoded(parser, buffer, *parser->current.end);
9601-
parser->current.end++;
9602-
}
9603-
}
9604-
96059583
/**
96069584
* The regular expression engine doesn't support the same escape sequences as
96079585
* Ruby does. So first we have to read the escape sequence, and then we have to
@@ -9626,6 +9604,28 @@ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular
96269604
escape_write_byte_encoded(parser, buffer, byte);
96279605
}
96289606

9607+
/**
9608+
* Write each byte of the given escaped character into the buffer.
9609+
*/
9610+
static inline void
9611+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9612+
size_t width;
9613+
if (parser->encoding_changed) {
9614+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9615+
} else {
9616+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9617+
}
9618+
9619+
if (width == 1) {
9620+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9621+
} else {
9622+
// Assume the next character wasn't meant to be part of this escape
9623+
// sequence since it is invalid. Add an error and move on.
9624+
parser->current.end += width;
9625+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9626+
}
9627+
}
9628+
96299629
/**
96309630
* Warn about using a space or a tab character in an escape, as opposed to using
96319631
* \\s or \\t. Note that we can't quite copy the source because the warning
@@ -10050,7 +10050,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
1005010050
/* fallthrough */
1005110051
default: {
1005210052
if (parser->current.end < parser->end) {
10053-
escape_write_escape_encoded(parser, buffer);
10053+
escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
1005410054
} else {
1005510055
pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
1005610056
}

test/prism/unescape_test.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ def assert_context(context)
204204
# \C-a \C-b \C-c ...
205205
assert_unescape(context, "C-#{chr}")
206206

207+
# \C-\a \C-\b \C-\c ...
208+
assert_unescape(context, "C-\\#{chr}")
209+
207210
# \ca \cb \cc ...
208211
assert_unescape(context, "c#{chr}")
209212

0 commit comments

Comments
 (0)