[ruby/prism] Handle escaped characters after controls

kddnewton · matzbot · commit 4d8c9c131021 · 2024-12-26T22:35:28.000Z
Fixes [Bug #20986] ruby/prism@fd0c563e9e
diff --git a/prism/prism.c b/prism/prism.c
@@ -9580,28 +9580,6 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
     pm_buffer_append_byte(buffer, byte);
 }
 
-/**
- * Write each byte of the given escaped character into the buffer.
- */
-static inline void
-escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
-    size_t width;
-    if (parser->encoding_changed) {
-        width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
-    } else {
-        width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
-    }
-
-    // TODO: If the character is invalid in the given encoding, then we'll just
-    // push one byte into the buffer. This should actually be an error.
-    width = (width == 0) ? 1 : width;
-
-    for (size_t index = 0; index < width; index++) {
-        escape_write_byte_encoded(parser, buffer, *parser->current.end);
-        parser->current.end++;
-    }
-}
-
 /**
  * The regular expression engine doesn't support the same escape sequences as
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -9626,6 +9604,28 @@ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular
     escape_write_byte_encoded(parser, buffer, byte);
 }
 
+/**
+ * Consume the character at the parser's cursor and, when it is exactly one
+ * byte wide, write its escaped form (per flags) through escape_write_byte.
+ */
+static inline void
+escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
+    size_t width;
+    if (parser->encoding_changed) {
+        width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+    } else {
+        width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
+    }
+    if (width == 1) {
+        escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
+    } else {
+        // Multibyte (width > 1) or invalid (width == 0): not usable in this
+        // escape. Skip it, always advancing at least one byte, and add an error.
+        parser->current.end += (width == 0) ? 1 : width;
+        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+    }
+}
+
 /**
  * Warn about using a space or a tab character in an escape, as opposed to using
  * \\s or \\t. Note that we can quite copy the source because the warning
@@ -10050,7 +10050,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
         /* fallthrough */
         default: {
             if (parser->current.end < parser->end) {
-                escape_write_escape_encoded(parser, buffer);
+                escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
             } else {
                 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
             }
diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb
@@ -204,6 +204,9 @@ def assert_context(context)
         # \C-a \C-b \C-c ...
         assert_unescape(context, "C-#{chr}")
 
+        # \C-\a \C-\b \C-\c ...
+        assert_unescape(context, "C-\\#{chr}")
+
         # \ca \cb \cc ...
         assert_unescape(context, "c#{chr}")