@@ -8613,7 +8613,7 @@ escape_hexadecimal_digit(const uint8_t value) {
86138613 * validated.
86148614 */
86158615static inline uint32_t
8616- escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
8616+ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location ) {
86178617 uint32_t value = 0;
86188618 for (size_t index = 0; index < length; index++) {
86198619 if (index != 0) value <<= 4;
@@ -8623,7 +8623,11 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
86238623 // Here we're going to verify that the value is actually a valid Unicode
86248624 // codepoint and not a surrogate pair.
86258625 if (value >= 0xD800 && value <= 0xDFFF) {
8626- pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
8626+ if (error_location != NULL) {
8627+ pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE);
8628+ } else {
8629+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
8630+ }
86278631 return 0xFFFD;
86288632 }
86298633
@@ -8923,7 +8927,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
89238927 extra_codepoints_start = unicode_start;
89248928 }
89258929
8926- uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
8930+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL );
89278931 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
89288932
89298933 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -8964,7 +8968,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
89648968 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
89658969 }
89668970 } else if (length == 4) {
8967- uint32_t value = escape_unicode(parser, parser->current.end, 4);
8971+ uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL );
89688972
89698973 if (flags & PM_ESCAPE_FLAG_REGEXP) {
89708974 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -20368,7 +20372,7 @@ pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, con
2036820372}
2036920373
2037020374static inline const uint8_t *
20371- pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20375+ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location ) {
2037220376 const uint8_t *start = cursor - 1;
2037320377 cursor++;
2037420378
@@ -20379,7 +20383,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
2037920383
2038020384 if (*cursor != '{') {
2038120385 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20382- uint32_t value = escape_unicode(parser, cursor, length);
20386+ uint32_t value = escape_unicode(parser, cursor, length, error_location );
2038320387
2038420388 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
2038520389 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
@@ -20402,7 +20406,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
2040220406 if (length == 0) {
2040320407 break;
2040420408 }
20405- uint32_t value = escape_unicode(parser, cursor, length);
20409+ uint32_t value = escape_unicode(parser, cursor, length, error_location );
2040620410
2040720411 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
2040820412 cursor += length;
@@ -20412,7 +20416,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
2041220416}
2041320417
2041420418static void
20415- pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
20419+ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location ) {
2041620420 const uint8_t *end = source + length;
2041720421 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
2041820422
@@ -20430,7 +20434,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8
2043020434 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
2043120435 break;
2043220436 case 'u':
20433- cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
20437+ cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location );
2043420438 break;
2043520439 default:
2043620440 pm_buffer_append_byte(unescaped, '\\');
@@ -20473,7 +20477,7 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
2047320477 // unescaped, which is what we need.
2047420478 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
2047520479 if (PRISM_UNLIKELY(cursor != NULL)) {
20476- pm_named_capture_escape(parser, &unescaped, source, length, cursor);
20480+ pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location );
2047720481 source = (const uint8_t *) pm_buffer_value(&unescaped);
2047820482 length = pm_buffer_length(&unescaped);
2047920483 }
0 commit comments