@@ -4658,7 +4658,8 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg,
46584658
46594659/** Read unicode escape sequence. */
46604660static_inline bool read_uni_esc (u8 * * src_ptr , u8 * * dst_ptr ,
4661- const char * * msg , yyjson_read_flag flg ) {
4661+ const char * * msg , yyjson_read_flag flg ,
4662+ bool * unierr ) {
46624663#define return_err (_end , _msg ) *msg = _msg; *src_ptr = _end; return false
46634664
46644665 u8 * src = * src_ptr ;
@@ -4669,12 +4670,19 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
46694670 src += 2 ; /* skip `\u` */
46704671 if (unlikely (!hex_load_4 (src , & hi ))) {
46714672 if (has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 )) {
4672- usize cnt = 0 ;
4673+ usize cnt = 0 , i ;
4674+ u8 ch ;
46734675 while (cnt < 4 && char_is_hex (src [cnt ])) cnt ++ ;
4676+ ch = src [cnt ];
4677+ dst [0 ] = '\\' ;
4678+ dst [1 ] = 'u' ;
4679+ for (i = 0 ; i < cnt ; i ++ ) dst [2 + i ] = src [i ];
4680+ dst += 2 + cnt ;
46744681 src += cnt ;
4675- * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
4682+ if ( ch && ch != '"' && ch != '\'' ) src ++ ;
46764683 * src_ptr = src ;
46774684 * dst_ptr = dst ;
4685+ if (unierr ) * unierr = true;
46784686 return true;
46794687 }
46804688 return_err (src - 2 , "invalid escaped sequence in string" );
@@ -4699,6 +4707,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
46994707 * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
47004708 * src_ptr = src ;
47014709 * dst_ptr = dst ;
4710+ if (unierr ) * unierr = true;
47024711 return true;
47034712 }
47044713 if (has_allow (INVALID_SURROGATE )) {
@@ -4714,6 +4723,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47144723 }
47154724 * src_ptr = src ;
47164725 * dst_ptr = dst ;
4726+ if (unierr ) * unierr = true;
47174727 return true;
47184728 }
47194729 return_err (src - 6 , "no low surrogate in string" );
@@ -4727,6 +4737,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47274737 * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
47284738 * src_ptr = src ;
47294739 * dst_ptr = dst ;
4740+ if (unierr ) * unierr = true;
47304741 return true;
47314742 }
47324743 if (has_allow (INVALID_SURROGATE )) {
@@ -4742,6 +4753,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47424753 }
47434754 * src_ptr = src ;
47444755 * dst_ptr = dst ;
4756+ if (unierr ) * unierr = true;
47454757 return true;
47464758 }
47474759 return_err (src - 6 , "invalid escape in string" );
@@ -4752,6 +4764,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47524764 * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
47534765 * src_ptr = src ;
47544766 * dst_ptr = dst ;
4767+ if (unierr ) * unierr = true;
47554768 return true;
47564769 }
47574770 if (has_allow (INVALID_SURROGATE )) {
@@ -4767,6 +4780,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47674780 }
47684781 * src_ptr = src ;
47694782 * dst_ptr = dst ;
4783+ if (unierr ) * unierr = true;
47704784 return true;
47714785 }
47724786 return_err (src - 6 , "invalid low surrogate in string" );
@@ -4783,6 +4797,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47834797 * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
47844798 * src_ptr = src ;
47854799 * dst_ptr = dst ;
4800+ if (unierr ) * unierr = true;
47864801 return true;
47874802 }
47884803 if (!has_allow (INVALID_SURROGATE )) {
@@ -4798,6 +4813,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47984813 } else {
47994814 * dst ++ = (u8 )hi ;
48004815 }
4816+ if (unierr ) * unierr = true;
48014817 }
48024818 * src_ptr = src ;
48034819 * dst_ptr = dst ;
@@ -4839,6 +4855,7 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
48394855 u8 * src = hdr , * dst = NULL , * pos ;
48404856 u16 hi , lo ;
48414857 u32 uni , tmp ;
4858+ bool unierr = false;
48424859
48434860 /* Resume incremental parsing. */
48444861 if (con && unlikely (con [0 ])) {
@@ -4901,7 +4918,8 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
49014918 gcc_store_barrier (* src );
49024919 if (likely (* src == quo )) {
49034920 val -> tag = ((u64 )(src - hdr ) << YYJSON_TAG_BIT ) | YYJSON_TYPE_STR |
4904- (quo == '"' ? YYJSON_SUBTYPE_NOESC : 0 );
4921+ (unierr ? YYJSON_SUBTYPE_UNIERR :
4922+ (quo == '"' ? YYJSON_SUBTYPE_NOESC : 0 ));
49054923 val -> uni .str = (const char * )hdr ;
49064924 * src = '\0' ;
49074925 * end = src + 1 ;
@@ -4950,14 +4968,13 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
49504968 }
49514969#endif
49524970 if (unlikely (pos == src )) {
4953- if (has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 )) {
4954- dst = src ;
4955- * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
4956- ++ src ;
4957- goto copy_utf8 ;
4958- }
4959- if (has_allow (INVALID_UNICODE )) ++ src ;
4960- else return_err (src , "invalid UTF-8 encoding in string" );
4971+ if (!has_allow (INVALID_UNICODE ) &&
4972+ !has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 ))
4973+ return_err (src , "invalid UTF-8 encoding in string" );
4974+ dst = src ;
4975+ * dst ++ = * src ++ ;
4976+ unierr = true;
4977+ goto copy_utf8 ;
49614978 }
49624979 goto skip_ascii ;
49634980 }
@@ -4977,7 +4994,7 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
49774994 case 't' : * dst ++ = '\t' ; src ++ ; break ;
49784995 case 'u' :
49794996 src -- ;
4980- if (!read_uni_esc (& src , & dst , msg , flg )) return_err (src , * msg );
4997+ if (!read_uni_esc (& src , & dst , msg , flg , & unierr )) return_err (src , * msg );
49814998 break ;
49824999 default : {
49835000 if (has_allow (EXT_ESCAPE )) {
@@ -5029,24 +5046,21 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
50295046 }
50305047 }
50315048 } else if (likely (* src == quo )) {
5032- val -> tag = ((u64 )(dst - hdr ) << YYJSON_TAG_BIT ) | YYJSON_TYPE_STR ;
5049+ val -> tag = ((u64 )(dst - hdr ) << YYJSON_TAG_BIT ) | YYJSON_TYPE_STR |
5050+ (unierr ? YYJSON_SUBTYPE_UNIERR : 0 );
50335051 val -> uni .str = (const char * )hdr ;
50345052 * dst = '\0' ;
50355053 * end = src + 1 ;
50365054 if (con ) con [0 ] = con [1 ] = NULL ;
50375055 return true;
50385056 } else {
5039- if (has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 )) {
5040- if (src >= eof ) return_err (src , "unclosed string" );
5041- * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
5042- src ++ ;
5043- } else {
5044- if (!has_allow (INVALID_UNICODE )) {
5045- return_err (src , "unexpected control character in string" );
5046- }
5047- if (src >= eof ) return_err (src , "unclosed string" );
5048- * dst ++ = * src ++ ;
5057+ if (!has_allow (INVALID_UNICODE ) &&
5058+ !has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 )) {
5059+ return_err (src , "unexpected control character in string" );
50495060 }
5061+ if (src >= eof ) return_err (src , "unclosed string" );
5062+ * dst ++ = * src ++ ;
5063+ unierr = true;
50505064 }
50515065
50525066copy_ascii :
@@ -5134,15 +5148,12 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
51345148 }
51355149#endif
51365150 if (unlikely (pos == src )) {
5137- if (has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 )) {
5138- * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
5139- ++ src ;
5140- goto copy_utf8 ;
5141- }
5142- if (!has_allow (INVALID_UNICODE )) {
5151+ if (!has_allow (INVALID_UNICODE ) &&
5152+ !has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 ))
51435153 return_err (src , MSG_ERR_UTF8 );
5144- }
5145- goto copy_ascii_stop_1 ;
5154+ * dst ++ = * src ++ ;
5155+ unierr = true;
5156+ goto copy_utf8 ;
51465157 }
51475158 goto copy_ascii ;
51485159 }
@@ -5177,7 +5188,7 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
51775188
51785189#define return_suc (_str_end , _cur_end ) do { \
51795190 val->tag = ((u64)(_str_end - hdr) << YYJSON_TAG_BIT) | \
5180- (u64)(YYJSON_TYPE_STR); \
5191+ (u64)(YYJSON_TYPE_STR | (unierr ? YYJSON_SUBTYPE_UNIERR : 0) ); \
51815192 val->uni.str = (const char *)hdr; \
51825193 *pre = _str_end; *end = _cur_end; \
51835194 return true; \
@@ -5188,6 +5199,7 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
51885199 u8 * src = hdr , * dst = NULL ;
51895200 u16 hi , lo ;
51905201 u32 uni , tmp ;
5202+ bool unierr = false;
51915203
51925204 /* add null-terminator for previous raw string */
51935205 * * pre = '\0' ;
@@ -5232,9 +5244,14 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
52325244 src += 4 ;
52335245 } else {
52345246#if !YYJSON_DISABLE_UTF8_VALIDATION
5235- if (!has_allow (INVALID_UNICODE )) return_err (src , MSG_ERR_UTF8 );
5247+ if (!has_allow (INVALID_UNICODE ) &&
5248+ !has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 ))
5249+ return_err (src , MSG_ERR_UTF8 );
52365250#endif
5237- src += 1 ;
5251+ dst = src ;
5252+ * dst ++ = * src ++ ;
5253+ unierr = true;
5254+ goto copy_utf8 ;
52385255 }
52395256 }
52405257 if (char_is_id_ascii (* src )) goto skip_ascii ;
@@ -5243,7 +5260,7 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
52435260 dst = src ;
52445261copy_escape :
52455262 if (byte_match_2 (src , "\\u" )) {
5246- if (!read_uni_esc (& src , & dst , msg , flg )) return_err (src , * msg );
5263+ if (!read_uni_esc (& src , & dst , msg , flg , & unierr )) return_err (src , * msg );
52475264 } else {
52485265 if (!char_is_id_next (* src )) return_suc (dst , src );
52495266 return_err (src , "unexpected character in key" );
@@ -5299,13 +5316,8 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
52995316 !has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 ))
53005317 return_err (src , MSG_ERR_UTF8 );
53015318#endif
5302- if (has_rflag (flg , YYJSON_READ_REPLACE_INVALID_UNICODE , 1 )) {
5303- * dst ++ = 0xEF ; * dst ++ = 0xBF ; * dst ++ = 0xBD ;
5304- src += 1 ;
5305- } else {
5306- * dst = * src ;
5307- dst += 1 ; src += 1 ;
5308- }
5319+ * dst ++ = * src ++ ;
5320+ unierr = true;
53095321 }
53105322 }
53115323 if (char_is_id_ascii (* src )) goto copy_ascii ;
@@ -9120,12 +9132,16 @@ static_inline u8 *yyjson_write_single(yyjson_val *val,
91209132 str_ptr = (const u8 * )unsafe_yyjson_get_str (val );
91219133 check_str_len (str_len );
91229134 incr_len (str_len * 6 + 2 + end_len );
9123- if (likely (cpy ) && unsafe_yyjson_get_subtype (val )) {
9124- cur = write_str_noesc (cur , str_ptr , str_len );
9125- } else {
9126- cur = write_str (cur , esc , inv , str_ptr , str_len , enc_table );
9127- if (unlikely (!cur )) goto fail_str ;
9128- }
9135+ do {
9136+ yyjson_subtype st = unsafe_yyjson_get_subtype (val );
9137+ if (likely (cpy ) && st == YYJSON_SUBTYPE_NOESC ) {
9138+ cur = write_str_noesc (cur , str_ptr , str_len );
9139+ } else {
9140+ bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR );
9141+ cur = write_str (cur , esc , inv2 , str_ptr , str_len , enc_table );
9142+ if (unlikely (!cur )) goto fail_str ;
9143+ }
9144+ } while (0 );
91299145 break ;
91309146
91319147 case YYJSON_TYPE_NUM :
@@ -9257,12 +9273,16 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root,
92579273 str_ptr = (const u8 * )unsafe_yyjson_get_str (val );
92589274 check_str_len (str_len );
92599275 incr_len (str_len * 6 + 16 );
9260- if (likely (cpy ) && unsafe_yyjson_get_subtype (val )) {
9261- cur = write_str_noesc (cur , str_ptr , str_len );
9262- } else {
9263- cur = write_str (cur , esc , inv , str_ptr , str_len , enc_table );
9264- if (unlikely (!cur )) goto fail_str ;
9265- }
9276+ do {
9277+ yyjson_subtype st = unsafe_yyjson_get_subtype (val );
9278+ if (likely (cpy ) && st == YYJSON_SUBTYPE_NOESC ) {
9279+ cur = write_str_noesc (cur , str_ptr , str_len );
9280+ } else {
9281+ bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR );
9282+ cur = write_str (cur , esc , inv2 , str_ptr , str_len , enc_table );
9283+ if (unlikely (!cur )) goto fail_str ;
9284+ }
9285+ } while (0 );
92669286 * cur ++ = is_key ? ':' : ',' ;
92679287 goto val_end ;
92689288 }
@@ -9443,12 +9463,16 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root,
94439463 check_str_len (str_len );
94449464 incr_len (str_len * 6 + 16 + (no_indent ? 0 : level * 4 ));
94459465 cur = write_indent (cur , no_indent ? 0 : level , spaces );
9446- if (likely (cpy ) && unsafe_yyjson_get_subtype (val )) {
9447- cur = write_str_noesc (cur , str_ptr , str_len );
9448- } else {
9449- cur = write_str (cur , esc , inv , str_ptr , str_len , enc_table );
9450- if (unlikely (!cur )) goto fail_str ;
9451- }
9466+ do {
9467+ yyjson_subtype st = unsafe_yyjson_get_subtype (val );
9468+ if (likely (cpy ) && st == YYJSON_SUBTYPE_NOESC ) {
9469+ cur = write_str_noesc (cur , str_ptr , str_len );
9470+ } else {
9471+ bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR );
9472+ cur = write_str (cur , esc , inv2 , str_ptr , str_len , enc_table );
9473+ if (unlikely (!cur )) goto fail_str ;
9474+ }
9475+ } while (0 );
94529476 * cur ++ = is_key ? ':' : ',' ;
94539477 * cur ++ = is_key ? ' ' : '\n' ;
94549478 goto val_end ;
@@ -9812,12 +9836,16 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root,
98129836 str_ptr = (const u8 * )unsafe_yyjson_get_str (val );
98139837 check_str_len (str_len );
98149838 incr_len (str_len * 6 + 16 );
9815- if (likely (cpy ) && unsafe_yyjson_get_subtype (val )) {
9816- cur = write_str_noesc (cur , str_ptr , str_len );
9817- } else {
9818- cur = write_str (cur , esc , inv , str_ptr , str_len , enc_table );
9819- if (unlikely (!cur )) goto fail_str ;
9820- }
9839+ do {
9840+ yyjson_subtype st = unsafe_yyjson_get_subtype (val );
9841+ if (likely (cpy ) && st == YYJSON_SUBTYPE_NOESC ) {
9842+ cur = write_str_noesc (cur , str_ptr , str_len );
9843+ } else {
9844+ bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR );
9845+ cur = write_str (cur , esc , inv2 , str_ptr , str_len , enc_table );
9846+ if (unlikely (!cur )) goto fail_str ;
9847+ }
9848+ } while (0 );
98219849 * cur ++ = is_key ? ':' : ',' ;
98229850 goto val_end ;
98239851 }
@@ -10004,12 +10032,16 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root,
1000410032 check_str_len (str_len );
1000510033 incr_len (str_len * 6 + 16 + (no_indent ? 0 : level * 4 ));
1000610034 cur = write_indent (cur , no_indent ? 0 : level , spaces );
10007- if (likely (cpy ) && unsafe_yyjson_get_subtype (val )) {
10008- cur = write_str_noesc (cur , str_ptr , str_len );
10009- } else {
10010- cur = write_str (cur , esc , inv , str_ptr , str_len , enc_table );
10011- if (unlikely (!cur )) goto fail_str ;
10012- }
10035+ do {
10036+ yyjson_subtype st = unsafe_yyjson_get_subtype (val );
10037+ if (likely (cpy ) && st == YYJSON_SUBTYPE_NOESC ) {
10038+ cur = write_str_noesc (cur , str_ptr , str_len );
10039+ } else {
10040+ bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR );
10041+ cur = write_str (cur , esc , inv2 , str_ptr , str_len , enc_table );
10042+ if (unlikely (!cur )) goto fail_str ;
10043+ }
10044+ } while (0 );
1001310045 * cur ++ = is_key ? ':' : ',' ;
1001410046 * cur ++ = is_key ? ' ' : '\n' ;
1001510047 goto val_end ;
0 commit comments