@@ -101,6 +101,7 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
101101// 0 - single byte char that doesn't need to be escaped.
102102// (x | 8) - char that needs to be escaped.
103103static const unsigned char CHAR_LENGTH_MASK = 7 ; // low three bits of a table entry: the character's length in bytes
104+ static const unsigned char ESCAPE_MASK = 8 ; // bit 3 of a table entry: set when the character needs to be escaped
104105
105106static const unsigned char escape_table [256 ] = {
106107 // ASCII Control Characters
@@ -165,6 +166,84 @@ static const unsigned char script_safe_escape_table[256] = {
165166 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
166167};
167168
169+
/* Scanning state shared by the search and escape helpers in this file.
 * ptr..end is the input that still has to be examined; cursor..ptr is input
 * that has been examined but not yet copied into buffer. */
170+ typedef struct _search_state {
171+ const char * ptr ; // current scan position
172+ const char * end ; // one past the last input byte; scanning stops when ptr reaches it
173+ const char * cursor ; // start of the pending run that search_flush() copies
174+ FBuffer * buffer ; // output buffer receiving copied and escaped bytes
175+ } search_state ;
176+
/* Appends the pending bytes between cursor and ptr to the output buffer, then
 * moves cursor up to ptr so the same bytes are not appended a second time.
 * The append is issued even when the pending run is empty (length 0). */
177+ static inline void search_flush (search_state * search )
178+ {
179+ fbuffer_append (search -> buffer , search -> cursor , search -> ptr - search -> cursor );
180+ search -> cursor = search -> ptr ;
181+ }
182+
/* Scans forward from search->ptr for the next character whose escape_table
 * entry has ESCAPE_MASK set.
 *
 * Characters that need no escaping are skipped: an entry of 0 advances one
 * byte, any other entry without ESCAPE_MASK advances by the entry's value.
 *
 * Returns the byte length of the character to escape (entry & CHAR_LENGTH_MASK)
 * with search->ptr left pointing at it, or 0 once search->end is reached.
 * In both cases the bytes skipped so far are flushed to the buffer first. */
183+ static inline unsigned char search_escape (search_state * search , const unsigned char escape_table [256 ])
184+ {
185+ while (search -> ptr < search -> end ) {
186+ unsigned char ch = (unsigned char )* search -> ptr ;
187+ unsigned char ch_len = escape_table [ch ];
188+
189+ if (RB_UNLIKELY (ch_len )) {
190+ if (ch_len & ESCAPE_MASK ) {
// Entry 11 marks a 3-byte candidate: it is only escaped when the second
// byte is 0x80 and the third is 0xA8 or 0xA9 (third byte >> 1 == 0x54).
// NOTE(review): bytes 1 and 2 are read without comparing against end;
// presumably safe because the caller only passes validly encoded strings - confirm.
191+ if (RB_UNLIKELY (ch_len == 11 )) {
192+ const unsigned char * uptr = (const unsigned char * )search -> ptr ;
193+ if (!(uptr [1 ] == 0x80 && (uptr [2 ] >> 1 ) == 0x54 )) {
194+ search -> ptr += 3 ; // not one of the two escaped sequences: skip all three bytes
195+ continue ;
196+ }
197+ }
198+ search_flush (search ); // copy everything before the character to escape
199+ return ch_len & CHAR_LENGTH_MASK ;
200+ } else {
201+ search -> ptr += ch_len ; // multi-byte character that is passed through unchanged
202+ }
203+ } else {
204+ search -> ptr ++ ;
205+ }
206+ }
207+ search_flush (search ); // end of input: copy the remaining tail
208+ return 0 ;
209+ }
210+
/* Writes the escaped form of the character at search->ptr to the buffer and
 * advances both ptr and cursor past it.
 *
 * ch_len is the length reported by search_escape():
 *   1 - single byte, written as a two-character escape or as a six-character
 *       escape carrying the byte value as two hex digits;
 *   3 - three-byte sequence, written as the escape for U+2028 or U+2029.
 * Any other ch_len appends nothing but still skips ch_len bytes. */
211+ static inline void fast_escape_UTF8_char (search_state * search , unsigned char ch_len ) {
212+ const unsigned char ch = (unsigned char )* search -> ptr ;
213+ switch (ch_len ) {
214+ case 1 : {
215+ switch (ch ) {
216+ case '"' : fbuffer_append (search -> buffer , "\\\"" , 2 ); break ;
217+ case '\\' : fbuffer_append (search -> buffer , "\\\\" , 2 ); break ;
218+ case '/' : fbuffer_append (search -> buffer , "\\/" , 2 ); break ;
219+ case '\b' : fbuffer_append (search -> buffer , "\\b" , 2 ); break ;
220+ case '\f' : fbuffer_append (search -> buffer , "\\f" , 2 ); break ;
221+ case '\n' : fbuffer_append (search -> buffer , "\\n" , 2 ); break ;
222+ case '\r' : fbuffer_append (search -> buffer , "\\r" , 2 ); break ;
223+ case '\t' : fbuffer_append (search -> buffer , "\\t" , 2 ); break ;
224+ default : {
// No short form: fill the last two slots with the byte's hex digits.
225+ const char * hexdig = "0123456789abcdef" ;
226+ char scratch [6 ] = { '\\' , 'u' , '0' , '0' , 0 , 0 };
227+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
228+ scratch [5 ] = hexdig [ch & 0xf ];
229+ fbuffer_append (search -> buffer , scratch , 6 );
230+ break ;
231+ }
232+ }
233+ break ;
234+ }
235+ case 3 : {
// The low bit of the third byte picks between the two escapes
// (search_escape() only stops on third bytes 0xA8 and 0xA9 for entry 11).
236+ if (search -> ptr [2 ] & 1 ) {
237+ fbuffer_append (search -> buffer , "\\u2029" , 6 );
238+ } else {
239+ fbuffer_append (search -> buffer , "\\u2028" , 6 );
240+ }
241+ break ;
242+ }
243+ }
244+ search -> cursor = (search -> ptr += ch_len ); // step past the character; nothing is left pending
245+ }
246+
168247/* Converts the input held in search to a JSON string (without the wrapping '"'
169248 * characters), appended to the FBuffer that search points to.
170249 *
@@ -181,182 +260,114 @@ static const unsigned char script_safe_escape_table[256] = {
181260 * Everything else (should be UTF-8) is just passed through and
182261 * appended to the result.
183262 */
184- static inline void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE str , const unsigned char escape_table [256 ])
/* Alternates search_escape() and fast_escape_UTF8_char() over the input in
 * search until search_escape() reports the end of input by returning 0. */
263+ static inline void convert_UTF8_to_JSON (search_state * search , const unsigned char escape_table [256 ])
185264{
186- const char * hexdig = "0123456789abcdef" ;
187- char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
188-
189- const char * ptr = RSTRING_PTR (str );
190- unsigned long len = RSTRING_LEN (str );
191-
192- unsigned long beg = 0 , pos = 0 ;
193-
194- #define FLUSH_POS (bytes ) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
265+ unsigned char ch_len ;
266+ while ((ch_len = search_escape (search , escape_table ))) {
267+ fast_escape_UTF8_char (search , ch_len ); // write the escape and step past the character
268+ }
269+ }
195270
196- while (pos < len ) {
197- unsigned char ch = ptr [pos ];
/* ASCII-only counterpart of search_escape(): stops at the first byte whose
 * escape_table entry is non-zero, so multi-byte characters are reported for
 * escaping as well instead of being skipped.
 *
 * Returns the entry's length bits (entry & CHAR_LENGTH_MASK) with search->ptr
 * left on the character, or 0 once search->end is reached. In both cases the
 * bytes skipped so far are flushed to the buffer first. */
271+ static inline unsigned char search_ascii_only_escape (search_state * search , const unsigned char escape_table [256 ])
272+ {
273+ while (search -> ptr < search -> end ) {
274+ unsigned char ch = (unsigned char )* search -> ptr ;
198275 unsigned char ch_len = escape_table [ch ];
199- /* JSON encoding */
200276
201277 if (RB_UNLIKELY (ch_len )) {
202- switch (ch_len ) {
203- case 9 : {
204- FLUSH_POS (1 );
205- switch (ch ) {
206- case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
207- case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
208- case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
209- case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
210- case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
211- case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
212- case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
213- case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
214- default : {
215- scratch [2 ] = '0' ;
216- scratch [3 ] = '0' ;
217- scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
218- scratch [5 ] = hexdig [ch & 0xf ];
219- fbuffer_append (out_buffer , scratch , 6 );
220- break ;
221- }
222- }
223- break ;
224- }
225- case 11 : {
226- unsigned char b2 = ptr [pos + 1 ];
227- if (RB_UNLIKELY (b2 == 0x80 )) {
228- unsigned char b3 = ptr [pos + 2 ];
229- if (b3 == 0xA8 ) {
230- FLUSH_POS (3 );
231- fbuffer_append (out_buffer , "\\u2028" , 6 );
232- break ;
233- } else if (b3 == 0xA9 ) {
234- FLUSH_POS (3 );
235- fbuffer_append (out_buffer , "\\u2029" , 6 );
236- break ;
237- }
238- }
239- ch_len = 3 ;
240- // fallthrough
241- }
242- default :
243- pos += ch_len ;
244- break ;
245- }
278+ search_flush (search ); // copy everything before the character to escape
279+ return ch_len & CHAR_LENGTH_MASK ;
246280 } else {
247- pos ++ ;
281+ search -> ptr ++ ;
248282 }
249283 }
250- #undef FLUSH_POS
251-
252- if (beg < len ) {
253- fbuffer_append (out_buffer , & ptr [beg ], len - beg );
254- }
255-
256- RB_GC_GUARD (str );
284+ search_flush (search ); // end of input: copy the remaining tail
285+ return 0 ;
257286}
258287
259- static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE str , const unsigned char escape_table [256 ])
260- {
261- const char * hexdig = "0123456789abcdef" ;
262- char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
263-
264- const char * ptr = RSTRING_PTR (str );
265- unsigned long len = RSTRING_LEN (str );
266-
267- unsigned long beg = 0 , pos = 0 ;
268-
269- #define FLUSH_POS (bytes ) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
270-
271- while (pos < len ) {
272- unsigned char ch = ptr [pos ];
273- unsigned char ch_len = escape_table [ch ];
274-
275- if (RB_UNLIKELY (ch_len )) {
276- switch (ch_len ) {
277- case 9 : {
278- FLUSH_POS (1 );
279- switch (ch ) {
280- case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
281- case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
282- case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
283- case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
284- case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
285- case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
286- case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
287- case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
288- default : {
289- scratch [2 ] = '0' ;
290- scratch [3 ] = '0' ;
291- scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
292- scratch [5 ] = hexdig [ch & 0xf ];
293- fbuffer_append (out_buffer , scratch , 6 );
294- break ;
295- }
296- }
/* Writes the escaped form of the character at search->ptr to the buffer and
 * advances both ptr and cursor past it.
 *
 * ch_len is the length reported by search_ascii_only_escape():
 *   1     - single byte, written as a two-character escape or as a
 *           six-character escape carrying the byte value as two hex digits;
 *   other - the ch_len-byte sequence is decoded into one value and written as
 *           a six-character escape, or as two of them (a surrogate pair, 12
 *           characters) when the value is above 0xFFFF. */
288+ static inline void full_escape_UTF8_char (search_state * search , unsigned char ch_len ) {
289+ const unsigned char ch = (unsigned char )* search -> ptr ;
290+ switch (ch_len ) {
291+ case 1 : {
292+ switch (ch ) {
293+ case '"' : fbuffer_append (search -> buffer , "\\\"" , 2 ); break ;
294+ case '\\' : fbuffer_append (search -> buffer , "\\\\" , 2 ); break ;
295+ case '/' : fbuffer_append (search -> buffer , "\\/" , 2 ); break ;
296+ case '\b' : fbuffer_append (search -> buffer , "\\b" , 2 ); break ;
297+ case '\f' : fbuffer_append (search -> buffer , "\\f" , 2 ); break ;
298+ case '\n' : fbuffer_append (search -> buffer , "\\n" , 2 ); break ;
299+ case '\r' : fbuffer_append (search -> buffer , "\\r" , 2 ); break ;
300+ case '\t' : fbuffer_append (search -> buffer , "\\t" , 2 ); break ;
301+ default : {
302+ const char * hexdig = "0123456789abcdef" ;
303+ char scratch [6 ] = { '\\' , 'u' , '0' , '0' , 0 , 0 };
304+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
305+ scratch [5 ] = hexdig [ch & 0xf ];
306+ fbuffer_append (search -> buffer , scratch , 6 );
297307 break ;
298308 }
299- default : {
300- uint32_t wchar = 0 ;
301- ch_len = ch_len & CHAR_LENGTH_MASK ;
302-
303- switch (ch_len ) {
304- case 2 :
305- wchar = ptr [pos ] & 0x1F ;
306- break ;
307- case 3 :
308- wchar = ptr [pos ] & 0x0F ;
309- break ;
310- case 4 :
311- wchar = ptr [pos ] & 0x07 ;
312- break ;
313- }
309+ }
310+ break ;
311+ }
312+ default : { // multi-byte sequence: decode it, then write it in escaped hex form
313+ const char * hexdig = "0123456789abcdef" ;
314+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
314315
315- for (short i = 1 ; i < ch_len ; i ++ ) {
316- wchar = (wchar << 6 ) | (ptr [pos + i ] & 0x3F );
317- }
316+ uint32_t wchar = 0 ;
318317
319- FLUSH_POS (ch_len );
318+ switch (ch_len ) { // how many bits the first byte contributes depends on the length
319+ case 2 :
320+ wchar = ch & 0x1F ;
321+ break ;
322+ case 3 :
323+ wchar = ch & 0x0F ;
324+ break ;
325+ case 4 :
326+ wchar = ch & 0x07 ;
327+ break ;
328+ }
320329
321- if (wchar <= 0xFFFF ) {
322- scratch [2 ] = hexdig [wchar >> 12 ];
323- scratch [3 ] = hexdig [(wchar >> 8 ) & 0xf ];
324- scratch [4 ] = hexdig [(wchar >> 4 ) & 0xf ];
325- scratch [5 ] = hexdig [wchar & 0xf ];
326- fbuffer_append (out_buffer , scratch , 6 );
327- } else {
328- uint16_t hi , lo ;
329- wchar -= 0x10000 ;
330- hi = 0xD800 + (uint16_t )(wchar >> 10 );
331- lo = 0xDC00 + (uint16_t )(wchar & 0x3FF );
332-
333- scratch [2 ] = hexdig [hi >> 12 ];
334- scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
335- scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
336- scratch [5 ] = hexdig [hi & 0xf ];
337-
338- scratch [8 ] = hexdig [lo >> 12 ];
339- scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
340- scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
341- scratch [11 ] = hexdig [lo & 0xf ];
342-
343- fbuffer_append (out_buffer , scratch , 12 );
344- }
330+ for (short i = 1 ; i < ch_len ; i ++ ) { // each following byte adds its low six bits
331+ wchar = (wchar << 6 ) | (search -> ptr [i ] & 0x3F );
332+ }
345333
346- break ;
347- }
334+ if (wchar <= 0xFFFF ) {
335+ scratch [2 ] = hexdig [wchar >> 12 ];
336+ scratch [3 ] = hexdig [(wchar >> 8 ) & 0xf ];
337+ scratch [4 ] = hexdig [(wchar >> 4 ) & 0xf ];
338+ scratch [5 ] = hexdig [wchar & 0xf ];
339+ fbuffer_append (search -> buffer , scratch , 6 );
340+ } else { // above 0xFFFF: split into a high (0xD800-based) and low (0xDC00-based) half
341+ uint16_t hi , lo ;
342+ wchar -= 0x10000 ;
343+ hi = 0xD800 + (uint16_t )(wchar >> 10 );
344+ lo = 0xDC00 + (uint16_t )(wchar & 0x3FF );
345+
346+ scratch [2 ] = hexdig [hi >> 12 ];
347+ scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
348+ scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
349+ scratch [5 ] = hexdig [hi & 0xf ];
350+
351+ scratch [8 ] = hexdig [lo >> 12 ];
352+ scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
353+ scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
354+ scratch [11 ] = hexdig [lo & 0xf ];
355+
356+ fbuffer_append (search -> buffer , scratch , 12 );
348357 }
349- } else {
350- pos ++ ;
358+
359+ break ;
351360 }
352361 }
353- #undef FLUSH_POS
362+ search -> cursor = (search -> ptr += ch_len ); // step past the character; nothing is left pending
363+ }
354364
355- if (beg < len ) {
356- fbuffer_append (out_buffer , & ptr [beg ], len - beg );
/* Escapes the input held in search: search_ascii_only_escape() locates each
 * character to escape and full_escape_UTF8_char() writes its escaped form,
 * until the search reports the end of input by returning 0. */
365+ static void convert_UTF8_to_ASCII_only_JSON (search_state * search , const unsigned char escape_table [256 ])
366+ {
367+ unsigned char ch_len ;
368+ while ((ch_len = search_ascii_only_escape (search , escape_table ))) {
369+ full_escape_UTF8_char (search , ch_len );
357370 }
358-
359- RB_GC_GUARD (str );
360371}
361372
362373/*
@@ -911,13 +922,20 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
911922
912923 fbuffer_append_char (buffer , '"' );
913924
925+ long len ;
926+ search_state search ;
927+ search .buffer = buffer ;
928+ RSTRING_GETMEM (obj , search .ptr , len );
929+ search .cursor = search .ptr ;
930+ search .end = search .ptr + len ;
931+
914932 switch (rb_enc_str_coderange (obj )) {
915933 case ENC_CODERANGE_7BIT :
916934 case ENC_CODERANGE_VALID :
917935 if (RB_UNLIKELY (state -> ascii_only )) {
918- convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
936+ convert_UTF8_to_ASCII_only_JSON (& search , state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
919937 } else {
920- convert_UTF8_to_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table );
938+ convert_UTF8_to_JSON (& search , state -> script_safe ? script_safe_escape_table : escape_table );
921939 }
922940 break ;
923941 default :
0 commit comments