@@ -199,73 +199,80 @@ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char esc
199199 RB_GC_GUARD (str );
200200}
201201
202- static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
202+ static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE str , const char escape_table [ 256 ] , bool out_script_safe )
203203{
204204 const char * hexdig = "0123456789abcdef" ;
205205 char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
206206
207- const char * in_utf8_str = RSTRING_PTR (in_string );
208- unsigned long in_utf8_len = RSTRING_LEN (in_string );
207+ const char * ptr = RSTRING_PTR (str );
208+ unsigned long len = RSTRING_LEN (str );
209209
210- unsigned long beg = 0 , pos ;
210+ unsigned long beg = 0 , pos = 0 ;
211211
212- for (pos = 0 ; pos < in_utf8_len ;) {
213- uint32_t ch ;
214- short ch_len ;
215- bool should_escape ;
216-
217- /* UTF-8 decoding */
218- short i ;
219- if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
220- else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
221- else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
222- else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
223- else {
224- rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
225- }
212+ #define FLUSH_POS (bytes ) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
226213
227- for ( i = 1 ; i < ch_len ; i ++ ) {
228- ch = ( ch << 6 ) | ( in_utf8_str [pos + i ] & 0x3F ) ;
229- }
214+ while ( pos < len ) {
215+ unsigned char ch = ptr [pos ] ;
216+ unsigned char ch_len = escape_table [ ch ];
230217
231- /* JSON policy */
232- should_escape =
233- (ch < 0x20 ) ||
234- (ch == '"' ) ||
235- (ch == '\\' ) ||
236- (ch > 0x7F ) ||
237- (out_script_safe && (ch == '/' )) ||
238- (out_script_safe && (ch == 0x2028 )) ||
239- (out_script_safe && (ch == 0x2029 ));
218+ if (RB_UNLIKELY (ch_len )) {
219+ switch (ch_len ) {
220+ case 0 :
221+ pos ++ ;
222+ break ;
223+ case 1 : {
224+ FLUSH_POS (1 );
225+ switch (ch ) {
226+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
227+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
228+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
229+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
230+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
231+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
232+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
233+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
234+ default : {
235+ scratch [2 ] = hexdig [ch >> 12 ];
236+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
237+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
238+ scratch [5 ] = hexdig [ch & 0xf ];
239+ fbuffer_append (out_buffer , scratch , 6 );
240+ break ;
241+ }
242+ }
243+ break ;
244+ }
245+ default : {
246+ uint32_t wchar = 0 ;
247+ switch (ch_len ) {
248+ case 2 :
249+ wchar = ptr [pos ] & 0x1F ;
250+ break ;
251+ case 3 :
252+ wchar = ptr [pos ] & 0x0F ;
253+ break ;
254+ case 4 :
255+ wchar = ptr [pos ] & 0x07 ;
256+ break ;
257+ }
240258
241- /* JSON encoding */
242- if (should_escape ) {
243- if (pos > beg ) {
244- fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
245- }
259+ for (short i = 1 ; i < ch_len ; i ++ ) {
260+ wchar = (wchar <<6 ) | (ptr [pos + i ] & 0x3F );
261+ }
246262
247- beg = pos + ch_len ;
248- switch (ch ) {
249- case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
250- case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
251- case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
252- case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
253- case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
254- case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
255- case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
256- case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
257- default :
258- if (ch <= 0xFFFF ) {
259- scratch [2 ] = hexdig [ch >> 12 ];
260- scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
261- scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
262- scratch [5 ] = hexdig [ch & 0xf ];
263+ FLUSH_POS (ch_len );
264+
265+ if (wchar <= 0xFFFF ) {
266+ scratch [2 ] = hexdig [wchar >> 12 ];
267+ scratch [3 ] = hexdig [(wchar >> 8 ) & 0xf ];
268+ scratch [4 ] = hexdig [(wchar >> 4 ) & 0xf ];
269+ scratch [5 ] = hexdig [wchar & 0xf ];
263270 fbuffer_append (out_buffer , scratch , 6 );
264271 } else {
265272 uint16_t hi , lo ;
266- ch -= 0x10000 ;
267- hi = 0xD800 + (uint16_t )(ch >> 10 );
268- lo = 0xDC00 + (uint16_t )(ch & 0x3FF );
273+ wchar -= 0x10000 ;
274+ hi = 0xD800 + (uint16_t )(wchar >> 10 );
275+ lo = 0xDC00 + (uint16_t )(wchar & 0x3FF );
269276
270277 scratch [2 ] = hexdig [hi >> 12 ];
271278 scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
@@ -279,17 +286,21 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE in_string
279286
280287 fbuffer_append (out_buffer , scratch , 12 );
281288 }
289+
290+ break ;
291+ }
282292 }
293+ } else {
294+ pos ++ ;
283295 }
284-
285- pos += ch_len ;
286296 }
297+ #undef FLUSH_POS
287298
288- if (beg < in_utf8_len ) {
289- fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
299+ if (beg < len ) {
300+ fbuffer_append (out_buffer , & ptr [beg ], len - beg );
290301 }
291302
292- RB_GC_GUARD (in_string );
303+ RB_GC_GUARD (str );
293304}
294305
295306static char * fstrndup (const char * ptr , unsigned long len ) {
@@ -747,7 +758,7 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
747758 break ;
748759 case ENC_CODERANGE_VALID :
749760 if (RB_UNLIKELY (state -> ascii_only )) {
750- convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe );
761+ convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table , state -> script_safe );
751762 } else {
752763 convert_UTF8_to_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table , state -> script_safe );
753764 }
0 commit comments