@@ -25,14 +25,13 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend;
2525 * Everything else (should be UTF-8) is just passed through and
2626 * appended to the result.
2727 */
28- static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_ascii_only , bool out_script_safe )
28+ static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
2929{
3030 const char * hexdig = "0123456789abcdef" ;
3131 char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
3232
3333 const char * in_utf8_str = RSTRING_PTR (in_string );
3434 unsigned long in_utf8_len = RSTRING_LEN (in_string );
35- bool in_is_ascii_only = rb_enc_str_asciionly_p (in_string );
3635
3736 unsigned long beg = 0 , pos ;
3837
@@ -42,46 +41,183 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
4241 bool should_escape ;
4342
4443 /* UTF-8 decoding */
45- if (in_is_ascii_only ) {
46- ch = in_utf8_str [pos ];
47- ch_len = 1 ;
48- } else {
49- short i ;
50- if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
51- else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
52- else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
53- else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
54- else
55- rb_raise (rb_path2class ("JSON::GeneratorError" ),
56- "source sequence is illegal/malformed utf-8" );
57- if ((pos + ch_len ) > in_utf8_len )
58- rb_raise (rb_path2class ("JSON::GeneratorError" ),
59- "partial character in source, but hit end" );
60- for (i = 1 ; i < ch_len ; i ++ ) {
61- if ((in_utf8_str [pos + i ] & 0xC0 ) != 0x80 ) /* leading 2 bits should be 0b10 */
62- rb_raise (rb_path2class ("JSON::GeneratorError" ),
63- "source sequence is illegal/malformed utf-8" );
64- ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
44+ short i ;
45+ if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
46+ else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
47+ else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
48+ else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
49+ else {
50+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
51+ }
52+
53+ for (i = 1 ; i < ch_len ; i ++ ) {
54+ ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
55+ }
56+
57+ /* JSON policy */
58+ should_escape =
59+ (ch < 0x20 ) ||
60+ (ch == '"' ) ||
61+ (ch == '\\' ) ||
62+ (out_script_safe && (ch == '/' )) ||
63+ (out_script_safe && (ch == 0x2028 )) ||
64+ (out_script_safe && (ch == 0x2029 ));
65+
66+ /* JSON encoding */
67+ if (should_escape ) {
68+ if (pos > beg ) {
69+ fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
70+ }
71+
72+ beg = pos + ch_len ;
73+ switch (ch ) {
74+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
75+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
76+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
77+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
78+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
79+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
80+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
81+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
82+ default :
83+ if (ch <= 0xFFFF ) {
84+ scratch [2 ] = hexdig [ch >> 12 ];
85+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
86+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
87+ scratch [5 ] = hexdig [ch & 0xf ];
88+ fbuffer_append (out_buffer , scratch , 6 );
89+ } else {
90+ uint16_t hi , lo ;
91+ ch -= 0x10000 ;
92+ hi = 0xD800 + (uint16_t )(ch >> 10 );
93+ lo = 0xDC00 + (uint16_t )(ch & 0x3FF );
94+
95+ scratch [2 ] = hexdig [hi >> 12 ];
96+ scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
97+ scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
98+ scratch [5 ] = hexdig [hi & 0xf ];
99+
100+ scratch [8 ] = hexdig [lo >> 12 ];
101+ scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
102+ scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
103+ scratch [11 ] = hexdig [lo & 0xf ];
104+
105+ fbuffer_append (out_buffer , scratch , 12 );
106+ }
65107 }
66- if (ch > 0x10FFFF )
67- rb_raise (rb_path2class ("JSON::GeneratorError" ),
68- "source sequence is illegal/malformed utf-8" );
69108 }
70109
110+ pos += ch_len ;
111+ }
112+
113+ if (beg < in_utf8_len ) {
114+ fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
115+ }
116+
117+ RB_GC_GUARD (in_string );
118+ }
119+
120+ static void convert_ASCII_to_JSON (FBuffer * out_buffer , VALUE str , bool out_script_safe )
121+ {
122+ const char * hexdig = "0123456789abcdef" ;
123+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
124+
125+ const char * ptr = RSTRING_PTR (str );
126+ unsigned long len = RSTRING_LEN (str );
127+
128+ unsigned long beg = 0 , pos ;
129+
130+ for (pos = 0 ; pos < len ;) {
131+ unsigned char ch = ptr [pos ];
132+ bool should_escape ;
133+
71134 /* JSON policy */
72135 should_escape =
73136 (ch < 0x20 ) ||
74137 (ch == '"' ) ||
75138 (ch == '\\' ) ||
76- (out_ascii_only && (ch > 0x7F )) ||
139+ (out_script_safe && (ch == '/' ));
140+
141+ /* JSON encoding */
142+ if (should_escape ) {
143+ if (pos > beg ) {
144+ fbuffer_append (out_buffer , & ptr [beg ], pos - beg );
145+ }
146+
147+ beg = pos + 1 ;
148+ switch (ch ) {
149+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
150+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
151+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
152+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
153+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
154+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
155+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
156+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
157+ default :
158+ scratch [2 ] = hexdig [ch >> 12 ];
159+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
160+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
161+ scratch [5 ] = hexdig [ch & 0xf ];
162+ fbuffer_append (out_buffer , scratch , 6 );
163+ }
164+ }
165+
166+ pos ++ ;
167+ }
168+
169+ if (beg < len ) {
170+ fbuffer_append (out_buffer , & ptr [beg ], len - beg );
171+ }
172+
173+ RB_GC_GUARD (str );
174+ }
175+
176+ static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
177+ {
178+ const char * hexdig = "0123456789abcdef" ;
179+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
180+
181+ const char * in_utf8_str = RSTRING_PTR (in_string );
182+ unsigned long in_utf8_len = RSTRING_LEN (in_string );
183+
184+ unsigned long beg = 0 , pos ;
185+
186+ for (pos = 0 ; pos < in_utf8_len ;) {
187+ uint32_t ch ;
188+ short ch_len ;
189+ bool should_escape ;
190+
191+ /* UTF-8 decoding */
192+ short i ;
193+ if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
194+ else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
195+ else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
196+ else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
197+ else {
198+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
199+ }
200+
201+ for (i = 1 ; i < ch_len ; i ++ ) {
202+ ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
203+ }
204+
205+ /* JSON policy */
206+ should_escape =
207+ (ch < 0x20 ) ||
208+ (ch == '"' ) ||
209+ (ch == '\\' ) ||
210+ (ch > 0x7F ) ||
77211 (out_script_safe && (ch == '/' )) ||
78212 (out_script_safe && (ch == 0x2028 )) ||
79213 (out_script_safe && (ch == 0x2029 ));
80214
81215 /* JSON encoding */
82216 if (should_escape ) {
83- if (pos > beg )
217+ if (pos > beg ) {
84218 fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
219+ }
220+
85221 beg = pos + ch_len ;
86222 switch (ch ) {
87223 case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
@@ -122,8 +258,11 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
122258
123259 pos += ch_len ;
124260 }
125- if (beg < in_utf8_len )
261+
262+ if (beg < in_utf8_len ) {
126263 fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
264+ }
265+
127266 RB_GC_GUARD (in_string );
128267}
129268
@@ -570,11 +709,27 @@ static int enc_utf8_compatible_p(int enc_idx)
570709
571710static void generate_json_string (FBuffer * buffer , VALUE Vstate , JSON_Generator_State * state , VALUE obj )
572711{
573- fbuffer_append_char (buffer , '"' );
574712 if (!enc_utf8_compatible_p (RB_ENCODING_GET (obj ))) {
575713 obj = rb_str_export_to_enc (obj , rb_utf8_encoding ());
576714 }
577- convert_UTF8_to_JSON (buffer , obj , state -> ascii_only , state -> script_safe );
715+
716+ fbuffer_append_char (buffer , '"' );
717+
718+ switch (rb_enc_str_coderange (obj )) {
719+ case ENC_CODERANGE_7BIT :
720+ convert_ASCII_to_JSON (buffer , obj , state -> script_safe );
721+ break ;
722+ case ENC_CODERANGE_VALID :
723+ if (RB_UNLIKELY (state -> ascii_only )) {
724+ convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe );
725+ } else {
726+ convert_UTF8_to_JSON (buffer , obj , state -> script_safe );
727+ }
728+ break ;
729+ default :
730+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
731+ break ;
732+ }
578733 fbuffer_append_char (buffer , '"' );
579734}
580735
0 commit comments