@@ -96,6 +96,74 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
9696 raise_generator_error_str (invalid_object , str );
9797}
9898
99+ // Escape-table entry encoding: 0 - single byte char that doesn't need to be escaped.
100+ // (x | 8) - char that needs to be escaped; x (the low three bits) is the value used as its length.
101+ static const unsigned char CHAR_LENGTH_MASK = 7 ; // selects bits 0-2 of a table entry (the length value x)
102+ static const unsigned char ESCAPE_MASK = 8 ; // bit 3 of a table entry: set when the char needs to be escaped
103+
104+ static const unsigned char escape_table [256 ] = { // per-byte flags handed to convert_UTF8_to_JSON when script_safe is off
105+ // 0x00-0x1F: ASCII control characters, all 9 (1 | ESCAPE_MASK), i.e. always escaped
106+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
107+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
108+ // 0x20-0x7F: remaining ASCII characters, 0 (no escaping) except where a row is annotated
109+ 0 , 0 , 9 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x22 '"' is escaped
110+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
111+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
112+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , 0 , 0 , 0 , // 0x5C '\\' is escaped
113+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
114+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
115+ }; // only 128 initializers are given: entries 0x80-0xFF are implicitly zero, so no non-ASCII byte is flagged by this table
116+
117+ static const unsigned char ascii_only_escape_table [256 ] = { // per-byte flags handed to convert_UTF8_to_ASCII_only_JSON when script_safe is off
118+ // 0x00-0x1F: ASCII control characters, all 9 (1 | ESCAPE_MASK), i.e. always escaped
119+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
120+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
121+ // 0x20-0x7F: remaining ASCII characters, 0 (no escaping) except where a row is annotated
122+ 0 , 0 , 9 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x22 '"' is escaped
123+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
124+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
125+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , 0 , 0 , 0 , // 0x5C '\\' is escaped
126+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
127+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
128+ // 0x80-0xBF: continuation bytes (non-zero, so they leave the fast path; ESCAPE_MASK is not set)
129+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
130+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
131+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
132+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
133+ // 0xC0-0xDF: first byte of a 2-byte code point
134+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
135+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
136+ // 0xE0-0xEF: first byte of a 3-byte code point
137+ 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 ,
138+ // 0xF0-0xFF: first byte of a 4+ byte code point (0xF0-0xF7: 4, 0xF8-0xFB: 5, 0xFC-0xFD: 6); 0xFE and 0xFF are 9 (1 | ESCAPE_MASK)
139+ 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
140+ }; // entries other than 9 reach the converter's default branch, which masks them with CHAR_LENGTH_MASK to get the byte length
141+
142+ static const unsigned char script_safe_escape_table [256 ] = { // per-byte flags handed to either converter when state->script_safe is set
143+ // 0x00-0x1F: ASCII control characters, all 9 (1 | ESCAPE_MASK), i.e. always escaped
144+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
145+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
146+ // 0x20-0x7F: remaining ASCII characters, 0 (no escaping) except where a row is annotated
147+ 0 , 0 , 9 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , // 0x22 '"' and 0x2F '/' are escaped
148+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
149+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
150+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , 0 , 0 , 0 , // 0x5C '\\' is escaped
151+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
152+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
153+ // 0x80-0xBF: continuation bytes (non-zero, so they leave the fast path; ESCAPE_MASK is not set)
154+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
155+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
156+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
157+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
158+ // 0xC0-0xDF: first byte of a 2-byte code point
159+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
160+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
161+ // 0xE0-0xEF: first byte of a 3-byte code point
162+ 3 , 3 ,11 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , // 0xE2 is the start of \u2028 and \u2029, hence 11 (3 | ESCAPE_MASK)
163+ // 0xF0-0xFF: first byte of a 4+ byte code point (0xF0-0xF7: 4, 0xF8-0xFB: 5, 0xFC-0xFD: 6); 0xFE and 0xFF are 9 (1 | ESCAPE_MASK)
164+ 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
165+ }; // convert_UTF8_to_JSON handles the 11 entry in its own case and resets ch_len to 3 when the sequence is not escaped there
166+
99167/* Converts in_string to a JSON string (without the wrapping '"'
100168 * characters) in FBuffer out_buffer.
101169 *
@@ -106,13 +174,13 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
106174 *
107175 * - If out_ascii_only: non-ASCII characters (>0x7F)
108176 *
109- * - If out_script_safe : forwardslash, line separator (U+2028), and
177+ * - If escape_table is script_safe_escape_table: forwardslash (/), line separator (U+2028), and
110178 * paragraph separator (U+2029)
111179 *
112180 * Everything else (should be UTF-8) is just passed through and
113181 * appended to the result.
114182 */
115- static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE str , const char escape_table [256 ], bool out_script_safe )
183+ static inline void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE str , const unsigned char escape_table [256 ])
116184{
117185 const char * hexdig = "0123456789abcdef" ;
118186 char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
@@ -131,7 +199,7 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca
131199
132200 if (RB_UNLIKELY (ch_len )) {
133201 switch (ch_len ) {
134- case 1 : {
202+ case 9 : {
135203 FLUSH_POS (1 );
136204 switch (ch ) {
137205 case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
@@ -153,9 +221,9 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca
153221 }
154222 break ;
155223 }
156- case 3 : {
224+ case 11 : {
157225 unsigned char b2 = ptr [pos + 1 ];
158- if (RB_UNLIKELY (out_script_safe && ch == 0xE2 && b2 == 0x80 )) {
226+ if (RB_UNLIKELY (b2 == 0x80 )) {
159227 unsigned char b3 = ptr [pos + 2 ];
160228 if (b3 == 0xA8 ) {
161229 FLUSH_POS (3 );
@@ -167,6 +235,7 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca
167235 break ;
168236 }
169237 }
238+ ch_len = 3 ;
170239 // fallthrough
171240 }
172241 default :
@@ -186,104 +255,7 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca
186255 RB_GC_GUARD (str );
187256}
188257
189- static const char escape_table [256 ] = {
190- // ASCII Control Characters
191- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
192- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
193- // ASCII Characters
194- 0 ,0 ,1 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // '"'
195- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
196- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
197- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 ,0 , // '\\'
198- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
199- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
200- // Continuation byte
201- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
202- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
203- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
204- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
205- // First byte of a 2-byte code point
206- 2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,
207- 2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,
208- // First byte of a 4-byte code point
209- 3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,
210- //First byte of a 4+byte code point
211- 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 ,5 ,5 ,5 ,5 ,6 ,6 ,1 ,1 ,
212- };
213-
214- static const char script_safe_escape_table [256 ] = {
215- // ASCII Control Characters
216- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
217- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
218- // ASCII Characters
219- 0 ,0 ,1 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 , // '"' and '/'
220- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
221- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
222- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 ,0 , // '\\'
223- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
224- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
225- // Continuation byte
226- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
227- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
228- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
229- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
230- // First byte of a 2-byte code point
231- 2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,
232- 2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,
233- // First byte of a 4-byte code point
234- 3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,
235- //First byte of a 4+byte code point
236- 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 ,5 ,5 ,5 ,5 ,6 ,6 ,1 ,1 ,
237- };
238-
239- static void convert_ASCII_to_JSON (FBuffer * out_buffer , VALUE str , const char escape_table [256 ])
240- {
241- const char * hexdig = "0123456789abcdef" ;
242- char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
243-
244- const char * ptr = RSTRING_PTR (str );
245- unsigned long len = RSTRING_LEN (str );
246-
247- unsigned long beg = 0 , pos ;
248-
249- for (pos = 0 ; pos < len ;) {
250- unsigned char ch = ptr [pos ];
251- /* JSON encoding */
252- if (escape_table [ch ]) {
253- if (pos > beg ) {
254- fbuffer_append (out_buffer , & ptr [beg ], pos - beg );
255- }
256-
257- beg = pos + 1 ;
258- switch (ch ) {
259- case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
260- case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
261- case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
262- case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
263- case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
264- case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
265- case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
266- case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
267- default :
268- scratch [2 ] = '0' ;
269- scratch [3 ] = '0' ;
270- scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
271- scratch [5 ] = hexdig [ch & 0xf ];
272- fbuffer_append (out_buffer , scratch , 6 );
273- }
274- }
275-
276- pos ++ ;
277- }
278-
279- if (beg < len ) {
280- fbuffer_append (out_buffer , & ptr [beg ], len - beg );
281- }
282-
283- RB_GC_GUARD (str );
284- }
285-
286- static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE str , const char escape_table [256 ], bool out_script_safe )
258+ static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE str , const unsigned char escape_table [256 ])
287259{
288260 const char * hexdig = "0123456789abcdef" ;
289261 char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
@@ -301,7 +273,7 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons
301273
302274 if (RB_UNLIKELY (ch_len )) {
303275 switch (ch_len ) {
304- case 1 : {
276+ case 1 | ESCAPE_MASK : {
305277 FLUSH_POS (1 );
306278 switch (ch ) {
307279 case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
@@ -325,6 +297,8 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons
325297 }
326298 default : {
327299 uint32_t wchar = 0 ;
300+ ch_len = ch_len & CHAR_LENGTH_MASK ;
301+
328302 switch (ch_len ) {
329303 case 2 :
330304 wchar = ptr [pos ] & 0x1F ;
@@ -935,13 +909,11 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
935909
936910 switch (rb_enc_str_coderange (obj )) {
937911 case ENC_CODERANGE_7BIT :
938- convert_ASCII_to_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table );
939- break ;
940912 case ENC_CODERANGE_VALID :
941913 if (RB_UNLIKELY (state -> ascii_only )) {
942- convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table , state -> script_safe );
914+ convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
943915 } else {
944- convert_UTF8_to_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table , state -> script_safe );
916+ convert_UTF8_to_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table );
945917 }
946918 break ;
947919 default :
0 commit comments