@@ -103,20 +103,20 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
103103static const unsigned char CHAR_LENGTH_MASK = 7 ;
104104static const unsigned char ESCAPE_MASK = 8 ;
105105
106- static const unsigned char escape_table [256 ] = {
107- // ASCII Control Characters
108- 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
109- 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
110- // ASCII Characters
111- 0 , 0 , 9 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // '"'
112- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
113- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
114- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , 0 , 0 , 0 , // '\\'
115- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
116- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
117- };
106+ typedef struct _search_state {
107+ const char * ptr ;
108+ const char * end ;
109+ const char * cursor ;
110+ FBuffer * buffer ;
111+ } search_state ;
118112
119- static const unsigned char ascii_only_escape_table [256 ] = {
113+ static inline void search_flush (search_state * search )
114+ {
115+ fbuffer_append (search -> buffer , search -> cursor , search -> ptr - search -> cursor );
116+ search -> cursor = search -> ptr ;
117+ }
118+
119+ static const unsigned char escape_table_basic [256 ] = {
120120 // ASCII Control Characters
121121 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
122122 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
@@ -127,20 +127,105 @@ static const unsigned char ascii_only_escape_table[256] = {
127127 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , 0 , 0 , 0 , // '\\'
128128 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
129129 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
130- // Continuation byte
131- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
132- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
133- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
134- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
135- // First byte of a 2-byte code point
136- 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
137- 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
138- // First byte of a 3-byte code point
139- 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 ,
140- //First byte of a 4+ byte code point
141- 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
142130};
143131
132+ static inline unsigned char search_escape_basic (search_state * search )
133+ {
134+ while (search -> ptr < search -> end ) {
135+ if (RB_UNLIKELY (escape_table_basic [(const unsigned char )* search -> ptr ])) {
136+ search_flush (search );
137+ return 1 ;
138+ } else {
139+ search -> ptr ++ ;
140+ }
141+ }
142+ search_flush (search );
143+ return 0 ;
144+ }
145+
146+ static inline void escape_UTF8_char_basic (search_state * search ) {
147+ const unsigned char ch = (unsigned char )* search -> ptr ;
148+ switch (ch ) {
149+ case '"' : fbuffer_append (search -> buffer , "\\\"" , 2 ); break ;
150+ case '\\' : fbuffer_append (search -> buffer , "\\\\" , 2 ); break ;
151+ case '/' : fbuffer_append (search -> buffer , "\\/" , 2 ); break ;
152+ case '\b' : fbuffer_append (search -> buffer , "\\b" , 2 ); break ;
153+ case '\f' : fbuffer_append (search -> buffer , "\\f" , 2 ); break ;
154+ case '\n' : fbuffer_append (search -> buffer , "\\n" , 2 ); break ;
155+ case '\r' : fbuffer_append (search -> buffer , "\\r" , 2 ); break ;
156+ case '\t' : fbuffer_append (search -> buffer , "\\t" , 2 ); break ;
157+ default : {
158+ const char * hexdig = "0123456789abcdef" ;
159+ char scratch [6 ] = { '\\' , 'u' , '0' , '0' , 0 , 0 };
160+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
161+ scratch [5 ] = hexdig [ch & 0xf ];
162+ fbuffer_append (search -> buffer , scratch , 6 );
163+ break ;
164+ }
165+ }
166+ search -> ptr ++ ;
167+ search -> cursor = search -> ptr ;
168+ }
169+
170+ /* Converts in_string to a JSON string (without the wrapping '"'
171+ * characters) in FBuffer out_buffer.
172+ *
173+ * Characters are JSON-escaped according to:
174+ *
175+ * - Always: ASCII control characters (0x00-0x1F), dquote, and
176+ * backslash.
177+ *
178+ * - If out_ascii_only: non-ASCII characters (>0x7F)
179+ *
180+ * - If script_safe: forwardslash (/), line separator (U+2028), and
181+ * paragraph separator (U+2029)
182+ *
183+ * Everything else (should be UTF-8) is just passed through and
184+ * appended to the result.
185+ */
186+ static inline void convert_UTF8_to_JSON (search_state * search )
187+ {
188+ while (search_escape_basic (search )) {
189+ escape_UTF8_char_basic (search );
190+ }
191+ }
192+
193+ static inline void escape_UTF8_char (search_state * search , unsigned char ch_len ) {
194+ const unsigned char ch = (unsigned char )* search -> ptr ;
195+ switch (ch_len ) {
196+ case 1 : {
197+ switch (ch ) {
198+ case '"' : fbuffer_append (search -> buffer , "\\\"" , 2 ); break ;
199+ case '\\' : fbuffer_append (search -> buffer , "\\\\" , 2 ); break ;
200+ case '/' : fbuffer_append (search -> buffer , "\\/" , 2 ); break ;
201+ case '\b' : fbuffer_append (search -> buffer , "\\b" , 2 ); break ;
202+ case '\f' : fbuffer_append (search -> buffer , "\\f" , 2 ); break ;
203+ case '\n' : fbuffer_append (search -> buffer , "\\n" , 2 ); break ;
204+ case '\r' : fbuffer_append (search -> buffer , "\\r" , 2 ); break ;
205+ case '\t' : fbuffer_append (search -> buffer , "\\t" , 2 ); break ;
206+ default : {
207+ const char * hexdig = "0123456789abcdef" ;
208+ char scratch [6 ] = { '\\' , 'u' , '0' , '0' , 0 , 0 };
209+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
210+ scratch [5 ] = hexdig [ch & 0xf ];
211+ fbuffer_append (search -> buffer , scratch , 6 );
212+ break ;
213+ }
214+ }
215+ break ;
216+ }
217+ case 3 : {
218+ if (search -> ptr [2 ] & 1 ) {
219+ fbuffer_append (search -> buffer , "\\u2029" , 6 );
220+ } else {
221+ fbuffer_append (search -> buffer , "\\u2028" , 6 );
222+ }
223+ break ;
224+ }
225+ }
226+ search -> cursor = (search -> ptr += ch_len );
227+ }
228+
144229static const unsigned char script_safe_escape_table [256 ] = {
145230 // ASCII Control Characters
146231 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
@@ -166,25 +251,11 @@ static const unsigned char script_safe_escape_table[256] = {
166251 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
167252};
168253
169-
170- typedef struct _search_state {
171- const char * ptr ;
172- const char * end ;
173- const char * cursor ;
174- FBuffer * buffer ;
175- } search_state ;
176-
177- static inline void search_flush (search_state * search )
178- {
179- fbuffer_append (search -> buffer , search -> cursor , search -> ptr - search -> cursor );
180- search -> cursor = search -> ptr ;
181- }
182-
183- static inline unsigned char search_escape (search_state * search , const unsigned char escape_table [256 ])
254+ static inline unsigned char search_script_safe_escape (search_state * search )
184255{
185256 while (search -> ptr < search -> end ) {
186257 unsigned char ch = (unsigned char )* search -> ptr ;
187- unsigned char ch_len = escape_table [ch ];
258+ unsigned char ch_len = script_safe_escape_table [ch ];
188259
189260 if (RB_UNLIKELY (ch_len )) {
190261 if (ch_len & ESCAPE_MASK ) {
@@ -208,66 +279,39 @@ static inline unsigned char search_escape(search_state *search, const unsigned c
208279 return 0 ;
209280}
210281
211- static inline void fast_escape_UTF8_char (search_state * search , unsigned char ch_len ) {
212- const unsigned char ch = (unsigned char )* search -> ptr ;
213- switch (ch_len ) {
214- case 1 : {
215- switch (ch ) {
216- case '"' : fbuffer_append (search -> buffer , "\\\"" , 2 ); break ;
217- case '\\' : fbuffer_append (search -> buffer , "\\\\" , 2 ); break ;
218- case '/' : fbuffer_append (search -> buffer , "\\/" , 2 ); break ;
219- case '\b' : fbuffer_append (search -> buffer , "\\b" , 2 ); break ;
220- case '\f' : fbuffer_append (search -> buffer , "\\f" , 2 ); break ;
221- case '\n' : fbuffer_append (search -> buffer , "\\n" , 2 ); break ;
222- case '\r' : fbuffer_append (search -> buffer , "\\r" , 2 ); break ;
223- case '\t' : fbuffer_append (search -> buffer , "\\t" , 2 ); break ;
224- default : {
225- const char * hexdig = "0123456789abcdef" ;
226- char scratch [6 ] = { '\\' , 'u' , '0' , '0' , 0 , 0 };
227- scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
228- scratch [5 ] = hexdig [ch & 0xf ];
229- fbuffer_append (search -> buffer , scratch , 6 );
230- break ;
231- }
232- }
233- break ;
234- }
235- case 3 : {
236- if (search -> ptr [2 ] & 1 ) {
237- fbuffer_append (search -> buffer , "\\u2029" , 6 );
238- } else {
239- fbuffer_append (search -> buffer , "\\u2028" , 6 );
240- }
241- break ;
242- }
243- }
244- search -> cursor = (search -> ptr += ch_len );
245- }
246-
247- /* Converts in_string to a JSON string (without the wrapping '"'
248- * characters) in FBuffer out_buffer.
249- *
250- * Character are JSON-escaped according to:
251- *
252- * - Always: ASCII control characters (0x00-0x1F), dquote, and
253- * backslash.
254- *
255- * - If out_ascii_only: non-ASCII characters (>0x7F)
256- *
257- * - If script_safe: forwardslash (/), line separator (U+2028), and
258- * paragraph separator (U+2029)
259- *
260- * Everything else (should be UTF-8) is just passed through and
261- * appended to the result.
262- */
263- static inline void convert_UTF8_to_JSON (search_state * search , const unsigned char escape_table [256 ])
282+ static void convert_UTF8_to_script_safe_JSON (search_state * search )
264283{
265284 unsigned char ch_len ;
266- while ((ch_len = search_escape (search , escape_table ))) {
267- fast_escape_UTF8_char (search , ch_len );
285+ while ((ch_len = search_script_safe_escape (search ))) {
286+ escape_UTF8_char (search , ch_len );
268287 }
269288}
270289
290+ static const unsigned char ascii_only_escape_table [256 ] = {
291+ // ASCII Control Characters
292+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
293+ 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
294+ // ASCII Characters
295+ 0 , 0 , 9 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // '"'
296+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
297+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
298+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , 0 , 0 , 0 , // '\\'
299+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
300+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
301+ // Continuation byte
302+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
303+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
304+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
305+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
306+ // First byte of a 2-byte code point
307+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
308+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
309+ // First byte of a 3-byte code point
310+ 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 ,
311+ // First byte of a 4+ byte code point
312+ 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
313+ };
314+
271315static inline unsigned char search_ascii_only_escape (search_state * search , const unsigned char escape_table [256 ])
272316{
273317 while (search -> ptr < search -> end ) {
@@ -934,8 +978,10 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
934978 case ENC_CODERANGE_VALID :
935979 if (RB_UNLIKELY (state -> ascii_only )) {
936980 convert_UTF8_to_ASCII_only_JSON (& search , state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
981+ } else if (RB_UNLIKELY (state -> script_safe )) {
982+ convert_UTF8_to_script_safe_JSON (& search );
937983 } else {
938- convert_UTF8_to_JSON (& search , state -> script_safe ? script_safe_escape_table : escape_table );
984+ convert_UTF8_to_JSON (& search );
939985 }
940986 break ;
941987 default :
0 commit comments