5
5
*/
6
6
package json .ext ;
7
7
8
+ import java .io .IOException ;
9
+ import java .io .OutputStream ;
10
+ import java .nio .charset .StandardCharsets ;
11
+
8
12
import org .jcodings .Encoding ;
9
13
import org .jcodings .specific .ASCIIEncoding ;
10
14
import org .jcodings .specific .USASCIIEncoding ;
17
21
import org .jruby .util .ByteList ;
18
22
import org .jruby .util .StringSupport ;
19
23
20
- import java . io . IOException ;
21
- import java . io . OutputStream ;
22
- import java . nio . charset . StandardCharsets ;
24
+ import jdk . incubator . vector . ByteVector ;
25
+ import jdk . incubator . vector . VectorSpecies ;
26
+ import json . ext . VectorizedEscapeScanner ;
23
27
24
28
/**
25
29
* An encoder that reads from the given source and outputs its representation
@@ -130,14 +134,22 @@ class StringEncoder extends ByteListTranscoder {
130
134
new byte [] {'0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' ,
131
135
'8' , '9' , 'a' , 'b' , 'c' , 'd' , 'e' , 'f' };
132
136
133
- StringEncoder (boolean scriptSafe ) {
137
/**
 * Creates an encoder whose escape table is chosen by a flag.
 *
 * @param scriptSafe when {@code true} the encoder uses
 *                   {@code SCRIPT_SAFE_ESCAPE_TABLE}; otherwise it uses
 *                   {@code ESCAPE_TABLE}
 */
private StringEncoder(boolean scriptSafe) {
    this(!scriptSafe ? ESCAPE_TABLE : SCRIPT_SAFE_ESCAPE_TABLE);
}
136
140
137
141
/**
 * Creates an encoder that consults the given escape table.
 *
 * @param escapeTable table indexed by unsigned byte value; the array is
 *                    stored as-is (no copy is made)
 */
StringEncoder(final byte[] escapeTable) {
    this.escapeTable = escapeTable;
}
140
144
145
+ public static StringEncoder scriptSafeEncoder () {
146
+ return new StringEncoder (SCRIPT_SAFE_ESCAPE_TABLE );
147
+ }
148
+
149
+ public static StringEncoder basicEncoder () {
150
+ return new StringEncoder (ESCAPE_TABLE );
151
+ }
152
+
141
153
// C: generate_json_string
142
154
void generate (ThreadContext context , RubyString object , OutputStream buffer ) throws IOException {
143
155
object = ensureValidEncoding (context , object );
@@ -198,41 +210,89 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st
198
210
return str ;
199
211
}
200
212
213
+ boolean searchEscape (EscapeScanner .State state ) throws IOException {
214
+ byte [] escapeTable = StringEncoder .this .escapeTable ;
215
+
216
+ while (state .pos < state .len ) {
217
+ state .ch = Byte .toUnsignedInt (state .ptrBytes [state .ptr + state .pos ]);
218
+ int ch_len = escapeTable [state .ch ];
219
+
220
+ if (ch_len > 0 ) {
221
+ return true ;
222
+ }
223
+
224
+ state .pos ++;
225
+ }
226
+
227
+ return false ;
228
+ }
229
+
230
+ void encodeBasic (ByteList src ) throws IOException {
231
+ EscapeScanner .State state = new EscapeScanner .State ();
232
+ state .ptrBytes = src .unsafeBytes ();
233
+ state .ptr = src .begin ();
234
+ state .len = src .realSize ();
235
+ state .beg = 0 ;
236
+ state .pos = 0 ;
237
+
238
+ byte [] hexdig = HEX ;
239
+ byte [] scratch = aux ;
240
+
241
+ EscapeScanner scanner = EscapeScanner .basicScanner ();
242
+
243
+ while (scanner .scan (state )) {
244
+ int ch = Byte .toUnsignedInt (state .ptrBytes [state .ptr + state .pos ]);
245
+ state .beg = state .pos = flushPos (state .pos , state .beg , state .ptrBytes , state .ptr , 1 );
246
+ escapeAscii (ch , scratch , hexdig );
247
+ }
248
+
249
+ if (state .beg < state .len ) {
250
+ append (state .ptrBytes , state .ptr + state .beg , state .len - state .beg );
251
+ }
252
+ }
253
+
201
254
// C: convert_UTF8_to_JSON
202
255
void encode (ByteList src ) throws IOException {
256
+ if (this .escapeTable == StringEncoder .ESCAPE_TABLE ) {
257
+ encodeBasic (src );
258
+ return ;
259
+ }
260
+
203
261
byte [] hexdig = HEX ;
204
262
byte [] scratch = aux ;
205
263
byte [] escapeTable = this .escapeTable ;
206
264
207
- byte [] ptrBytes = src .unsafeBytes ();
208
- int ptr = src .begin ();
209
- int len = src .realSize ();
210
-
211
- int beg = 0 ;
212
- int pos = 0 ;
213
-
214
- while (pos < len ) {
215
- int ch = Byte .toUnsignedInt (ptrBytes [ptr + pos ]);
265
+ EscapeScanner .State state = new EscapeScanner .State ();
266
+ state .ptrBytes = src .unsafeBytes ();
267
+ state .ptr = src .begin ();
268
+ state .len = src .realSize ();
269
+ state .beg = 0 ;
270
+ state .pos = 0 ;
271
+
272
+ while (searchEscape (state )) {
273
+ // We found an escape character, so we need to flush up to this point
274
+ // and then handle the escape character.
275
+ state .beg = flushPos (state .pos , state .beg , state .ptrBytes , state .ptr , 0 );
276
+ int ch = Byte .toUnsignedInt (state .ptrBytes [state .ptr + state .pos ]);
216
277
int ch_len = escapeTable [ch ];
217
- /* JSON encoding */
218
278
219
279
if (ch_len > 0 ) {
220
280
switch (ch_len ) {
221
281
case 9 : {
222
- beg = pos = flushPos (pos , beg , ptrBytes , ptr , 1 );
282
+ state . beg = state . pos = flushPos (state . pos , state . beg , state . ptrBytes , state . ptr , 1 );
223
283
escapeAscii (ch , scratch , hexdig );
224
284
break ;
225
285
}
226
286
case 11 : {
227
- int b2 = Byte .toUnsignedInt (ptrBytes [ptr + pos + 1 ]);
287
+ int b2 = Byte .toUnsignedInt (state . ptrBytes [state . ptr + state . pos + 1 ]);
228
288
if (b2 == 0x80 ) {
229
- int b3 = Byte .toUnsignedInt (ptrBytes [ptr + pos + 2 ]);
289
+ int b3 = Byte .toUnsignedInt (state . ptrBytes [state . ptr + state . pos + 2 ]);
230
290
if (b3 == 0xA8 ) {
231
- beg = pos = flushPos (pos , beg , ptrBytes , ptr , 3 );
291
+ state . beg = state . pos = flushPos (state . pos , state . beg , state . ptrBytes , state . ptr , 3 );
232
292
append (BACKSLASH_U2028 , 0 , 6 );
233
293
break ;
234
294
} else if (b3 == 0xA9 ) {
235
- beg = pos = flushPos (pos , beg , ptrBytes , ptr , 3 );
295
+ state . beg = state . pos = flushPos (state . pos , state . beg , state . ptrBytes , state . ptr , 3 );
236
296
append (BACKSLASH_U2029 , 0 , 6 );
237
297
break ;
238
298
}
@@ -241,16 +301,55 @@ void encode(ByteList src) throws IOException {
241
301
// fallthrough
242
302
}
243
303
default :
244
- pos += ch_len ;
304
+ state . pos += ch_len ;
245
305
break ;
246
306
}
247
307
} else {
248
- pos ++;
308
+ // This should be unreachable.
309
+ state .pos ++;
249
310
}
250
311
}
251
312
252
- if (beg < len ) {
253
- append (ptrBytes , ptr + beg , len - beg );
313
+ // while (state.pos < state.len) {
314
+ // int ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]);
315
+ // int ch_len = escapeTable[ch];
316
+ // /* JSON encoding */
317
+
318
+ // if (ch_len > 0) {
319
+ // switch (ch_len) {
320
+ // case 9: {
321
+ // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 1);
322
+ // escapeAscii(ch, scratch, hexdig);
323
+ // break;
324
+ // }
325
+ // case 11: {
326
+ // int b2 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 1]);
327
+ // if (b2 == 0x80) {
328
+ // int b3 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 2]);
329
+ // if (b3 == 0xA8) {
330
+ // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3);
331
+ // append(BACKSLASH_U2028, 0, 6);
332
+ // break;
333
+ // } else if (b3 == 0xA9) {
334
+ // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3);
335
+ // append(BACKSLASH_U2029, 0, 6);
336
+ // break;
337
+ // }
338
+ // }
339
+ // ch_len = 3;
340
+ // // fallthrough
341
+ // }
342
+ // default:
343
+ // state.pos += ch_len;
344
+ // break;
345
+ // }
346
+ // } else {
347
+ // state.pos++;
348
+ // }
349
+ // }
350
+
351
+ if (state .beg < state .len ) {
352
+ append (state .ptrBytes , state .ptr + state .beg , state .len - state .beg );
254
353
}
255
354
}
256
355
0 commit comments