Skip to content

Commit ae83838

Browse files
authored
Merge pull request #835 from samyron/sm/use-segmented-outputstream-and-swar
Java Extension Optimizations
2 parents d3f7f04 + 43a8a83 commit ae83838

8 files changed

+275
-10
lines changed

Rakefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,11 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
116116
file JRUBY_GENERATOR_JAR => :compile do
117117
cd 'java/src' do
118118
generator_classes = FileList[
119-
"json/ext/ByteList*.class",
119+
"json/ext/*ByteList*.class",
120120
"json/ext/OptionsReader*.class",
121121
"json/ext/Generator*.class",
122122
"json/ext/RuntimeInfo*.class",
123-
"json/ext/StringEncoder*.class",
123+
"json/ext/*StringEncoder*.class",
124124
"json/ext/Utils*.class"
125125
]
126126
sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes
java/src/json/ext/AbstractByteListDirectOutputStream.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package json.ext;
2+
3+
import java.io.OutputStream;
4+
5+
import org.jcodings.Encoding;
6+
import org.jruby.util.ByteList;
7+
8+
abstract class AbstractByteListDirectOutputStream extends OutputStream {
9+
10+
private static final String PROP_SEGMENTED_BUFFER = "jruby.json.useSegmentedOutputStream";
11+
private static final String PROP_SEGMENTED_BUFFER_DEFAULT = "true";
12+
13+
private static final boolean USE_SEGMENTED_BUFFER;
14+
15+
static {
16+
String useSegmentedOutputStream = System.getProperty(PROP_SEGMENTED_BUFFER, PROP_SEGMENTED_BUFFER_DEFAULT);
17+
USE_SEGMENTED_BUFFER = Boolean.parseBoolean(useSegmentedOutputStream);
18+
// XXX Is there a logger we can use here?
19+
// System.out.println("Using segmented output stream: " + USE_SEGMENTED_BUFFER);
20+
}
21+
22+
public static AbstractByteListDirectOutputStream create(int estimatedSize) {
23+
if (USE_SEGMENTED_BUFFER) {
24+
return new SegmentedByteListDirectOutputStream(estimatedSize);
25+
} else {
26+
return new ByteListDirectOutputStream(estimatedSize);
27+
}
28+
}
29+
30+
public abstract ByteList toByteListDirect(Encoding encoding);
31+
}

java/src/json/ext/ByteListDirectOutputStream.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
import org.jruby.util.ByteList;
55

66
import java.io.IOException;
7-
import java.io.OutputStream;
87
import java.util.Arrays;
98

10-
public class ByteListDirectOutputStream extends OutputStream {
9+
public class ByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
1110
private byte[] buffer;
1211
private int length;
1312

java/src/json/ext/Generator.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import org.jruby.util.IOOutputStream;
2727
import org.jruby.util.TypeConverter;
2828

29+
import json.ext.ByteListDirectOutputStream;
30+
2931
import java.io.BufferedOutputStream;
3032
import java.io.IOException;
3133
import java.io.OutputStream;
@@ -232,7 +234,7 @@ public StringEncoder getStringEncoder(ThreadContext context) {
232234
GeneratorState state = getState(context);
233235
stringEncoder = state.asciiOnly() ?
234236
new StringEncoderAsciiOnly(state.scriptSafe()) :
235-
new StringEncoder(state.scriptSafe());
237+
(state.scriptSafe()) ? new StringEncoder(state.scriptSafe()) : StringEncoder.createBasicEncoder();
236238
}
237239
return stringEncoder;
238240
}
@@ -252,7 +254,7 @@ int guessSize(ThreadContext context, Session session, T object) {
252254
}
253255

254256
RubyString generateNew(ThreadContext context, Session session, T object) {
255-
ByteListDirectOutputStream buffer = new ByteListDirectOutputStream(guessSize(context, session, object));
257+
AbstractByteListDirectOutputStream buffer = AbstractByteListDirectOutputStream.create(guessSize(context, session, object));
256258
generateToBuffer(context, session, object, buffer);
257259
return RubyString.newString(context.runtime, buffer.toByteListDirect(UTF8Encoding.INSTANCE));
258260
}
java/src/json/ext/SWARBasicStringEncoder.java

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package json.ext;
2+
3+
import java.io.IOException;
4+
import java.nio.ByteBuffer;
5+
6+
import org.jruby.util.ByteList;
7+
8+
public class SWARBasicStringEncoder extends StringEncoder {
9+
10+
public SWARBasicStringEncoder() {
11+
super(ESCAPE_TABLE);
12+
}
13+
14+
@Override
15+
void encode(ByteList src) throws IOException {
16+
byte[] hexdig = HEX;
17+
byte[] scratch = aux;
18+
19+
byte[] ptrBytes = src.unsafeBytes();
20+
int ptr = src.begin();
21+
int len = src.realSize();
22+
23+
int beg = 0;
24+
int pos = 0;
25+
26+
ByteBuffer bb = ByteBuffer.wrap(ptrBytes, 0, len);
27+
while (pos + 8 <= len) {
28+
long x = bb.getLong(ptr + pos);
29+
if (skipChunk(x)) {
30+
pos += 8;
31+
continue;
32+
}
33+
int chunkEnd = pos + 8;
34+
while (pos < chunkEnd) {
35+
int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
36+
int ch_len = ESCAPE_TABLE[ch];
37+
if (ch_len > 0) {
38+
beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
39+
escapeAscii(ch, scratch, hexdig);
40+
} else {
41+
pos++;
42+
}
43+
}
44+
}
45+
46+
if (pos + 4 <= len) {
47+
int x = bb.getInt(ptr + pos);
48+
if (skipChunk(x)) {
49+
pos += 4;
50+
}
51+
}
52+
53+
while (pos < len) {
54+
int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
55+
int ch_len = ESCAPE_TABLE[ch];
56+
if (ch_len > 0) {
57+
beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
58+
escapeAscii(ch, scratch, hexdig);
59+
} else {
60+
pos++;
61+
}
62+
}
63+
64+
if (beg < len) {
65+
append(ptrBytes, ptr + beg, len - beg);
66+
}
67+
}
68+
69+
private boolean skipChunk(long x) {
70+
long is_ascii = 0x8080808080808080L & ~x;
71+
long xor2 = x ^ 0x0202020202020202L;
72+
long lt32_or_eq34 = xor2 - 0x2121212121212121L;
73+
long sub92 = x ^ 0x5C5C5C5C5C5C5C5CL;
74+
long eq92 = (sub92 - 0x0101010101010101L);
75+
return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
76+
}
77+
78+
private boolean skipChunk(int x) {
79+
int is_ascii = 0x80808080 & ~x;
80+
int xor2 = x ^ 0x02020202;
81+
int lt32_or_eq34 = xor2 - 0x21212121;
82+
int sub92 = x ^ 0x5C5C5C5C;
83+
int eq92 = (sub92 - 0x01010101);
84+
return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
85+
}
86+
}
java/src/json/ext/SegmentedByteListDirectOutputStream.java

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package json.ext;
2+
3+
import org.jcodings.Encoding;
4+
import org.jruby.util.ByteList;
5+
6+
import java.io.IOException;
7+
8+
public class SegmentedByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
9+
private static final int DEFAULT_CAPACITY = 1024;
10+
11+
private int totalLength;
12+
// Why 21? The minimum segment size is 1024 bytes. If we double the segment size each time
13+
// we need a new segment, we only need 21 segments to reach the maximum array size in Java.
14+
private byte[][] segments = new byte[21][];
15+
private int currentSegmentIndex;
16+
private int currentSegmentLength;
17+
private byte[] currentSegment;
18+
19+
SegmentedByteListDirectOutputStream(int size) {
20+
currentSegment = new byte[Math.max(size, DEFAULT_CAPACITY)];
21+
segments[0] = currentSegment;
22+
}
23+
24+
public ByteList toByteListDirect(Encoding encoding) {
25+
byte[] buffer = new byte[totalLength];
26+
int pos = 0;
27+
// We handle the current segment separately.
28+
for (int i = 0; i < currentSegmentIndex; i++) {
29+
byte[] segment = segments[i];
30+
System.arraycopy(segment, 0, buffer, pos, segment.length);
31+
pos += segment.length;
32+
}
33+
System.arraycopy(currentSegment, 0, buffer, pos, currentSegmentLength);
34+
return new ByteList(buffer, 0, totalLength, encoding, false);
35+
}
36+
37+
@Override
38+
public void write(int b) throws IOException {
39+
if (currentSegmentLength == currentSegment.length) {
40+
if (totalLength + 1 < 0) {
41+
throw new IOException("Total length exceeds maximum length of an array.");
42+
}
43+
currentSegmentIndex++;
44+
int capacity = currentSegment.length * 2;
45+
capacity = (capacity < 0) ? DEFAULT_CAPACITY : capacity;
46+
currentSegment = new byte[capacity];
47+
currentSegmentLength = 0;
48+
segments[currentSegmentIndex] = currentSegment;
49+
}
50+
currentSegment[currentSegmentLength++] = (byte) b;
51+
totalLength++;
52+
}
53+
54+
@Override
55+
public void write(byte[] bytes, int start, int length) throws IOException {
56+
int remaining = length;
57+
58+
while (remaining > 0) {
59+
if (currentSegmentLength == currentSegment.length) {
60+
if (totalLength + remaining < 0) {
61+
throw new IOException("Total length exceeds maximum length of an array.");
62+
}
63+
currentSegmentIndex++;
64+
int capacity = currentSegment.length << 1;
65+
capacity = (capacity < 0) ? DEFAULT_CAPACITY : capacity;
66+
capacity = (capacity < remaining) ? remaining : capacity;
67+
currentSegment = new byte[capacity];
68+
currentSegmentLength = 0;
69+
segments[currentSegmentIndex] = currentSegment;
70+
}
71+
int toWrite = Math.min(remaining, currentSegment.length - currentSegmentLength);
72+
System.arraycopy(bytes, start, currentSegment, currentSegmentLength, toWrite);
73+
currentSegmentLength += toWrite;
74+
start += toWrite;
75+
remaining -= toWrite;
76+
}
77+
totalLength += length;
78+
}
79+
80+
@Override
81+
public void write(byte[] bytes) throws IOException {
82+
write(bytes, 0, bytes.length);
83+
}
84+
}

java/src/json/ext/StringEncoder.java

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
*/
66
package json.ext;
77

8+
import java.io.IOException;
9+
import java.io.OutputStream;
10+
import java.nio.charset.StandardCharsets;
11+
812
import org.jcodings.Encoding;
913
import org.jcodings.specific.ASCIIEncoding;
1014
import org.jcodings.specific.USASCIIEncoding;
@@ -17,10 +21,6 @@
1721
import org.jruby.util.ByteList;
1822
import org.jruby.util.StringSupport;
1923

20-
import java.io.IOException;
21-
import java.io.OutputStream;
22-
import java.nio.charset.StandardCharsets;
23-
2424
/**
2525
* An encoder that reads from the given source and outputs its representation
2626
* to another ByteList. The source string is fully checked for UTF-8 validity,
@@ -114,6 +114,17 @@ class StringEncoder extends ByteListTranscoder {
114114

115115
protected final byte[] escapeTable;
116116

117+
private static final String USE_SWAR_BASIC_ENCODER_PROP = "jruby.json.useSWARBasicEncoder";
118+
private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
119+
private static final boolean USE_BASIC_SWAR_ENCODER;
120+
121+
static {
122+
USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
123+
System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
124+
// XXX Is there a logger we can use here?
125+
// System.out.println("Using SWAR basic encoder: " + USE_BASIC_SWAR_ENCODER);
126+
}
127+
117128
OutputStream out;
118129

119130
// Escaped characters will reuse this array, to avoid new allocations
@@ -138,6 +149,14 @@ class StringEncoder extends ByteListTranscoder {
138149
this.escapeTable = escapeTable;
139150
}
140151

152+
static StringEncoder createBasicEncoder() {
153+
if (USE_BASIC_SWAR_ENCODER) {
154+
return new SWARBasicStringEncoder();
155+
} else {
156+
return new StringEncoder(false);
157+
}
158+
}
159+
141160
// C: generate_json_string
142161
void generate(ThreadContext context, RubyString object, OutputStream buffer) throws IOException {
143162
object = ensureValidEncoding(context, object);
@@ -198,8 +217,40 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st
198217
return str;
199218
}
200219

220+
void encodeBasic(ByteList src) throws IOException {
221+
byte[] hexdig = HEX;
222+
byte[] scratch = aux;
223+
224+
byte[] ptrBytes = src.unsafeBytes();
225+
int ptr = src.begin();
226+
int len = src.realSize();
227+
228+
int beg = 0;
229+
int pos = 0;
230+
231+
while (pos < len) {
232+
int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
233+
int ch_len = ESCAPE_TABLE[ch];
234+
if (ch_len > 0) {
235+
beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
236+
escapeAscii(ch, scratch, hexdig);
237+
} else {
238+
pos++;
239+
}
240+
}
241+
242+
if (beg < len) {
243+
append(ptrBytes, ptr + beg, len - beg);
244+
}
245+
}
246+
201247
// C: convert_UTF8_to_JSON
202248
void encode(ByteList src) throws IOException {
249+
if (escapeTable == ESCAPE_TABLE) {
250+
encodeBasic(src);
251+
return;
252+
}
253+
203254
byte[] hexdig = HEX;
204255
byte[] scratch = aux;
205256
byte[] escapeTable = this.escapeTable;

test/json/json_generator_test.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,18 @@ def test_backslash
504504
json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
505505
assert_equal json, generate(data)
506506
#
507+
data = '"""""'
508+
json = '"\"\"\"\"\""'
509+
assert_equal json, generate(data)
510+
#
511+
data = "abc\n"
512+
json = '"abc\\n"'
513+
assert_equal json, generate(data)
514+
#
515+
data = "\nabc"
516+
json = '"\\nabc"'
517+
assert_equal json, generate(data)
518+
#
507519
data = ["'"]
508520
json = '["\\\'"]'
509521
assert_equal '["\'"]', generate(data)

0 commit comments

Comments
 (0)