Skip to content

Java Extension Optimizations #835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
file JRUBY_GENERATOR_JAR => :compile do
cd 'java/src' do
generator_classes = FileList[
"json/ext/ByteList*.class",
"json/ext/*ByteList*.class",
"json/ext/OptionsReader*.class",
"json/ext/Generator*.class",
"json/ext/RuntimeInfo*.class",
Expand Down
31 changes: 31 additions & 0 deletions java/src/json/ext/AbstractByteListDirectOutputStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package json.ext;

import java.io.OutputStream;

import org.jcodings.Encoding;
import org.jruby.util.ByteList;

/**
 * Common base for the generator's in-memory output streams that can hand their
 * contents over as a {@link ByteList}.
 *
 * <p>The concrete implementation is selected once, at class-initialization time,
 * from the {@code json.useSegmentedOutputStream} system property (defaults to
 * {@code "true"}).
 */
abstract class AbstractByteListDirectOutputStream extends OutputStream {

    /** System property that selects the segmented implementation. */
    private static final String PROP_SEGMENTED_BUFFER = "json.useSegmentedOutputStream";

    /** Value used when the property is not set. */
    private static final String PROP_SEGMENTED_BUFFER_DEFAULT = "true";

    // XXX Is there a logger we can use here to report which implementation was chosen?
    private static final boolean USE_SEGMENTED_BUFFER =
            Boolean.parseBoolean(System.getProperty(PROP_SEGMENTED_BUFFER, PROP_SEGMENTED_BUFFER_DEFAULT));

    /**
     * Creates the output stream implementation selected by the system property.
     *
     * @param estimatedSize size hint forwarded to the implementation's constructor
     * @return a new, empty output stream
     */
    public static AbstractByteListDirectOutputStream create(int estimatedSize) {
        return USE_SEGMENTED_BUFFER
                ? new SegmentedByteListDirectOutputStream(estimatedSize)
                : new ByteListDirectOutputStream(estimatedSize);
    }

    /**
     * Returns the bytes written so far as a {@link ByteList} tagged with the given encoding.
     *
     * @param encoding encoding to attach to the returned byte list
     * @return the accumulated output
     */
    public abstract ByteList toByteListDirect(Encoding encoding);
}
3 changes: 1 addition & 2 deletions java/src/json/ext/ByteListDirectOutputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
import org.jruby.util.ByteList;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;

public class ByteListDirectOutputStream extends OutputStream {
public class ByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
private byte[] buffer;
private int length;

Expand Down
4 changes: 3 additions & 1 deletion java/src/json/ext/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import org.jruby.util.IOOutputStream;
import org.jruby.util.TypeConverter;

import json.ext.ByteListDirectOutputStream;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
Expand Down Expand Up @@ -252,7 +254,7 @@ int guessSize(ThreadContext context, Session session, T object) {
}

/**
 * Generates the output for {@code object} into a new in-memory buffer and
 * returns it as a new Ruby string.
 *
 * The buffer is created with the size returned by {@code guessSize}, filled by
 * {@code generateToBuffer}, and its contents are wrapped with
 * {@code RubyString.newString} using {@code UTF8Encoding.INSTANCE}.
 */
RubyString generateNew(ThreadContext context, Session session, T object) {
// NOTE(review): the two declarations of `buffer` below appear to be the removed and the
// added side of the same diff line; presumably only the factory-based one remains after
// the change is applied -- confirm against the merged file.
ByteListDirectOutputStream buffer = new ByteListDirectOutputStream(guessSize(context, session, object));
AbstractByteListDirectOutputStream buffer = AbstractByteListDirectOutputStream.create(guessSize(context, session, object));
generateToBuffer(context, session, object, buffer);
return RubyString.newString(context.runtime, buffer.toByteListDirect(UTF8Encoding.INSTANCE));
}
Expand Down
82 changes: 82 additions & 0 deletions java/src/json/ext/SegmentedByteListDirectOutputStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package json.ext;

import org.jcodings.Encoding;
import org.jruby.util.ByteList;

import java.io.IOException;

public class SegmentedByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
// Minimum size of the first segment; also the fallback segment size used by the
// write methods when doubling the previous segment's size overflows int.
private static final int DEFAULT_CAPACITY = 1024;

// Total number of bytes written across all segments.
private int totalLength;
// Table of allocated segments, in write order. Per the author's review note, 21 slots
// were chosen because the first segment is at least 1024 bytes and each further segment
// doubles in size, which is expected to reach Integer.MAX_VALUE within 21 segments.
private byte[][] segments = new byte[21][];
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why 21? The minimum segment size is 1024 for the first segment. The code doubles the segment size for each additional segment. Based on this doubling, we only need 21 segments before we hit Integer.MAX_VALUE.

// Index in `segments` of the segment currently being filled.
private int currentSegmentIndex;
// Number of bytes already written into `currentSegment`.
private int currentSegmentLength;
// Segment currently receiving writes; the write methods and the constructor store the
// same array at segments[currentSegmentIndex].
private byte[] currentSegment;

SegmentedByteListDirectOutputStream(int size) {
currentSegment = new byte[Math.max(size, DEFAULT_CAPACITY)];
segments[0] = currentSegment;
}

/**
 * Concatenates all segments into one contiguous array of exactly
 * {@code totalLength} bytes and wraps it in a {@link ByteList}.
 *
 * @param encoding encoding to attach to the returned byte list
 * @return a byte list over a freshly allocated array holding everything written so far
 */
@Override
public ByteList toByteListDirect(Encoding encoding) {
    final byte[] flat = new byte[totalLength];
    int offset = 0;
    // A new segment is only started once the previous one is completely full, so every
    // segment before the current one can be copied in its entirety.
    for (int i = 0; i < currentSegmentIndex; i++) {
        final byte[] full = segments[i];
        System.arraycopy(full, 0, flat, offset, full.length);
        offset += full.length;
    }
    // The current segment is usually only partially filled.
    System.arraycopy(currentSegment, 0, flat, offset, currentSegmentLength);
    return new ByteList(flat, 0, totalLength, encoding, false);
}

/**
 * Appends a single byte, starting a new segment when the current one is full.
 *
 * @param b the byte to write; only the low 8 bits are stored
 * @throws IOException if the stream already holds {@code Integer.MAX_VALUE} bytes
 */
@Override
public void write(int b) throws IOException {
    // Checked on every call, not only when a segment fills up: segment capacities can add up
    // to more than Integer.MAX_VALUE, so totalLength could otherwise wrap mid-segment.
    if (totalLength == Integer.MAX_VALUE) {
        throw new IOException("Total length exceeds maximum length of an array.");
    }
    if (currentSegmentLength == currentSegment.length) {
        final int nextIndex = currentSegmentIndex + 1;
        if (nextIndex == segments.length) {
            // The segment table is sized for the common doubling sequence; grow it instead of
            // failing with ArrayIndexOutOfBoundsException when that estimate is exceeded.
            final byte[][] grown = new byte[segments.length * 2][];
            System.arraycopy(segments, 0, grown, 0, segments.length);
            segments = grown;
        }
        int capacity = currentSegment.length * 2;
        // Doubling overflowed int: fall back to the minimum segment size.
        capacity = (capacity < 0) ? DEFAULT_CAPACITY : capacity;
        // Allocate before touching any field so a failed allocation leaves the stream intact.
        final byte[] next = new byte[capacity];
        segments[nextIndex] = next;
        currentSegment = next;
        currentSegmentIndex = nextIndex;
        currentSegmentLength = 0;
    }
    currentSegment[currentSegmentLength++] = (byte) b;
    totalLength++;
}

/**
 * Appends {@code length} bytes of {@code bytes} starting at {@code start}, spilling into
 * new segments as needed. A new segment is at least as large as the data still to be
 * written, so one call starts at most one new segment.
 *
 * @param bytes  source array
 * @param start  index of the first byte to write
 * @param length number of bytes to write
 * @throws IndexOutOfBoundsException if {@code start} or {@code length} is negative, or the
 *         range extends past the end of {@code bytes}
 * @throws IOException if the write would take the stream past {@code Integer.MAX_VALUE} bytes
 */
@Override
public void write(byte[] bytes, int start, int length) throws IOException {
    // Validate up front: a negative length used to skip the loop and silently decrement
    // totalLength, and an out-of-range request failed only after a partial write.
    if (start < 0 || length < 0 || length > bytes.length - start) {
        throw new IndexOutOfBoundsException(
                "start=" + start + ", length=" + length + ", array length=" + bytes.length);
    }
    // Overflow-safe form of "totalLength + length > Integer.MAX_VALUE".
    if (length > Integer.MAX_VALUE - totalLength) {
        throw new IOException("Total length exceeds maximum length of an array.");
    }

    int remaining = length;
    while (remaining > 0) {
        if (currentSegmentLength == currentSegment.length) {
            final int nextIndex = currentSegmentIndex + 1;
            if (nextIndex == segments.length) {
                // Grow the segment table rather than overrunning its fixed initial size.
                final byte[][] grown = new byte[segments.length * 2][];
                System.arraycopy(segments, 0, grown, 0, segments.length);
                segments = grown;
            }
            int capacity = currentSegment.length << 1;
            // Doubling overflowed int: fall back to the minimum segment size.
            capacity = (capacity < 0) ? DEFAULT_CAPACITY : capacity;
            // Make the new segment big enough for everything that is still pending.
            capacity = Math.max(capacity, remaining);
            // Allocate before touching any field so a failed allocation leaves the stream intact.
            final byte[] next = new byte[capacity];
            segments[nextIndex] = next;
            currentSegment = next;
            currentSegmentIndex = nextIndex;
            currentSegmentLength = 0;
        }
        final int toWrite = Math.min(remaining, currentSegment.length - currentSegmentLength);
        System.arraycopy(bytes, start, currentSegment, currentSegmentLength, toWrite);
        currentSegmentLength += toWrite;
        start += toWrite;
        remaining -= toWrite;
    }
    totalLength += length;
}

/**
 * Appends the whole array by delegating to {@code write(bytes, 0, bytes.length)}.
 *
 * @param bytes the bytes to append
 * @throws IOException if the delegated write fails
 */
@Override
public void write(byte[] bytes) throws IOException {
write(bytes, 0, bytes.length);
}
}
121 changes: 117 additions & 4 deletions java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
*/
package json.ext;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import org.jcodings.Encoding;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
Expand All @@ -17,10 +22,6 @@
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

/**
* An encoder that reads from the given source and outputs its representation
* to another ByteList. The source string is fully checked for UTF-8 validity,
Expand Down Expand Up @@ -114,6 +115,17 @@ class StringEncoder extends ByteListTranscoder {

protected final byte[] escapeTable;

private static final String USE_SWAR_BASIC_ENCODER_PROP = "json.useSWARBasicEncoder";
private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
private static final boolean USE_BASIC_SWAR_ENCODER;

static {
USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
// XXX Is there a logger we can use here?
// System.out.println("Using SWAR basic encoder: " + USE_BASIC_SWAR_ENCODER);
}

OutputStream out;

// Escaped characters will reuse this array, to avoid new allocations
Expand Down Expand Up @@ -198,8 +210,109 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st
return str;
}

/**
 * Encodes {@code src} using {@code ESCAPE_TABLE}, scanning eight bytes at a time.
 *
 * <p>Each 8-byte word is tested with branch-free arithmetic for lanes that look like they
 * need escaping. Only when the test fires are the eight bytes checked individually against
 * {@code ESCAPE_TABLE}; the first hit is flushed and escaped and scanning resumes from the
 * position returned by {@code flushPos}. The tail shorter than eight bytes is handled
 * byte by byte.
 *
 * @param src the bytes to encode
 * @throws IOException if writing to the underlying output fails
 */
void encodeBasicSWAR(ByteList src) throws IOException {
    byte[] hexdig = HEX;
    byte[] scratch = aux;

    byte[] ptrBytes = src.unsafeBytes();
    int ptr = src.begin();
    int len = src.realSize();

    int beg = 0;
    int pos = 0;

    // Absolute reads below are indexed from the start of the backing array (ptr + pos), so
    // the buffer's limit must be ptr + len. Wrapping with offset 0 would cap the limit at
    // len and reject valid reads whenever the ByteList does not begin at index 0.
    ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
    while (pos + 8 <= len) {
        long x = bb.getLong(ptr + pos);
        // High bit set in every lane whose byte is below 0x80.
        long isAscii = 0x8080808080808080L & ~x;
        // x ^ 0x02 maps '"' (0x22) to 0x20 and keeps control bytes below 0x20, so subtracting
        // 0x21 per lane wraps for exactly those bytes and sets the lane's high bit.
        long xor2 = x ^ 0x0202020202020202L;
        long lt32OrEq34 = xor2 - 0x2121212121212121L;
        // x ^ 0x5C is zero only for '\\'; subtracting 1 wraps that lane.
        long xor92 = x ^ 0x5C5C5C5C5C5C5C5CL;
        long eq92 = xor92 - 0x0101010101010101L;
        // Borrows can spill into a neighbouring lane, so this is only a hint; the exact
        // per-byte table lookup below decides.
        boolean needsEscape = ((lt32OrEq34 | eq92) & isAscii) != 0;
        if (needsEscape) {
            boolean escaped = false;
            // Find the exact byte that needs escaping
            for (int i = 0; i < 8; i++) {
                int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos + i]);
                if (ESCAPE_TABLE[ch] > 0) {
                    beg = pos = flushPos(pos + i, beg, ptrBytes, ptr, 1);
                    escapeAscii(ch, scratch, hexdig);
                    escaped = true;
                    break;
                }
            }
            if (escaped) {
                continue;
            }
            // The hint fired but the table found nothing to escape: fall through and skip
            // the word so the loop always makes progress.
        }

        pos += 8;
    }

    // Handle remaining bytes one by one
    while (pos < len) {
        int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
        if (ESCAPE_TABLE[ch] > 0) {
            beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
            escapeAscii(ch, scratch, hexdig);
        } else {
            pos++;
        }
    }

    // Emit whatever follows the last escaped byte.
    if (beg < len) {
        append(ptrBytes, ptr + beg, len - beg);
    }
}

/**
 * Encodes {@code src} byte by byte using {@code ESCAPE_TABLE}.
 *
 * <p>Bytes with a non-positive table entry are skipped over; a byte with a positive
 * entry is passed to {@code flushPos} and then {@code escapeAscii}. Anything left
 * after the last escaped byte is appended at the end.
 *
 * @param src the bytes to encode
 * @throws IOException if writing to the underlying output fails
 */
void encodeBasic(ByteList src) throws IOException {
    final byte[] hex = HEX;
    final byte[] escapeBuf = aux;

    final byte[] bytes = src.unsafeBytes();
    final int offset = src.begin();
    final int size = src.realSize();

    int flushed = 0;
    int cursor = 0;

    while (cursor < size) {
        final int unit = bytes[offset + cursor] & 0xFF;
        if (ESCAPE_TABLE[unit] <= 0) {
            // Nothing to escape here; keep extending the pending run.
            cursor++;
            continue;
        }
        // Flush the pending run, then continue from the position flushPos reports.
        cursor = flushPos(cursor, flushed, bytes, offset, 1);
        flushed = cursor;
        escapeAscii(unit, escapeBuf, hex);
    }

    final int pending = size - flushed;
    if (pending > 0) {
        append(bytes, offset + flushed, pending);
    }
}

// C: convert_UTF8_to_JSON
void encode(ByteList src) throws IOException {
if (escapeTable == ESCAPE_TABLE) {
if (USE_BASIC_SWAR_ENCODER) {
encodeBasicSWAR(src);
} else {
encodeBasic(src);
}
return;
}

byte[] hexdig = HEX;
byte[] scratch = aux;
byte[] escapeTable = this.escapeTable;
Expand Down
Loading