diff --git a/Rakefile b/Rakefile
index 52e178ed..616e7e98 100644
--- a/Rakefile
+++ b/Rakefile
@@ -116,7 +116,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
   file JRUBY_GENERATOR_JAR => :compile do
     cd 'java/src' do
       generator_classes = FileList[
-        "json/ext/ByteList*.class",
+        "json/ext/*ByteList*.class",
         "json/ext/OptionsReader*.class",
         "json/ext/Generator*.class",
         "json/ext/RuntimeInfo*.class",
diff --git a/java/src/json/ext/AbstractByteListDirectOutputStream.java b/java/src/json/ext/AbstractByteListDirectOutputStream.java
new file mode 100644
index 00000000..64ca29d3
--- /dev/null
+++ b/java/src/json/ext/AbstractByteListDirectOutputStream.java
@@ -0,0 +1,50 @@
+package json.ext;
+
+import java.io.OutputStream;
+
+import org.jcodings.Encoding;
+import org.jruby.util.ByteList;
+
+/**
+ * Base class for the in-memory output streams used by the JSON generator.
+ *
+ * <p>The implementation returned by {@link #create(int)} is selected once, when
+ * this class is initialized, from the {@code json.useSegmentedOutputStream}
+ * system property (default {@code true}).
+ */
+abstract class AbstractByteListDirectOutputStream extends OutputStream {
+
+    private static final String PROP_SEGMENTED_BUFFER = "json.useSegmentedOutputStream";
+    private static final String PROP_SEGMENTED_BUFFER_DEFAULT = "true";
+
+    private static final boolean USE_SEGMENTED_BUFFER;
+
+    static {
+        String useSegmentedOutputStream = System.getProperty(PROP_SEGMENTED_BUFFER, PROP_SEGMENTED_BUFFER_DEFAULT);
+        USE_SEGMENTED_BUFFER = Boolean.parseBoolean(useSegmentedOutputStream);
+        // TODO: report the selected implementation once a logger is available here.
+    }
+
+    /**
+     * Creates the implementation selected by the
+     * {@code json.useSegmentedOutputStream} system property.
+     *
+     * @param estimatedSize size hint passed to the implementation's constructor
+     * @return a new stream
+     */
+    public static AbstractByteListDirectOutputStream create(int estimatedSize) {
+        if (USE_SEGMENTED_BUFFER) {
+            return new SegmentedByteListDirectOutputStream(estimatedSize);
+        } else {
+            return new ByteListDirectOutputStream(estimatedSize);
+        }
+    }
+
+    /**
+     * Returns the written bytes as a {@link ByteList} with the given encoding.
+     *
+     * @param encoding encoding passed on to the resulting {@link ByteList}
+     * @return the accumulated output
+     */
+    public abstract ByteList toByteListDirect(Encoding encoding);
+}
diff --git a/java/src/json/ext/ByteListDirectOutputStream.java b/java/src/json/ext/ByteListDirectOutputStream.java
index b22d4812..a92753ca 100644
--- a/java/src/json/ext/ByteListDirectOutputStream.java
+++ b/java/src/json/ext/ByteListDirectOutputStream.java
@@ -4,10 +4,9 @@
 import org.jruby.util.ByteList;
 
 import java.io.IOException;
-import java.io.OutputStream;
 import java.util.Arrays;
 
-public class ByteListDirectOutputStream extends OutputStream {
+public class ByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
     private byte[] buffer;
     private int length;
 
diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java
index 85250920..5ed2f057 100644
--- a/java/src/json/ext/Generator.java
+++ b/java/src/json/ext/Generator.java
@@ -26,6 +26,8 @@
 import org.jruby.util.IOOutputStream;
 import org.jruby.util.TypeConverter;
 
+import json.ext.ByteListDirectOutputStream;
+
 import java.io.BufferedOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -252,7 +254,7 @@ int guessSize(ThreadContext context, Session session, T object) {
         }
 
         RubyString generateNew(ThreadContext context, Session session, T object) {
-            ByteListDirectOutputStream buffer = new ByteListDirectOutputStream(guessSize(context, session, object));
+            AbstractByteListDirectOutputStream buffer = AbstractByteListDirectOutputStream.create(guessSize(context, session, object));
             generateToBuffer(context, session, object, buffer);
             return RubyString.newString(context.runtime, buffer.toByteListDirect(UTF8Encoding.INSTANCE));
         }
diff --git a/java/src/json/ext/SegmentedByteListDirectOutputStream.java b/java/src/json/ext/SegmentedByteListDirectOutputStream.java
new file mode 100644
index 00000000..abad1661
--- /dev/null
+++ b/java/src/json/ext/SegmentedByteListDirectOutputStream.java
@@ -0,0 +1,133 @@
+package json.ext;
+
+import org.jcodings.Encoding;
+import org.jruby.util.ByteList;
+
+import java.io.IOException;
+
+/**
+ * Output stream that keeps its bytes in a series of segments, so that growing
+ * the stream allocates a new segment instead of copying what was already written.
+ *
+ * <p>A new segment is started only when the current one is full, which means
+ * every segment before the current one is completely filled. The segments are
+ * joined into one array by {@link #toByteListDirect(Encoding)}.
+ */
+public class SegmentedByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
+    private static final int DEFAULT_CAPACITY = 1024;
+    private static final String TOO_LONG = "Total length exceeds maximum length of an array.";
+
+    private int totalLength;
+    private byte[][] segments = new byte[21][];
+    private int currentSegmentIndex;
+    private int currentSegmentLength;
+    private byte[] currentSegment;
+
+    SegmentedByteListDirectOutputStream(int size) {
+        currentSegment = new byte[Math.max(size, DEFAULT_CAPACITY)];
+        segments[0] = currentSegment;
+    }
+
+    /**
+     * Copies all segments into one newly allocated array and wraps it.
+     *
+     * @param encoding encoding passed on to the resulting {@link ByteList}
+     * @return a {@link ByteList} over a copy of the bytes written so far
+     */
+    @Override
+    public ByteList toByteListDirect(Encoding encoding) {
+        byte[] buffer = new byte[totalLength];
+        int pos = 0;
+        // Segments before the current one are full; the current one may be
+        // partially filled and is copied separately.
+        for (int i = 0; i < currentSegmentIndex; i++) {
+            byte[] segment = segments[i];
+            System.arraycopy(segment, 0, buffer, pos, segment.length);
+            pos += segment.length;
+        }
+        System.arraycopy(currentSegment, 0, buffer, pos, currentSegmentLength);
+        return new ByteList(buffer, 0, totalLength, encoding, false);
+    }
+
+    /**
+     * Appends a single byte.
+     *
+     * @throws IOException if the stream already holds {@link Integer#MAX_VALUE} bytes
+     */
+    @Override
+    public void write(int b) throws IOException {
+        if (totalLength == Integer.MAX_VALUE) {
+            throw new IOException(TOO_LONG);
+        }
+        if (currentSegmentLength == currentSegment.length) {
+            startSegment(1);
+        }
+        currentSegment[currentSegmentLength++] = (byte) b;
+        totalLength++;
+    }
+
+    /**
+     * Appends {@code length} bytes of {@code bytes}, starting at index {@code start}.
+     *
+     * @throws IndexOutOfBoundsException if the range does not lie within {@code bytes}
+     * @throws IOException if the total length would exceed {@link Integer#MAX_VALUE}
+     */
+    @Override
+    public void write(byte[] bytes, int start, int length) throws IOException {
+        if (start < 0 || length < 0 || length > bytes.length - start) {
+            throw new IndexOutOfBoundsException(
+                "start=" + start + ", length=" + length + ", array length=" + bytes.length);
+        }
+        // Checked up front against the whole request: totalLength is only updated
+        // after the copy loop, so a check inside the loop would use a stale sum.
+        if (length > Integer.MAX_VALUE - totalLength) {
+            throw new IOException(TOO_LONG);
+        }
+
+        int remaining = length;
+        while (remaining > 0) {
+            if (currentSegmentLength == currentSegment.length) {
+                startSegment(remaining);
+            }
+            int toWrite = Math.min(remaining, currentSegment.length - currentSegmentLength);
+            System.arraycopy(bytes, start, currentSegment, currentSegmentLength, toWrite);
+            currentSegmentLength += toWrite;
+            start += toWrite;
+            remaining -= toWrite;
+        }
+        totalLength += length;
+    }
+
+    @Override
+    public void write(byte[] bytes) throws IOException {
+        write(bytes, 0, bytes.length);
+    }
+
+    /**
+     * Makes a new, empty segment the current one.
+     *
+     * <p>The segment is twice the size of the previous one, or
+     * {@code DEFAULT_CAPACITY} if doubling overflows, and never smaller than
+     * {@code minCapacity}.
+     *
+     * @param minCapacity minimum size of the new segment
+     */
+    private void startSegment(int minCapacity) {
+        int capacity = currentSegment.length << 1;
+        if (capacity < 0) {
+            capacity = DEFAULT_CAPACITY;
+        }
+        byte[] next = new byte[Math.max(capacity, minCapacity)];
+
+        if (currentSegmentIndex + 1 == segments.length) {
+            // The overflow fallback above restarts at DEFAULT_CAPACITY, so more
+            // segments than the initial table holds can be needed.
+            byte[][] grown = new byte[segments.length * 2][];
+            System.arraycopy(segments, 0, grown, 0, segments.length);
+            segments = grown;
+        }
+        segments[++currentSegmentIndex] = next;
+        currentSegment = next;
+        currentSegmentLength = 0;
+    }
+}
diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java
index d178d0bd..6783be9d 100644
--- a/java/src/json/ext/StringEncoder.java
+++ b/java/src/json/ext/StringEncoder.java
@@ -5,6 +5,11 @@
  */
 package json.ext;
 
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+
 import org.jcodings.Encoding;
 import org.jcodings.specific.ASCIIEncoding;
 import org.jcodings.specific.USASCIIEncoding;
@@ -17,10 +22,6 @@
 import org.jruby.util.ByteList;
 import org.jruby.util.StringSupport;
 
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
-
 /**
  * An encoder that reads from the given source and outputs its representation
  * to another ByteList. The source string is fully checked for UTF-8 validity,
@@ -114,6 +115,16 @@ class StringEncoder extends ByteListTranscoder {
 
     protected final byte[] escapeTable;
 
+    private static final String USE_SWAR_BASIC_ENCODER_PROP = "json.useSWARBasicEncoder";
+    private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
+    private static final boolean USE_BASIC_SWAR_ENCODER;
+
+    static {
+        USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
+            System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
+        // TODO: report the selected encoder once a logger is available here.
+    }
+
     OutputStream out;
 
     // Escaped characters will reuse this array, to avoid new allocations
@@ -198,8 +209,122 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st
         return str;
     }
 
+    /**
+     * Encodes {@code src} using {@code ESCAPE_TABLE}, testing eight bytes at a
+     * time so that runs needing no escape are skipped a word at a time.
+     *
+     * @param src bytes to encode
+     * @throws IOException if an output operation called by this method fails
+     */
+    void encodeBasicSWAR(ByteList src) throws IOException {
+        byte[] hexdig = HEX;
+        byte[] scratch = aux;
+
+        byte[] ptrBytes = src.unsafeBytes();
+        int ptr = src.begin();
+        int len = src.realSize();
+
+        int beg = 0;
+        int pos = 0;
+
+        // Absolute reads are indexed from the start of the backing array, so the
+        // buffer limit has to be ptr + len rather than len.
+        ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
+        while (pos + 8 <= len) {
+            long x = bb.getLong(ptr + pos);
+            // High bit of every byte that is ASCII (below 0x80).
+            long is_ascii = 0x8080808080808080L & ~x;
+            // For an ASCII byte, x ^ 2 is below 0x21 exactly when the byte is below
+            // 32 or is the double quote (34); subtracting 0x21 then sets its high bit.
+            long xor2 = x ^ 0x0202020202020202L;
+            long lt32_or_eq34 = xor2 - 0x2121212121212121L;
+            // x ^ 0x5C is zero exactly when the byte is the backslash (92);
+            // subtracting 1 then sets its high bit.
+            long sub92 = x ^ 0x5C5C5C5C5C5C5C5CL;
+            long eq92 = (sub92 - 0x0101010101010101L);
+            boolean needs_escape = ((lt32_or_eq34 | eq92) & is_ascii) != 0;
+            if (needs_escape) {
+                // Find the exact byte that needs escaping
+                boolean escaped = false;
+                for (int i = 0; i < 8; i++) {
+                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos + i]);
+                    int ch_len = ESCAPE_TABLE[ch];
+                    if (ch_len > 0) {
+                        beg = pos = flushPos(pos + i, beg, ptrBytes, ptr, 1);
+                        escapeAscii(ch, scratch, hexdig);
+                        escaped = true;
+                        break;
+                    }
+                }
+                if (escaped) {
+                    continue;
+                }
+                // No table entry matched: fall through and skip the word instead of
+                // testing the same eight bytes again forever.
+            }
+
+            pos += 8;
+        }
+
+        // Handle remaining bytes one by one
+        while (pos < len) {
+            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+            int ch_len = ESCAPE_TABLE[ch];
+            if (ch_len > 0) {
+                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                escapeAscii(ch, scratch, hexdig);
+            } else {
+                pos++;
+            }
+        }
+
+        if (beg < len) {
+            append(ptrBytes, ptr + beg, len - beg);
+        }
+    }
+
+    /**
+     * Encodes {@code src} using {@code ESCAPE_TABLE}, one byte at a time.
+     *
+     * @param src bytes to encode
+     * @throws IOException if an output operation called by this method fails
+     */
+    void encodeBasic(ByteList src) throws IOException {
+        byte[] hexdig = HEX;
+        byte[] scratch = aux;
+
+        byte[] ptrBytes = src.unsafeBytes();
+        int ptr = src.begin();
+        int len = src.realSize();
+
+        int beg = 0;
+        int pos = 0;
+
+        while (pos < len) {
+            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+            int ch_len = ESCAPE_TABLE[ch];
+            if (ch_len > 0) {
+                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                escapeAscii(ch, scratch, hexdig);
+            } else {
+                pos++;
+            }
+        }
+
+        if (beg < len) {
+            append(ptrBytes, ptr + beg, len - beg);
+        }
+    }
+
     // C: convert_UTF8_to_JSON
     void encode(ByteList src) throws IOException {
+        if (escapeTable == ESCAPE_TABLE) {
+            if (USE_BASIC_SWAR_ENCODER) {
+                encodeBasicSWAR(src);
+            } else {
+                encodeBasic(src);
+            }
+            return;
+        }
+
         byte[] hexdig = HEX;
         byte[] scratch = aux;
         byte[] escapeTable = this.escapeTable;