ruby · samyron · Aug 12, 2025 · Aug 13, 2025 · Aug 14, 2025 · Aug 14, 2025
diff --git a/Rakefile b/Rakefile
@@ -116,7 +116,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
   file JRUBY_GENERATOR_JAR => :compile do
     cd 'java/src' do
       generator_classes = FileList[
-        "json/ext/ByteList*.class",
+        "json/ext/*ByteList*.class",
         "json/ext/OptionsReader*.class",
         "json/ext/Generator*.class",
         "json/ext/RuntimeInfo*.class",

diff --git a/java/src/json/ext/AbstractByteListDirectOutputStream.java b/java/src/json/ext/AbstractByteListDirectOutputStream.java
@@ -0,0 +1,44 @@
+package json.ext;
+
+import java.io.OutputStream;
+
+import org.jcodings.Encoding;
+import org.jruby.util.ByteList;
+
+/**
+ * Base class for the in-memory output streams the generator writes JSON into.
+ *
+ * <p>{@link #create(int)} picks the concrete implementation based on the
+ * {@code json.useSegmentedOutputStream} system property (enabled by default).
+ */
+abstract class AbstractByteListDirectOutputStream extends OutputStream {
+
+    private static final String PROP_SEGMENTED_BUFFER = "json.useSegmentedOutputStream";
+    private static final String PROP_SEGMENTED_BUFFER_DEFAULT = "true";
+
+    /** Read once at class initialization; later changes to the property are ignored. */
+    private static final boolean USE_SEGMENTED_BUFFER =
+        Boolean.parseBoolean(System.getProperty(PROP_SEGMENTED_BUFFER, PROP_SEGMENTED_BUFFER_DEFAULT));
+
+    /**
+     * Creates the output stream implementation selected by the system property.
+     *
+     * @param estimatedSize expected number of bytes to be written; passed to the
+     *                      chosen implementation's constructor
+     * @return a {@link SegmentedByteListDirectOutputStream} when the segmented buffer is
+     *         enabled, otherwise a {@link ByteListDirectOutputStream}
+     */
+    public static AbstractByteListDirectOutputStream create(int estimatedSize) {
+        return USE_SEGMENTED_BUFFER
+            ? new SegmentedByteListDirectOutputStream(estimatedSize)
+            : new ByteListDirectOutputStream(estimatedSize);
+    }
+
+    /**
+     * Returns the bytes held by this stream as a {@link ByteList}.
+     *
+     * @param encoding the encoding to attach to the returned byte list
+     * @return the accumulated bytes
+     */
+    public abstract ByteList toByteListDirect(Encoding encoding);
+}
diff --git a/java/src/json/ext/ByteListDirectOutputStream.java b/java/src/json/ext/ByteListDirectOutputStream.java
@@ -4,10 +4,9 @@
 import org.jruby.util.ByteList;
 
 import java.io.IOException;
-import java.io.OutputStream;
 import java.util.Arrays;
 
-public class ByteListDirectOutputStream extends OutputStream {
+public class ByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
     private byte[] buffer;
     private int length;
 

diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java
@@ -26,6 +26,8 @@
 import org.jruby.util.IOOutputStream;
 import org.jruby.util.TypeConverter;
 
+import json.ext.ByteListDirectOutputStream;
+
 import java.io.BufferedOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -252,7 +254,7 @@ int guessSize(ThreadContext context, Session session, T object) {
         }
 
         RubyString generateNew(ThreadContext context, Session session, T object) {
-            ByteListDirectOutputStream buffer = new ByteListDirectOutputStream(guessSize(context, session, object));
+            AbstractByteListDirectOutputStream buffer = AbstractByteListDirectOutputStream.create(guessSize(context, session, object));
             generateToBuffer(context, session, object, buffer);
             return RubyString.newString(context.runtime, buffer.toByteListDirect(UTF8Encoding.INSTANCE));
         }

diff --git a/java/src/json/ext/SegmentedByteListDirectOutputStream.java b/java/src/json/ext/SegmentedByteListDirectOutputStream.java
@@ -0,0 +1,136 @@
+package json.ext;
+
+import org.jcodings.Encoding;
+import org.jruby.util.ByteList;
+
+import java.io.IOException;
+
+/**
+ * An output stream that collects bytes in a list of segments rather than in one
+ * contiguous array, so that growing never copies bytes that were already written.
+ * The segments are joined once, by {@link #toByteListDirect(Encoding)}.
+ *
+ * <p>Invariant: every segment before the current one is completely full, because a
+ * new segment is only started when the current one has no room left.
+ */
+public class SegmentedByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
+    private static final int DEFAULT_CAPACITY = 1024;
+    private static final int INITIAL_SEGMENT_SLOTS = 21;
+
+    private int totalLength;
+    private byte[][] segments = new byte[INITIAL_SEGMENT_SLOTS][];
+    private int currentSegmentIndex;
+    private int currentSegmentLength;
+    private byte[] currentSegment;
+
+    /**
+     * @param size estimated number of bytes to be written; the first segment is at
+     *             least {@code DEFAULT_CAPACITY} bytes regardless
+     */
+    SegmentedByteListDirectOutputStream(int size) {
+        currentSegment = new byte[Math.max(size, DEFAULT_CAPACITY)];
+        segments[0] = currentSegment;
+    }
+
+    /**
+     * Copies all segments into one freshly allocated array and wraps it.
+     *
+     * @param encoding the encoding to attach to the returned byte list
+     * @return a {@link ByteList} holding every byte written so far
+     */
+    @Override
+    public ByteList toByteListDirect(Encoding encoding) {
+        byte[] buffer = new byte[totalLength];
+        int pos = 0;
+        // Earlier segments are full; only the current one may be partially filled.
+        for (int i = 0; i < currentSegmentIndex; i++) {
+            byte[] segment = segments[i];
+            System.arraycopy(segment, 0, buffer, pos, segment.length);
+            pos += segment.length;
+        }
+        System.arraycopy(currentSegment, 0, buffer, pos, currentSegmentLength);
+        return new ByteList(buffer, 0, totalLength, encoding, false);
+    }
+
+    /**
+     * Appends a single byte.
+     *
+     * @throws IOException if the total length would exceed {@code Integer.MAX_VALUE}
+     */
+    @Override
+    public void write(int b) throws IOException {
+        checkTotalLength(1);
+        if (currentSegmentLength == currentSegment.length) {
+            startNewSegment(1);
+        }
+        currentSegment[currentSegmentLength++] = (byte) b;
+        totalLength++;
+    }
+
+    /**
+     * Appends {@code length} bytes of {@code bytes}, starting at {@code start}.
+     *
+     * @throws IndexOutOfBoundsException if {@code start} or {@code length} is negative,
+     *         or the range does not fit inside {@code bytes}
+     * @throws IOException if the total length would exceed {@code Integer.MAX_VALUE}
+     */
+    @Override
+    public void write(byte[] bytes, int start, int length) throws IOException {
+        if (start < 0 || length < 0 || length > bytes.length - start) {
+            throw new IndexOutOfBoundsException(
+                "start=" + start + ", length=" + length + ", array length=" + bytes.length);
+        }
+        // Checked once, against the whole request, before any byte is copied.
+        checkTotalLength(length);
+
+        int remaining = length;
+        while (remaining > 0) {
+            if (currentSegmentLength == currentSegment.length) {
+                startNewSegment(remaining);
+            }
+            int toWrite = Math.min(remaining, currentSegment.length - currentSegmentLength);
+            System.arraycopy(bytes, start, currentSegment, currentSegmentLength, toWrite);
+            currentSegmentLength += toWrite;
+            totalLength += toWrite;
+            start += toWrite;
+            remaining -= toWrite;
+        }
+    }
+
+    @Override
+    public void write(byte[] bytes) throws IOException {
+        write(bytes, 0, bytes.length);
+    }
+
+    /** Rejects a write that would push the total past what an {@code int} can count. */
+    private void checkTotalLength(int additional) throws IOException {
+        if (additional > Integer.MAX_VALUE - totalLength) {
+            throw new IOException("Total length exceeds maximum length of an array.");
+        }
+    }
+
+    /**
+     * Makes a new, empty segment the current one. Its capacity is twice the previous
+     * segment's (or {@code DEFAULT_CAPACITY} if doubling overflows), raised to
+     * {@code minCapacity} when that is larger.
+     */
+    private void startNewSegment(int minCapacity) {
+        int capacity = currentSegment.length << 1;
+        if (capacity < 0) {
+            capacity = DEFAULT_CAPACITY;
+        }
+        byte[] next = new byte[Math.max(capacity, minCapacity)];
+
+        int nextIndex = currentSegmentIndex + 1;
+        if (nextIndex == segments.length) {
+            // After an overflow reset the sizes restart small, so more slots can be needed.
+            byte[][] grown = new byte[segments.length * 2][];
+            System.arraycopy(segments, 0, grown, 0, segments.length);
+            segments = grown;
+        }
+        segments[nextIndex] = next;
+        currentSegmentIndex = nextIndex;
+        currentSegment = next;
+        currentSegmentLength = 0;
+    }
+}
diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java
@@ -5,6 +5,11 @@
  */
 package json.ext;
 
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+
 import org.jcodings.Encoding;
 import org.jcodings.specific.ASCIIEncoding;
 import org.jcodings.specific.USASCIIEncoding;
@@ -17,10 +22,6 @@
 import org.jruby.util.ByteList;
 import org.jruby.util.StringSupport;
 
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
-
 /**
  * An encoder that reads from the given source and outputs its representation
  * to another ByteList. The source string is fully checked for UTF-8 validity,
@@ -114,6 +115,17 @@ class StringEncoder extends ByteListTranscoder {
 
     protected final byte[] escapeTable;
 
+    /** Name of the system property that switches the SWAR encoder on or off. */
+    private static final String USE_SWAR_BASIC_ENCODER_PROP = "json.useSWARBasicEncoder";
+
+    /** Value used when the property is not set: the SWAR encoder is enabled. */
+    private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
+
+    /** Resolved once when the class is initialized; later property changes have no effect. */
+    private static final boolean USE_BASIC_SWAR_ENCODER =
+        Boolean.parseBoolean(
+            System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
+
     OutputStream out;
 
     // Escaped characters will reuse this array, to avoid new allocations
@@ -198,8 +210,109 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st
         return str;
     }
 
+    /**
+     * Escapes {@code src} using {@code ESCAPE_TABLE}, testing eight bytes per step
+     * (SWAR) and scanning byte by byte only inside a word that was flagged and over
+     * the trailing bytes that do not fill a whole word.
+     *
+     * @param src the bytes to encode
+     * @throws IOException if one of the output helpers called here fails
+     */
+    void encodeBasicSWAR(ByteList src) throws IOException {
+        byte[] hexdig = HEX;
+        byte[] scratch = aux;
+
+        byte[] ptrBytes = src.unsafeBytes();
+        int ptr = src.begin();
+        int len = src.realSize();
+
+        int beg = 0;
+        int pos = 0;
+
+        // Absolute reads index from the start of the backing array and are checked
+        // against the buffer limit, so the window must be [ptr, ptr + len), not [0, len).
+        ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
+        while (pos + 8 <= len) {
+            long x = bb.getLong(ptr + pos);
+            // is_ascii keeps the high bit of every byte lane whose value is below 0x80.
+            long is_ascii = 0x8080808080808080L & ~x;
+            long xor2 = x ^ 0x0202020202020202L;
+            long lt32_or_eq34 = xor2 - 0x2121212121212121L;
+            long sub92 = x ^ 0x5C5C5C5C5C5C5C5CL;
+            long eq92 = (sub92 - 0x0101010101010101L);
+            boolean needs_escape =  ((lt32_or_eq34 | eq92) & is_ascii) != 0;
+            if (needs_escape) {
+                // Find the exact byte that needs escaping
+                for (int i = 0; i < 8; i++) {
+                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos + i]);
+                    int ch_len = ESCAPE_TABLE[ch];
+                    if (ch_len > 0) {
+                        beg = pos = flushPos(pos + i, beg, ptrBytes, ptr, 1);
+                        escapeAscii(ch, scratch, hexdig);
+                        break;
+                    }
+                }
+                continue;
+            }
+
+            pos += 8;
+        }
+
+        // Handle remaining bytes one by one
+        while (pos < len) {
+            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+            int ch_len = ESCAPE_TABLE[ch];
+            if (ch_len > 0) {
+                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                escapeAscii(ch, scratch, hexdig);
+            } else {
+                pos++;
+            }
+        }
+
+        if (beg < len) {
+            append(ptrBytes, ptr + beg, len - beg);
+        }
+    }
+
+    /** Escapes {@code src} one byte at a time according to {@code ESCAPE_TABLE}. */
+    void encodeBasic(ByteList src) throws IOException {
+        final byte[] hexDigits = HEX;
+        final byte[] escapeBuf = aux;
+        final byte[] bytes = src.unsafeBytes();
+        final int offset = src.begin();
+        final int size = src.realSize();
+
+        int flushed = 0;
+        int cursor = 0;
+        while (cursor < size) {
+            final int ch = Byte.toUnsignedInt(bytes[offset + cursor]);
+            if (ESCAPE_TABLE[ch] <= 0) {
+                cursor++;
+                continue;
+            }
+            // Both indices continue from the position flushPos returns.
+            cursor = flushPos(cursor, flushed, bytes, offset, 1);
+            flushed = cursor;
+            escapeAscii(ch, escapeBuf, hexDigits);
+        }
+
+        if (flushed < size) {
+            append(bytes, offset + flushed, size - flushed);
+        }
+    }
+
     // C: convert_UTF8_to_JSON
     void encode(ByteList src) throws IOException {
+        if (escapeTable == ESCAPE_TABLE) {
+            if (USE_BASIC_SWAR_ENCODER) {
+                encodeBasicSWAR(src);
+            } else {
+                encodeBasic(src);
+            }
+            return;
+        }
+
         byte[] hexdig = HEX;
         byte[] scratch = aux;
         byte[] escapeTable = this.escapeTable;