Skip to content

Commit 2766f66

Browse files
committed
When dumping to IO, dump directly
Json.dump allows you to pass an IO to which the dump output will be sent, but it still buffers the entire output in memory before sending it to the given IO. This leads to issues on JRuby like jruby/jruby#6265 when it tries to create a byte[] that exceeds the maximum size of a signed int (JVM's array size limit). This commit plumbs the IO all the way through the generation logic so that it can be written to directly without filling a temporary memory buffer first. This allows JRuby to dump object graphs that would normally produce more content than the JVM can hold in a single array, providing a workaround for jruby/jruby#6265. It is unfortunately a bit slow to dump directly to IO due to the many small writes that all acquire locks and participate in the IO encoding subsystem. A more direct path that can skip some of these pieces could be more competitive with the in-memory version, but functionally it expands the size of graphs that can be dumped when using JRuby. See #54
1 parent c233be9 commit 2766f66

File tree

8 files changed

+228
-128
lines changed

8 files changed

+228
-128
lines changed

java/src/json/ext/ByteListTranscoder.java

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
import org.jruby.runtime.ThreadContext;
1010
import org.jruby.util.ByteList;
1111

12+
import java.io.IOException;
13+
import java.io.OutputStream;
14+
1215
/**
1316
* A class specialized in transcoding a certain String format into another,
1417
* using UTF-8 ByteLists as both input and output.
@@ -23,7 +26,7 @@ abstract class ByteListTranscoder {
2326
/** Position of the next character to read */
2427
protected int pos;
2528

26-
private ByteList out;
29+
private OutputStream out;
2730
/**
2831
* When a character that can be copied straight into the output is found,
2932
* its index is stored on this variable, and copying is delayed until
@@ -37,11 +40,11 @@ protected ByteListTranscoder(ThreadContext context) {
3740
this.context = context;
3841
}
3942

40-
protected void init(ByteList src, ByteList out) {
43+
protected void init(ByteList src, OutputStream out) {
4144
this.init(src, 0, src.length(), out);
4245
}
4346

44-
protected void init(ByteList src, int start, int end, ByteList out) {
47+
protected void init(ByteList src, int start, int end, OutputStream out) {
4548
this.src = src;
4649
this.pos = start;
4750
this.charStart = start;
@@ -142,19 +145,19 @@ protected void quoteStart() {
142145
* recently read character, or {@link #charStart} to quote
143146
* until the character before it.
144147
*/
145-
protected void quoteStop(int endPos) {
148+
protected void quoteStop(int endPos) throws IOException {
146149
if (quoteStart != -1) {
147-
out.append(src, quoteStart, endPos - quoteStart);
150+
out.write(src.unsafeBytes(), src.begin() + quoteStart, src.begin() + endPos - quoteStart);
148151
quoteStart = -1;
149152
}
150153
}
151154

152-
protected void append(int b) {
153-
out.append(b);
155+
protected void append(int b) throws IOException {
156+
out.write(b);
154157
}
155158

156-
protected void append(byte[] origin, int start, int length) {
157-
out.append(origin, start, length);
159+
protected void append(byte[] origin, int start, int length) throws IOException {
160+
out.write(origin, start, length);
158161
}
159162

160163

java/src/json/ext/Generator.java

Lines changed: 100 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*/
66
package json.ext;
77

8+
import org.jcodings.specific.USASCIIEncoding;
9+
import org.jcodings.specific.UTF8Encoding;
810
import org.jruby.Ruby;
911
import org.jruby.RubyArray;
1012
import org.jruby.RubyBasicObject;
@@ -13,10 +15,17 @@
1315
import org.jruby.RubyFixnum;
1416
import org.jruby.RubyFloat;
1517
import org.jruby.RubyHash;
18+
import org.jruby.RubyIO;
1619
import org.jruby.RubyString;
20+
import org.jruby.runtime.Helpers;
1721
import org.jruby.runtime.ThreadContext;
1822
import org.jruby.runtime.builtin.IRubyObject;
1923
import org.jruby.util.ByteList;
24+
import org.jruby.util.IOOutputStream;
25+
26+
import java.io.ByteArrayOutputStream;
27+
import java.io.IOException;
28+
import java.io.OutputStream;
2029

2130
public final class Generator {
2231
private Generator() {
@@ -55,6 +64,18 @@ private Generator() {
5564
return handler.generateNew(session, object);
5665
}
5766

67+
/**
68+
* Encodes the given object as a JSON string, as in other forms, but
69+
* outputs directly to the given stream
70+
*/
71+
public static <T extends IRubyObject> void
72+
generateJson(ThreadContext context, T object,
73+
GeneratorState config, OutputStream out) {
74+
Session session = new Session(context, config);
75+
Handler<? super T> handler = getHandlerFor(context.runtime, object);
76+
handler.generateNew(session, object, out);
77+
}
78+
5879
/**
5980
* Returns the best serialization handler for the given object.
6081
*/
@@ -159,6 +180,16 @@ public <T extends IRubyObject> T infect(T object) {
159180

160181
/* Handler base classes */
161182

183+
static class ByteListOutputStream extends ByteArrayOutputStream {
184+
public ByteListOutputStream(int size) {
185+
super(size);
186+
}
187+
188+
public ByteList toByteListDirect() {
189+
return new ByteList(buf, 0, count);
190+
}
191+
}
192+
162193
private static abstract class Handler<T extends IRubyObject> {
163194
/**
164195
* Returns an estimative of how much space the serialization of the
@@ -171,16 +202,33 @@ int guessSize(Session session, T object) {
171202

172203
RubyString generateNew(Session session, T object) {
173204
RubyString result;
174-
ByteList buffer = new ByteList(guessSize(session, object));
175-
generate(session, object, buffer);
176-
result = RubyString.newString(session.getRuntime(), buffer);
205+
ByteListOutputStream blos = new ByteListOutputStream(guessSize(session, object));
206+
generateNew(session, object, blos);
207+
result = RubyString.newString(session.getRuntime(), blos.toByteListDirect());
177208
ThreadContext context = session.getContext();
178209
RuntimeInfo info = session.getInfo();
179210
result.force_encoding(context, info.utf8.get());
180211
return result;
181212
}
182213

183-
abstract void generate(Session session, T object, ByteList buffer);
214+
void generateNew(Session session, T object, RubyIO buffer) {
215+
buffer.setEnc2(UTF8Encoding.INSTANCE);
216+
generateNew(session, object, buffer);
217+
}
218+
219+
void generateNew(Session session, T object, OutputStream buffer) {
220+
try {
221+
generate(session, object, buffer);
222+
} catch (IOException ioe) {
223+
throw Helpers.newIOErrorFromException(session.getRuntime(), ioe);
224+
}
225+
}
226+
227+
abstract void generate(Session session, T object, OutputStream os) throws IOException;
228+
229+
protected void writeByteList(OutputStream os, ByteList byteList) throws IOException {
230+
os.write(byteList.unsafeBytes(), byteList.begin(), byteList.realSize());
231+
}
184232
}
185233

186234
/**
@@ -205,8 +253,8 @@ RubyString generateNew(Session session, T object) {
205253
}
206254

207255
@Override
208-
void generate(Session session, T object, ByteList buffer) {
209-
buffer.append(keyword);
256+
void generate(Session session, T object, OutputStream buffer) throws IOException {
257+
writeByteList(buffer, keyword);
210258
}
211259
}
212260

@@ -216,26 +264,27 @@ void generate(Session session, T object, ByteList buffer) {
216264
static final Handler<RubyBignum> BIGNUM_HANDLER =
217265
new Handler<RubyBignum>() {
218266
@Override
219-
void generate(Session session, RubyBignum object, ByteList buffer) {
267+
void generate(Session session, RubyBignum object, OutputStream buffer) throws IOException {
220268
// JRUBY-4751: RubyBignum.to_s() returns generic object
221269
// representation (fixed in 1.5, but we maintain backwards
222270
// compatibility; call to_s(IRubyObject[]) then
223-
buffer.append(((RubyString)object.to_s(IRubyObject.NULL_ARRAY)).getByteList());
271+
byte[] bigIntStr = object.getBigIntegerValue().toString().getBytes();
272+
buffer.write(bigIntStr, 0, bigIntStr.length);
224273
}
225274
};
226275

227276
static final Handler<RubyFixnum> FIXNUM_HANDLER =
228277
new Handler<RubyFixnum>() {
229278
@Override
230-
void generate(Session session, RubyFixnum object, ByteList buffer) {
231-
buffer.append(object.to_s().getByteList());
279+
void generate(Session session, RubyFixnum object, OutputStream buffer) throws IOException {
280+
writeByteList(buffer, object.to_s().getByteList());
232281
}
233282
};
234283

235284
static final Handler<RubyFloat> FLOAT_HANDLER =
236285
new Handler<RubyFloat>() {
237286
@Override
238-
void generate(Session session, RubyFloat object, ByteList buffer) {
287+
void generate(Session session, RubyFloat object, OutputStream buffer) throws IOException {
239288
double value = RubyFloat.num2dbl(object);
240289

241290
if (Double.isInfinite(value) || Double.isNaN(value)) {
@@ -245,7 +294,7 @@ void generate(Session session, RubyFloat object, ByteList buffer) {
245294
object + " not allowed in JSON");
246295
}
247296
}
248-
buffer.append(((RubyString)object.to_s()).getByteList());
297+
writeByteList(buffer, ((RubyString)object.to_s()).getByteList());
249298
}
250299
};
251300

@@ -263,7 +312,7 @@ int guessSize(Session session, RubyArray object) {
263312
}
264313

265314
@Override
266-
void generate(Session session, RubyArray object, ByteList buffer) {
315+
void generate(Session session, RubyArray object, OutputStream buffer) throws IOException {
267316
ThreadContext context = session.getContext();
268317
Ruby runtime = context.getRuntime();
269318
GeneratorState state = session.getState();
@@ -280,29 +329,29 @@ void generate(Session session, RubyArray object, ByteList buffer) {
280329

281330
session.infectBy(object);
282331

283-
buffer.append((byte)'[');
284-
buffer.append(arrayNl);
332+
buffer.write((byte)'[');
333+
buffer.write(arrayNl.unsafeBytes());
285334
boolean firstItem = true;
286335
for (int i = 0, t = object.getLength(); i < t; i++) {
287336
IRubyObject element = object.eltInternal(i);
288337
session.infectBy(element);
289338
if (firstItem) {
290339
firstItem = false;
291340
} else {
292-
buffer.append(delim);
341+
buffer.write(delim);
293342
}
294-
buffer.append(shift);
343+
buffer.write(shift);
295344
Handler<IRubyObject> handler = (Handler<IRubyObject>) getHandlerFor(runtime, element);
296345
handler.generate(session, element, buffer);
297346
}
298347

299348
state.decreaseDepth();
300349
if (arrayNl.length() != 0) {
301-
buffer.append(arrayNl);
302-
buffer.append(shift, 0, state.getDepth() * indentUnit.length());
350+
buffer.write(arrayNl.unsafeBytes());
351+
buffer.write(shift, 0, state.getDepth() * indentUnit.length());
303352
}
304353

305-
buffer.append((byte)']');
354+
buffer.write((byte)']');
306355
}
307356
};
308357

@@ -321,7 +370,7 @@ int guessSize(Session session, RubyHash object) {
321370

322371
@Override
323372
void generate(final Session session, RubyHash object,
324-
final ByteList buffer) {
373+
final OutputStream buffer) throws IOException {
325374
ThreadContext context = session.getContext();
326375
final Ruby runtime = context.getRuntime();
327376
final GeneratorState state = session.getState();
@@ -332,39 +381,43 @@ void generate(final Session session, RubyHash object,
332381
final ByteList spaceBefore = state.getSpaceBefore();
333382
final ByteList space = state.getSpace();
334383

335-
buffer.append((byte)'{');
336-
buffer.append(objectNl);
384+
buffer.write((byte)'{');
385+
buffer.write(objectNl.unsafeBytes());
337386

338387
final boolean[] firstPair = new boolean[]{true};
339388
object.visitAll(new RubyHash.Visitor() {
340389
@Override
341390
public void visit(IRubyObject key, IRubyObject value) {
342-
if (firstPair[0]) {
343-
firstPair[0] = false;
344-
} else {
345-
buffer.append((byte)',');
346-
buffer.append(objectNl);
391+
try {
392+
if (firstPair[0]) {
393+
firstPair[0] = false;
394+
} else {
395+
buffer.write((byte) ',');
396+
buffer.write(objectNl.unsafeBytes());
397+
}
398+
if (objectNl.length() != 0) buffer.write(indent);
399+
400+
STRING_HANDLER.generate(session, key.asString(), buffer);
401+
session.infectBy(key);
402+
403+
buffer.write(spaceBefore.unsafeBytes());
404+
buffer.write((byte) ':');
405+
buffer.write(space.unsafeBytes());
406+
407+
Handler<IRubyObject> valueHandler = (Handler<IRubyObject>) getHandlerFor(runtime, value);
408+
valueHandler.generate(session, value, buffer);
409+
session.infectBy(value);
410+
} catch (IOException ioe) {
411+
throw Helpers.newIOErrorFromException(session.getRuntime(), ioe);
347412
}
348-
if (objectNl.length() != 0) buffer.append(indent);
349-
350-
STRING_HANDLER.generate(session, key.asString(), buffer);
351-
session.infectBy(key);
352-
353-
buffer.append(spaceBefore);
354-
buffer.append((byte)':');
355-
buffer.append(space);
356-
357-
Handler<IRubyObject> valueHandler = (Handler<IRubyObject>) getHandlerFor(runtime, value);
358-
valueHandler.generate(session, value, buffer);
359-
session.infectBy(value);
360413
}
361414
});
362415
state.decreaseDepth();
363416
if (!firstPair[0] && objectNl.length() != 0) {
364-
buffer.append(objectNl);
417+
buffer.write(objectNl.unsafeBytes());
365418
}
366-
buffer.append(Utils.repeat(state.getIndent(), state.getDepth()));
367-
buffer.append((byte)'}');
419+
buffer.write(Utils.repeat(state.getIndent(), state.getDepth()));
420+
buffer.write((byte)'}');
368421
}
369422
};
370423

@@ -379,7 +432,7 @@ int guessSize(Session session, RubyString object) {
379432
}
380433

381434
@Override
382-
void generate(Session session, RubyString object, ByteList buffer) {
435+
void generate(Session session, RubyString object, OutputStream buffer) throws IOException {
383436
RuntimeInfo info = session.getInfo();
384437
RubyString src;
385438

@@ -414,7 +467,7 @@ RubyString generateNew(Session session, IRubyObject object) {
414467
}
415468

416469
@Override
417-
void generate(Session session, IRubyObject object, ByteList buffer) {
470+
void generate(Session session, IRubyObject object, OutputStream buffer) throws IOException {
418471
RubyString str = object.asString();
419472
STRING_HANDLER.generate(session, str, buffer);
420473
}
@@ -439,9 +492,8 @@ RubyString generateNew(Session session, IRubyObject object) {
439492
}
440493

441494
@Override
442-
void generate(Session session, IRubyObject object, ByteList buffer) {
443-
RubyString result = generateNew(session, object);
444-
buffer.append(result.getByteList());
495+
void generate(Session session, IRubyObject object, OutputStream buffer) throws IOException {
496+
generateNew(session, object, buffer);
445497
}
446498
};
447499
}

0 commit comments

Comments
 (0)