7373import org .apache .commons .io .output .NullOutputStream ;
7474import org .apache .commons .io .output .NullWriter ;
7575import org .apache .commons .io .output .StringBuilderWriter ;
76- import org .apache .commons .io .output .ThresholdingOutputStream ;
7776import org .apache .commons .io .output .UnsynchronizedByteArrayOutputStream ;
7877
7978/**
@@ -222,6 +221,21 @@ public class IOUtils {
222221 */
223222 private static final char [] SCRATCH_CHAR_BUFFER_WO = charArray ();
224223
224+ /**
225+ * The maximum size of an array in many Java VMs.
226+ */
227+ private static final int MAX_ARRAY_LENGTH = Integer .MAX_VALUE - 8 ;
228+
229+ /**
230+ * Default maximum chunk size used when copying large streams into a byte array.
231+ * <p>
232+ * This value is somewhat arbitrary, currently aligned with the value used by
233+ * <a href="https://github.com/python/cpython/blob/3.14/Lib/_pyio.py">Python</a>
234+ * for copying streams.
235+ * </p>
236+ */
237+ private static final int DEFAULT_CHUNK_SIZE = 128 * 1024 ;
238+
225239 /**
226240 * Returns the given InputStream if it is already a {@link BufferedInputStream}, otherwise creates a
227241 * BufferedInputStream from the given InputStream.
@@ -2640,26 +2654,34 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
26402654 /**
26412655 * Reads all the bytes from an input stream in a byte array.
26422656 *
2643- * @param inputStream the {@link InputStream} to read; must not be {@code null}.
2644- * @return a new byte array.
2645- * @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}.
2646- * @throws IOException if an I/O error occurs while reading.
2647- * @throws NullPointerException if {@code inputStream} is {@code null}.
2657+ * <p>The memory used by this method is <strong>proportional</strong> to the number
2658+ * of bytes read, which is only limited by the maximum array size (slightly less than {@link Integer#MAX_VALUE}). This makes it unsuitable for
2659+ * processing large input streams, unless sufficient heap space is available.</p>
2660+ *
2661+ * @param inputStream The {@link InputStream} to read; must not be {@code null}.
2662+ * @return A new byte array.
2663+ * @throws IllegalArgumentException If the size of the stream is greater than the maximum array size.
2664+ * @throws IOException If an I/O error occurs while reading.
2665+ * @throws NullPointerException If {@code inputStream} is {@code null}.
26482666 */
26492667 public static byte [] toByteArray (final InputStream inputStream ) throws IOException {
2650- // We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE.
2651- try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream .builder ().get ();
2652- ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream (Integer .MAX_VALUE , os -> {
2653- throw new IllegalArgumentException (String .format ("Cannot read more than %,d into a byte array" , Integer .MAX_VALUE ));
2654- }, os -> ubaOutput )) {
2655- copy (inputStream , thresholdOutput );
2656- return ubaOutput .toByteArray ();
2668+ final UnsynchronizedByteArrayOutputStream output =
2669+ copyToOutputStream (inputStream , MAX_ARRAY_LENGTH + 1 , DEFAULT_CHUNK_SIZE );
2670+ if (output .size () > MAX_ARRAY_LENGTH ) {
2671+ throw new IllegalArgumentException (
2672+ String .format ("Cannot read more than %,d into a byte array" , MAX_ARRAY_LENGTH ));
26572673 }
2674+ return output .toByteArray ();
26582675 }
26592676
26602677 /**
26612678 * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
26622679 *
2680+ * <p>The memory used by this method is <strong>proportional</strong> to the number
2681+ * of bytes read and limited by the specified {@code size}. This makes it suitable for
2682+ * processing large input streams, provided that <strong>sufficient</strong> heap space is
2683+ * available.</p>
2684+ *
26632685 * @param input the {@link InputStream} to read; must not be {@code null}.
26642686 * @param size the exact number of bytes to read; must be {@code >= 0}.
26652687 * @return a new byte array of length {@code size}.
@@ -2670,12 +2692,17 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
26702692 * @since 2.1
26712693 */
26722694 public static byte [] toByteArray (final InputStream input , final int size ) throws IOException {
2673- return toByteArray (Objects . requireNonNull ( input , "input" ):: read , size );
2695+ return toByteArray (input , size , DEFAULT_CHUNK_SIZE );
26742696 }
26752697
26762698 /**
26772699 * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
26782700 *
2701+ * <p>The memory used by this method is <strong>proportional</strong> to the number
2702+ * of bytes read and limited by the specified {@code size}. This makes it suitable for
2703+ * processing large input streams, provided that <strong>sufficient</strong> heap space is
2704+ * available.</p>
2705+ *
26792706 * @param input the {@link InputStream} to read; must not be {@code null}.
26802707 * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}.
26812708 * @return a new byte array of length {@code size}.
@@ -2696,46 +2723,63 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
26962723 /**
26972724 * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
26982725 *
2699- * <p>When reading from an untrusted stream, this variant lowers the risk of
2700- * {@link OutOfMemoryError} by allocating data in buffers of up to {@code bufferSize}
2701- * bytes rather than in one large array.</p>
2726+ * <p>The memory used by this method is <strong>proportional</strong> to the number
2727+ * of bytes read and limited by the specified {@code size}. This makes it suitable for
2728+ * processing large input streams, provided that <strong>sufficient</strong> heap space is
2729+ * available.</p>
27022730 *
2703- * <p>Note, however, that this approach requires additional temporary memory
2704- * compared to {@link #toByteArray(InputStream, int)} .</p>
2731+ * <p>This method processes the input stream in successive chunks of up to
2732+ * {@code chunkSize} bytes .</p>
27052733 *
27062734 * @param input the {@link InputStream} to read; must not be {@code null}.
27072735 * @param size the exact number of bytes to read; must be {@code >= 0}.
27082736 * The actual bytes read are validated to equal {@code size}.
2709- * @param bufferSize the buffer size for incremental reading; must be {@code > 0}.
2737+ * @param chunkSize The chunk size for incremental reading; must be {@code > 0}.
27102738 * @return a new byte array of length {@code size}.
2711- * @throws IllegalArgumentException if {@code size} is negative or {@code bufferSize <= 0}.
2739+ * @throws IllegalArgumentException if {@code size} is negative or {@code chunkSize <= 0}.
27122740 * @throws EOFException if the stream ends before {@code size} bytes are read.
27132741 * @throws IOException if an I/O error occurs while reading.
27142742 * @throws NullPointerException if {@code input} is {@code null}.
27152743 * @since 2.21.0
27162744 */
2717- public static byte [] toByteArray (final InputStream input , final int size , final int bufferSize ) throws IOException {
2745+ public static byte [] toByteArray (final InputStream input , final int size , final int chunkSize ) throws IOException {
27182746 Objects .requireNonNull (input , "input" );
2719- if (bufferSize <= 0 ) {
2720- throw new IllegalArgumentException ("Buffer size must be greater than zero: " + bufferSize );
2747+ if (chunkSize <= 0 ) {
2748+ throw new IllegalArgumentException ("Chunk size must be greater than zero: " + chunkSize );
27212749 }
2722- if (size <= bufferSize ) {
2750+ if (size <= chunkSize ) {
27232751 // throws if size < 0
27242752 return toByteArray (input ::read , size );
27252753 }
2754+ final UnsynchronizedByteArrayOutputStream output = copyToOutputStream (input , size , chunkSize );
2755+ if (output .size () != size ) {
2756+ throw new EOFException ("Unexpected read size, current: " + output .size () + ", expected: " + size );
2757+ }
2758+ return output .toByteArray ();
2759+ }
2760+
2761+ /**
2762+ * Copies up to {@code limit} bytes from the given {@link InputStream} into a new {@link UnsynchronizedByteArrayOutputStream}.
2763+ *
2764+ *
2765+ * @param input The {@link InputStream} to read; must not be {@code null}.
2766+ * @param limit The maximum number of bytes to read; must be {@code >= 0}.
2767+ * Fewer bytes are copied if the stream ends first; callers validate the number of bytes read.
2768+ * @param bufferSize The buffer size of the output stream; must be {@code > 0}.
2769+ * @return a ByteArrayOutputStream containing the read bytes.
2770+ */
2771+ private static UnsynchronizedByteArrayOutputStream copyToOutputStream (
2772+ final InputStream input , final long limit , final int bufferSize ) throws IOException {
27262773 try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream .builder ()
27272774 .setBufferSize (bufferSize )
27282775 .get ();
27292776 InputStream boundedInput = BoundedInputStream .builder ()
2730- .setMaxCount (size )
2777+ .setMaxCount (limit )
27312778 .setPropagateClose (false )
27322779 .setInputStream (input )
27332780 .get ()) {
27342781 output .write (boundedInput );
2735- if (output .size () != size ) {
2736- throw new EOFException ("Unexpected read size, current: " + output .size () + ", expected: " + size );
2737- }
2738- return output .toByteArray ();
2782+ return output ;
27392783 }
27402784 }
27412785
@@ -2756,13 +2800,9 @@ static byte[] toByteArray(final IOTriFunction<byte[], Integer, Integer, Integer>
27562800 return EMPTY_BYTE_ARRAY ;
27572801 }
27582802 final byte [] data = byteArray (size );
2759- int offset = 0 ;
2760- int read ;
2761- while (offset < size && (read = input .apply (data , offset , size - offset )) != EOF ) {
2762- offset += read ;
2763- }
2764- if (offset != size ) {
2765- throw new IOException ("Unexpected read size, current: " + offset + ", expected: " + size );
2803+ final int read = read (input , data , 0 , size );
2804+ if (read != size ) {
2805+ throw new IOException ("Unexpected read size, current: " + read + ", expected: " + size );
27662806 }
27672807 return data ;
27682808 }
0 commit comments