6565import org .apache .commons .io .function .IOConsumer ;
6666import org .apache .commons .io .function .IOSupplier ;
6767import org .apache .commons .io .function .IOTriFunction ;
68+ import org .apache .commons .io .input .BoundedInputStream ;
6869import org .apache .commons .io .input .CharSequenceReader ;
6970import org .apache .commons .io .input .QueueInputStream ;
7071import org .apache .commons .io .output .AppendableWriter ;
7172import org .apache .commons .io .output .ByteArrayOutputStream ;
7273import org .apache .commons .io .output .NullOutputStream ;
7374import org .apache .commons .io .output .NullWriter ;
7475import org .apache .commons .io .output .StringBuilderWriter ;
75- import org .apache .commons .io .output .ThresholdingOutputStream ;
7676import org .apache .commons .io .output .UnsynchronizedByteArrayOutputStream ;
7777
7878/**
@@ -221,6 +221,14 @@ public class IOUtils {
221221 */
222222 private static final char [] SCRATCH_CHAR_BUFFER_WO = charArray ();
223223
224+ /**
225+ * The maximum size of an array in many Java VMs ({@code Integer.MAX_VALUE - 8}).
226+ * <p>
227+ * The constant is copied from OpenJDK's {@code jdk.internal.util.ArraysSupport#SOFT_MAX_ARRAY_LENGTH}.
228+ * </p>
229+ */
230+ private static final int SOFT_MAX_ARRAY_LENGTH = Integer .MAX_VALUE - 8 ;
231+
224232 /**
225233 * Returns the given InputStream if it is already a {@link BufferedInputStream}, otherwise creates a
226234 * BufferedInputStream from the given InputStream.
@@ -2637,57 +2645,63 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
26372645 }
26382646
26392647 /**
2640- * Gets the contents of an {@link InputStream} as a {@code byte[]}.
2641- * <p>
2642- * This method buffers the input internally, so there is no need to use a {@link BufferedInputStream}.
2643- * </p>
2648+ * Reads all the bytes from an input stream into a new byte array.
26442649 *
2645- * @param inputStream the {@link InputStream} to read.
2646- * @return the requested byte array.
2647- * @throws NullPointerException if the InputStream is {@code null}.
2648- * @throws IOException if an I/O error occurs or reading more than {@link Integer#MAX_VALUE} occurs.
2650+ * <p>The memory used by this method is <strong>proportional</strong> to the number
2651+ * of bytes read, which is limited to {@code Integer.MAX_VALUE - 8}. Only streams
2652+ * which fit into a single byte array with roughly 2 GiB limit can be processed
2653+ * with this method.</p>
2654+ *
2655+ * @param inputStream The {@link InputStream} to read; must not be {@code null}.
2656+ * @return A new byte array.
2657+ * @throws IllegalArgumentException If the stream holds more than {@code Integer.MAX_VALUE - 8} bytes.
2658+ * @throws IOException If an I/O error occurs while reading.
2659+ * @throws NullPointerException If {@code inputStream} is {@code null}.
26492660 */
26502661 public static byte [] toByteArray (final InputStream inputStream ) throws IOException {
2651- // We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE.
2652- try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream .builder ().get ();
2653- ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream (Integer .MAX_VALUE , os -> {
2654- throw new IllegalArgumentException (String .format ("Cannot read more than %,d into a byte array" , Integer .MAX_VALUE ));
2655- }, os -> ubaOutput )) {
2656- copy (inputStream , thresholdOutput );
2657- return ubaOutput .toByteArray ();
2662+ // Read at most one byte past SOFT_MAX_ARRAY_LENGTH: an oversized stream is detected below and size() cannot overflow
2663+ final UnsynchronizedByteArrayOutputStream output = copyToOutputStream (inputStream , SOFT_MAX_ARRAY_LENGTH + 1 , DEFAULT_BUFFER_SIZE );
2664+ if (output .size () > SOFT_MAX_ARRAY_LENGTH ) {
2665+ throw new IllegalArgumentException (String .format ("Cannot read more than %,d into a byte array" , SOFT_MAX_ARRAY_LENGTH ));
26582666 }
2667+ return output .toByteArray ();
26592668 }
26602669
26612670 /**
2662- * Gets the contents of an {@link InputStream} as a {@code byte[]}. Use this method instead of
2663- * {@link #toByteArray(InputStream)} when {@link InputStream} size is known.
2671+ * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
26642672 *
2665- * @param input the {@link InputStream} to read.
2666- * @param size the size of {@link InputStream} to read, where 0 < {@code size} <= length of input stream.
2667- * @return byte [] of length {@code size}.
2668- * @throws IOException if an I/O error occurs or {@link InputStream} length is smaller than parameter {@code size}.
2669- * @throws IllegalArgumentException if {@code size} is less than zero.
2673+ * <p>This variant always allocates the whole requested array size;
2674+ * for a dynamically growing variant use {@link #toByteArray(InputStream, int, int)},
2675+ * which enforces stricter memory usage constraints.</p>
2676+ *
2677+ * @param input the {@link InputStream} to read; must not be {@code null}.
2678+ * @param size the exact number of bytes to read; must be {@code >= 0}.
2679+ * @return a new byte array of length {@code size}.
2680+ * @throws IllegalArgumentException if {@code size} is negative.
2681+ * @throws EOFException if the stream ends before {@code size} bytes are read.
2682+ * @throws IOException if an I/O error occurs while reading.
2683+ * @throws NullPointerException if {@code input} is {@code null}.
26702684 * @since 2.1
26712685 */
2672- @ SuppressWarnings ("resource" )
26732686 public static byte [] toByteArray (final InputStream input , final int size ) throws IOException {
26742687 return toByteArray (Objects .requireNonNull (input , "input" )::read , size );
26752688 }
26762689
26772690 /**
2678- * Gets contents of an {@link InputStream} as a {@code byte[]}.
2679- * Use this method instead of {@link #toByteArray(InputStream)}
2680- * when {@link InputStream} size is known.
2681- * <strong>NOTE:</strong> the method checks that the length can safely be cast to an int without truncation
2682- * before using {@link IOUtils#toByteArray(InputStream, int)} to read into the byte array.
2683- * (Arrays can have no more than Integer.MAX_VALUE entries anyway.)
2691+ * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
26842692 *
2685- * @param input the {@link InputStream} to read.
2686- * @param size the size of {@link InputStream} to read, where 0 < {@code size} <= min(Integer.MAX_VALUE, length of input stream).
2687- * @return byte [] the requested byte array, of length {@code size}.
2688- * @throws IOException if an I/O error occurs or {@link InputStream} length is less than {@code size}.
2689- * @throws IllegalArgumentException if size is less than zero or size is greater than Integer.MAX_VALUE.
2690- * @see IOUtils#toByteArray(InputStream, int)
2693+ * <p>This variant always allocates the whole requested array size;
2694+ * for a dynamically growing variant use {@link #toByteArray(InputStream, int, int)},
2695+ * which enforces stricter memory usage constraints.</p>
2696+ *
2697+ * @param input the {@link InputStream} to read; must not be {@code null}.
2698+ * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}.
2699+ * @return a new byte array of length {@code size}.
2700+ * @throws IllegalArgumentException if {@code size} is negative or does not fit into an int.
2701+ * @throws EOFException if the stream ends before {@code size} bytes are read.
2702+ * @throws IOException if an I/O error occurs while reading.
2703+ * @throws NullPointerException if {@code input} is {@code null}.
2704+ * @see #toByteArray(InputStream, int, int)
26912705 * @since 2.1
26922706 */
26932707 public static byte [] toByteArray (final InputStream input , final long size ) throws IOException {
@@ -2697,6 +2711,68 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
26972711 return toByteArray (input , (int ) size );
26982712 }
26992713
2714+ /**
2715+ * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
2716+ *
2717+ * <p>The memory used by this method is <strong>proportional</strong> to the number
2718+ * of bytes read and limited by the specified {@code size}. This makes it suitable for
2719+ * processing large input streams, provided that <strong>sufficient</strong> heap space is
2720+ * available.</p>
2721+ *
2722+ * <p>This method processes the input stream in successive chunks of up to
2723+ * {@code chunkSize} bytes.</p>
2724+ *
2725+ * @param input the {@link InputStream} to read; must not be {@code null}.
2726+ * @param size the exact number of bytes to read; must be {@code >= 0}.
2727+ * The actual bytes read are validated to equal {@code size}.
2728+ * @param chunkSize the chunk size for incremental reading; must be {@code > 0}.
2729+ * @return a new byte array of length {@code size}.
2730+ * @throws IllegalArgumentException if {@code size} is negative or {@code chunkSize <= 0}.
2731+ * @throws EOFException if the stream ends before {@code size} bytes are read.
2732+ * @throws IOException if an I/O error occurs while reading.
2733+ * @throws NullPointerException if {@code input} is {@code null}.
2734+ * @since 2.21.0
2735+ */
2736+ public static byte [] toByteArray (final InputStream input , final int size , final int chunkSize ) throws IOException {
2737+ Objects .requireNonNull (input , "input" );
2738+ if (chunkSize <= 0 ) {
2739+ throw new IllegalArgumentException ("Chunk size must be greater than zero: " + chunkSize );
2740+ }
2741+ if (size <= chunkSize ) {
2742+ // size does not exceed one chunk (or is negative): delegate directly; the delegate throws if size < 0
2743+ return toByteArray (input ::read , size );
2744+ }
2745+ final UnsynchronizedByteArrayOutputStream output = copyToOutputStream (input , size , chunkSize );
2746+ if (output .size () != size ) {
2747+ throw new EOFException ("Unexpected read size, current: " + output .size () + ", expected: " + size );
2748+ }
2749+ return output .toByteArray ();
2750+ }
2751+
2752+ /**
2753+ * Copies up to {@code limit} bytes from the given {@link InputStream} into a new {@link UnsynchronizedByteArrayOutputStream}.
2754+ *
2755+ * @param input The {@link InputStream} to read; must not be {@code null}.
2756+ * @param limit The maximum number of bytes to read; must be {@code >= 0}.
2757+ * This method does not validate the number of bytes actually read; callers check the size of the result.
2758+ * @param bufferSize The buffer size of the output stream; must be {@code > 0}.
2759+ * @return an {@link UnsynchronizedByteArrayOutputStream} containing the read bytes.
2760+ */
2761+ private static UnsynchronizedByteArrayOutputStream copyToOutputStream (
2762+ final InputStream input , final long limit , final int bufferSize ) throws IOException {
2763+ try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream .builder ()
2764+ .setBufferSize (bufferSize )
2765+ .get ();
2766+ InputStream boundedInput = BoundedInputStream .builder ()
2767+ .setMaxCount (limit )
2768+ .setPropagateClose (false ) // closing the bounded wrapper must not close the caller's stream
2769+ .setInputStream (input )
2770+ .get ()) {
2771+ output .write (boundedInput );
2772+ return output ;
2773+ }
2774+ }
2775+
27002776 /**
27012777 * Gets the contents of an input as a {@code byte[]}.
27022778 *
0 commit comments