Skip to content

Commit c2260f0

Browse files
jcwintersjwinters-atlassiangarydgregoryppkarwasz
authored
[LANG-1772] Restrict size of cache to prevent overflow errors (#1379)
* LANG-1772 restrict size of cache to prevent overflow errors * LANG-1772 Adding a largeheap maven profile. Skipping the testHugeStrings method in all other profiles. * LANG-1772 Adding javadoc as per request. Also moving max cache size inside the CachedRandomBits constructor - also checking if the padding produces overflow. No longer using an arbitrary value but being more precise. * LANG-1772 Suggestions from PR implemented * LANG-1772 Using a long for the intermediate calculation and restricting the result to MAX_INT/5, also restricting max cache length to MAX_INT/3, there are now no opportunities for overflow. The test checks at the boundary condition * Close HTML tag * LANG-1772 Introduced many constants, and attempted to add comprehensive documentation around the nextBits method and the size allocation for the cache * Update src/main/java/org/apache/commons/lang3/RandomStringUtils.java You're right, should be outside the min Co-authored-by: Piotr P. Karwasz <[email protected]> --------- Co-authored-by: James Winters <[email protected]> Co-authored-by: Gary Gregory <[email protected]> Co-authored-by: Piotr P. Karwasz <[email protected]>
1 parent 3f7868e commit c2260f0

File tree

4 files changed

+87
-15
lines changed

4 files changed

+87
-15
lines changed

pom.xml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,10 @@
110110
</distributionManagement>
111111

112112
<properties>
113-
<argLine>-Xmx512m</argLine>
113+
<heapSize>-Xmx512m</heapSize>
114+
<extraArgs/>
115+
<systemProperties/>
116+
<argLine>${heapSize} ${extraArgs} ${systemProperties}</argLine>
114117
<project.build.sourceEncoding>ISO-8859-1</project.build.sourceEncoding>
115118
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
116119
<!-- project.build.outputTimestamp is managed by Maven plugins, see https://maven.apache.org/guides/mini/guide-reproducible-builds.html -->
@@ -460,7 +463,7 @@
460463
<properties>
461464
<!-- LANG-1265: allow tests to access private fields/methods of java.base classes via reflection -->
462465
<!-- LANG-1667: allow tests to access private fields/methods of java.base/java.util such as ArrayList via reflection -->
463-
<argLine>-Xmx512m --add-opens java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.util=ALL-UNNAMED --add-opens java.base/java.time=ALL-UNNAMED --add-opens java.base/java.time.chrono=ALL-UNNAMED</argLine>
466+
<extraArgs>--add-opens java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.util=ALL-UNNAMED --add-opens java.base/java.time=ALL-UNNAMED --add-opens java.base/java.time.chrono=ALL-UNNAMED</extraArgs>
464467
</properties>
465468
</profile>
466469
<profile>
@@ -522,6 +525,13 @@
522525
</plugins>
523526
</build>
524527
</profile>
528+
<profile>
529+
<id>largeheap</id>
530+
<properties>
531+
<heapSize>-Xmx1024m</heapSize>
532+
<systemProperties>-Dtest.large.heap=true</systemProperties>
533+
</properties>
534+
</profile>
525535
</profiles>
526536
<developers>
527537
<developer>

src/main/java/org/apache/commons/lang3/CachedRandomBits.java

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,20 @@ final class CachedRandomBits {
5252
*/
5353
private int bitIndex;
5454

55+
/**
56+
* The maximum size of the cache.
57+
*
58+
* <p>
59+
* This is to prevent the possibility of overflow in the {@code if (bitIndex >> 3 >= cache.length)} in the {@link #nextBits(int)} method.
60+
* </p>
61+
*/
62+
private static final int MAX_CACHE_SIZE = Integer.MAX_VALUE >> 3;
63+
/** Maximum number of bits that can be generated (size of an int) */
64+
private static final int MAX_BITS = 32;
65+
/** Mask to extract the bit offset within a byte (0-7) */
66+
private static final int BIT_INDEX_MASK = 0x7;
67+
/** Number of bits in a byte */
68+
private static final int BITS_PER_BYTE = 8;
5569
/**
5670
* Creates a new instance.
5771
*
@@ -62,7 +76,7 @@ final class CachedRandomBits {
6276
if (cacheSize <= 0) {
6377
throw new IllegalArgumentException("cacheSize must be positive");
6478
}
65-
this.cache = new byte[cacheSize];
79+
this.cache = cacheSize <= MAX_CACHE_SIZE ? new byte[cacheSize] : new byte[MAX_CACHE_SIZE];
6680
this.random = Objects.requireNonNull(random, "random");
6781
this.random.nextBytes(this.cache);
6882
this.bitIndex = 0;
@@ -71,28 +85,50 @@ final class CachedRandomBits {
7185
/**
7286
* Generates a random integer with the specified number of bits.
7387
*
74-
* @param bits number of bits to generate, MUST be between 1 and 32
75-
* @return random integer with {@code bits} bits
88+
* <p>This method efficiently generates random bits by using a byte cache and bit manipulation:
89+
* <ul>
90+
* <li>Uses a byte array cache to avoid frequent calls to the underlying random number generator</li>
91+
* <li>Extracts bits from each byte using bit shifting and masking</li>
92+
* <li>Handles partial bytes to avoid wasting random bits</li>
93+
* <li>Accumulates bits until the requested number is reached</li>
94+
* </ul>
95+
* </p>
96+
*
97+
* @param bits number of bits to generate, MUST be between 1 and 32 (inclusive)
98+
* @return random integer containing exactly the requested number of random bits
99+
* @throws IllegalArgumentException if bits is not between 1 and 32
76100
*/
77101
public int nextBits(final int bits) {
78-
if (bits > 32 || bits <= 0) {
79-
throw new IllegalArgumentException("number of bits must be between 1 and 32");
102+
if (bits > MAX_BITS || bits <= 0) {
103+
throw new IllegalArgumentException("number of bits must be between 1 and " + MAX_BITS);
80104
}
81105
int result = 0;
82106
int generatedBits = 0; // number of generated bits up to now
83107
while (generatedBits < bits) {
108+
// Check if we need to refill the cache
109+
// Convert bitIndex to byte index by dividing by 8 (right shift by 3)
84110
if (bitIndex >> 3 >= cache.length) {
85-
// we exhausted the number of bits in the cache
86-
// this should only happen if the bitIndex is exactly matching the cache length
87-
assert bitIndex == cache.length * 8;
111+
// We exhausted the number of bits in the cache
112+
// This should only happen if the bitIndex is exactly matching the cache length
113+
assert bitIndex == cache.length * BITS_PER_BYTE;
88114
random.nextBytes(cache);
89115
bitIndex = 0;
90116
}
91-
// generatedBitsInIteration is the number of bits that we will generate
92-
// in this iteration of the while loop
93-
final int generatedBitsInIteration = Math.min(8 - (bitIndex & 0x7), bits - generatedBits);
117+
// Calculate how many bits we can extract from the current byte
118+
// 1. Get current position within byte (0-7) using bitIndex & 0x7
119+
// 2. Calculate remaining bits in byte: 8 - (position within byte)
120+
// 3. Take minimum of remaining bits in byte and bits still needed
121+
final int generatedBitsInIteration = Math.min(
122+
BITS_PER_BYTE - (bitIndex & BIT_INDEX_MASK),
123+
bits - generatedBits);
124+
// Shift existing result left to make room for new bits
94125
result = result << generatedBitsInIteration;
95-
result |= cache[bitIndex >> 3] >> (bitIndex & 0x7) & (1 << generatedBitsInIteration) - 1;
126+
// Extract and append new bits:
127+
// 1. Get byte from cache (bitIndex >> 3 converts bit index to byte index)
128+
// 2. Shift right by bit position within byte (bitIndex & 0x7)
129+
// 3. Mask to keep only the bits we want ((1 << generatedBitsInIteration) - 1)
130+
result |= cache[bitIndex >> 3] >> (bitIndex & BIT_INDEX_MASK) & ((1 << generatedBitsInIteration) - 1);
131+
// Update counters
96132
generatedBits += generatedBitsInIteration;
97133
bitIndex += generatedBitsInIteration;
98134
}

src/main/java/org/apache/commons/lang3/RandomStringUtils.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ public class RandomStringUtils {
9898
private static final int ASCII_A = 'A';
9999
private static final int ASCII_z = 'z';
100100

101+
private static final int CACHE_PADDING_BITS = 3;
102+
private static final int BITS_TO_BYTES_DIVISOR = 5;
103+
private static final int BASE_CACHE_SIZE_PADDING = 10;
104+
101105
/**
102106
* Gets the singleton instance based on {@link ThreadLocalRandom#current()}; <b>which is not cryptographically
103107
* secure</b>; use {@link #secure()} to use an algorithms/providers specified in the
@@ -329,7 +333,16 @@ public static String random(int count, int start, int end, final boolean letters
329333
// Ideally the cache size depends on multiple factor, including the cost of generating x bytes
330334
// of randomness as well as the probability of rejection. It is however not easy to know
331335
// those values programmatically for the general case.
332-
final CachedRandomBits arb = new CachedRandomBits((count * gapBits + 3) / 5 + 10, random);
336+
// Calculate cache size:
337+
// 1. Multiply count by bits needed per character (gapBits)
338+
// 2. Add padding bits (3) to handle partial bytes
339+
// 3. Divide by 5 to convert to bytes (normally this would be by 8, dividing by 5 allows for about 60% extra space)
340+
// 4. Add base padding (10) to handle small counts efficiently
341+
// 5. Ensure we don't exceed Integer.MAX_VALUE / 5 + 10 to provide a good balance between overflow prevention and
342+
// making the cache extremely large
343+
final long desiredCacheSize = ((long) count * gapBits + CACHE_PADDING_BITS) / BITS_TO_BYTES_DIVISOR + BASE_CACHE_SIZE_PADDING;
344+
final int cacheSize = (int) Math.min(desiredCacheSize, Integer.MAX_VALUE / BITS_TO_BYTES_DIVISOR + BASE_CACHE_SIZE_PADDING);
345+
final CachedRandomBits arb = new CachedRandomBits(cacheSize, random);
333346

334347
while (count-- != 0) {
335348
// Generate a random value between start (included) and end (excluded)

src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,20 @@
2929
import java.util.stream.Stream;
3030

3131
import org.junit.jupiter.api.Test;
32+
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
3233
import org.junit.jupiter.params.ParameterizedTest;
3334
import org.junit.jupiter.params.provider.MethodSource;
35+
import org.junit.jupiter.params.provider.ValueSource;
3436

3537
/**
3638
* Tests {@link RandomStringUtils}.
3739
*/
3840
public class RandomStringUtilsTest extends AbstractLangTest {
3941

4042
private static final int LOOP_COUNT = 1_000;
43+
/** Maximum safe value for count to avoid overflow: (21x + 3) / 5 + 10 < 0x0FFF_FFFF */
44+
private static final int MAX_SAFE_COUNT = 63_913_201;
45+
4146

4247
static Stream<RandomStringUtils> randomProvider() {
4348
return Stream.of(RandomStringUtils.secure(), RandomStringUtils.secureStrong(), RandomStringUtils.insecure());
@@ -802,4 +807,12 @@ public void testRandomWithChars(final RandomStringUtils rsu) {
802807
assertNotEquals(r1, r3);
803808
assertNotEquals(r2, r3);
804809
}
810+
811+
@ParameterizedTest
812+
@ValueSource(ints = {MAX_SAFE_COUNT, MAX_SAFE_COUNT + 1})
813+
@EnabledIfSystemProperty(named = "test.large.heap", matches = "true")
814+
public void testHugeStrings(final int expectedLength) {
815+
final String hugeString = RandomStringUtils.random(expectedLength);
816+
assertEquals(expectedLength, hugeString.length(), "hugeString.length() == expectedLength");
817+
}
805818
}

0 commit comments

Comments
 (0)