Skip to content

Commit 7410409

Browse files
authored
[Storage] jumbo blobs stress tests. (Azure#23119)
* let's see. * fix build. * ehh... * wip * try datalake. * shares. * checkstyle. * docs.
1 parent 3b8ca74 commit 7410409

File tree

14 files changed

+107
-43
lines changed

14 files changed

+107
-43
lines changed

common/perf-test-core/src/main/java/com/azure/perf/test/core/PerfStressProgram.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,11 @@ public static void runTests(PerfStressTest<?>[] tests, boolean sync, int paralle
225225
}
226226
} catch (InterruptedException | ExecutionException e) {
227227
System.err.println("Error occurred when submitting jobs to ForkJoinPool. " + System.lineSeparator() + e);
228+
e.printStackTrace(System.err);
228229
throw new RuntimeException(e);
229230
} catch (Exception e) {
230231
System.err.println("Error occurred running tests: " + System.lineSeparator() + e);
232+
e.printStackTrace(System.err);
231233
} finally {
232234
progressStatus.dispose();
233235
}

common/perf-test-core/src/main/java/com/azure/perf/test/core/RepeatingInputStream.java

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,14 @@
1010
* Represents a repeating input stream with mark support enabled.
1111
*/
1212
public class RepeatingInputStream extends InputStream {
13-
private static final int RANDOM_BYTES_LENGTH = 1024 * 1024; // 1MB
13+
private static final int RANDOM_BYTES_LENGTH = Integer.parseInt(
14+
System.getProperty("azure.core.perf.test.data.buffer.size", "1048576")); // 1MB default
1415
private static final byte[] RANDOM_BYTES;
15-
private final int size;
16+
private final long size;
1617

17-
private int mark = 0;
18-
private int readLimit = Integer.MAX_VALUE;
19-
private int pos = 0;
18+
private long mark = 0;
19+
private long readLimit = Long.MAX_VALUE;
20+
private long pos = 0;
2021

2122
static {
2223
Random random = new Random(0);
@@ -28,13 +29,13 @@ public class RepeatingInputStream extends InputStream {
2829
* Creates an Instance of the repeating input stream.
2930
* @param size the size of the stream.
3031
*/
31-
public RepeatingInputStream(int size) {
32+
public RepeatingInputStream(long size) {
3233
this.size = size;
3334
}
3435

3536
@Override
3637
public synchronized int read() {
37-
return (pos < size) ? (RANDOM_BYTES[pos++ % RANDOM_BYTES_LENGTH] & 0xFF) : -1;
38+
return (pos < size) ? (RANDOM_BYTES[(int) (pos++ % RANDOM_BYTES_LENGTH)] & 0xFF) : -1;
3839
}
3940

4041
@Override
@@ -61,6 +62,15 @@ public synchronized void mark(int readLimit) {
6162
this.mark = this.pos;
6263
}
6364

65+
/**
66+
* Same as {@link #mark(int)} but accepts a {@code long} read limit.
67+
* @param readLimit read limit.
68+
*/
69+
public synchronized void mark(long readLimit) {
70+
this.readLimit = readLimit;
71+
this.mark = this.pos;
72+
}
73+
6474
@Override
6575
public boolean markSupported() {
6676
return true;

common/perf-test-core/src/main/java/com/azure/perf/test/core/TestDataCreationHelper.java

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
* Utility class to help with data creation for perf testing.
1717
*/
1818
public class TestDataCreationHelper {
19-
private static final int RANDOM_BYTES_LENGTH = 1024 * 1024; // 1MB
19+
private static final int RANDOM_BYTES_LENGTH = Integer.parseInt(
20+
System.getProperty("azure.core.perf.test.data.buffer.size", "1048576")); // 1MB default
2021
private static final byte[] RANDOM_BYTES;
21-
private static final int SIZE = (1024 * 1024 * 1024) + 1;
2222

2323
static {
2424
Random random = new Random(0);
@@ -34,10 +34,10 @@ public class TestDataCreationHelper {
3434
* @return The created {@link Flux}
3535
*/
3636
private static Flux<ByteBuffer> createCircularByteBufferFlux(byte[] array, long size) {
37-
int quotient = (int) size / array.length;
38-
int remainder = (int) size % array.length;
37+
long quotient = size / array.length;
38+
int remainder = (int) (size % array.length);
3939

40-
return Flux.range(0, quotient)
40+
return Flux.just(Boolean.TRUE).repeat(quotient - 1)
4141
.map(i -> allocateByteBuffer(array, array.length))
4242
.concatWithValues(allocateByteBuffer(array, remainder));
4343
}
@@ -66,14 +66,9 @@ public static Flux<ByteBuffer> createRandomByteBufferFlux(long size) {
6666
*
6767
* @param size the size of the stream
6868
* @return the {@link InputStream} of {@code size}
69-
* @throws IllegalArgumentException if {@code size} is more than {@link #SIZE}
7069
*/
7170
public static InputStream createRandomInputStream(long size) {
72-
if (size > SIZE) {
73-
throw new IllegalArgumentException("size must be <= " + SIZE);
74-
}
75-
76-
return new RepeatingInputStream((int) size);
71+
return new RepeatingInputStream(size);
7772
}
7873

7974
/**
@@ -84,10 +79,10 @@ public static InputStream createRandomInputStream(long size) {
8479
* @throws IOException If an IO error occurs.
8580
*/
8681
public static void writeBytesToOutputStream(OutputStream outputStream, long size) throws IOException {
87-
int quotient = (int) size / RANDOM_BYTES.length;
88-
int remainder = (int) size % RANDOM_BYTES.length;
82+
long quotient = size / RANDOM_BYTES.length;
83+
int remainder = (int) (size % RANDOM_BYTES.length);
8984

90-
for (int i = 0; i < quotient; i++) {
85+
for (long i = 0; i < quotient; i++) {
9186
outputStream.write(RANDOM_BYTES);
9287
}
9388

eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,8 @@
735735
<Package name="com.azure.storage.blob.perf.core"/>
736736
<Package name="com.azure.storage.file.datalake.perf"/>
737737
<Package name="com.azure.storage.file.datalake.perf.core"/>
738+
<Package name="com.azure.storage.file.share.perf"/>
739+
<Package name="com.azure.storage.file.share.perf.core"/>
738740
</Or>
739741
<Bug pattern="BC_UNCONFIRMED_CAST, DM_EXIT, RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
740742
</Match>

sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobAsyncClient.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -572,12 +572,14 @@ buffers is not a common scenario for async like it is in sync (and we already bu
572572
// no specified length: use azure.core's converter
573573
if (data == null && options.getOptionalLength() == null) {
574574
// We can only buffer up to max int due to restrictions in ByteBuffer.
575-
int chunkSize = (int) Math.min(Integer.MAX_VALUE, parallelTransferOptions.getBlockSizeLong());
575+
int chunkSize = (int) Math.min(Constants.MAX_INPUT_STREAM_CONVERTER_BUFFER_LENGTH,
576+
parallelTransferOptions.getBlockSizeLong());
576577
data = FluxUtil.toFluxByteBuffer(options.getDataStream(), chunkSize);
577578
// specified length (legacy requirement): use custom converter. no marking because we buffer anyway.
578579
} else if (data == null) {
579580
// We can only buffer up to max int due to restrictions in ByteBuffer.
580-
int chunkSize = (int) Math.min(Integer.MAX_VALUE, parallelTransferOptions.getBlockSizeLong());
581+
int chunkSize = (int) Math.min(Constants.MAX_INPUT_STREAM_CONVERTER_BUFFER_LENGTH,
582+
parallelTransferOptions.getBlockSizeLong());
581583
data = Utility.convertStreamToByteBuffer(
582584
options.getDataStream(), options.getOptionalLength(), chunkSize, false);
583585
}

sdk/storage/azure-storage-common/src/main/java/com/azure/storage/common/implementation/Constants.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,16 @@ public final class Constants {
7171
*/
7272
public static final int BUFFER_COPY_LENGTH = 8 * KB;
7373

74+
/**
75+
* This constant is used to cap Stream->Flux converter's block size considering that:
76+
* - Integer.MAX_VALUE (or near) leads to java.lang.OutOfMemoryError: Requested array size exceeds VM limit
77+
* - Allocating very large arrays can be less successful on a busy heap and puts extra pressure on the GC to
78+
* de-fragment.
79+
* - Going too small, on the other hand, might be harmful to large upload scenarios. Max block size is 4000MB,
80+
* so chunking that into blocks that are smaller produces a lot of garbage to just wrap this into ByteBuffers.
81+
*/
82+
public static final int MAX_INPUT_STREAM_CONVERTER_BUFFER_LENGTH = 64 * MB;
83+
7484
public static final String STORAGE_SCOPE = "https://storage.azure.com/.default";
7585

7686
public static final String STORAGE_LOG_STRING_TO_SIGN = "Azure-Storage-Log-String-To-Sign";

sdk/storage/azure-storage-file-datalake/src/main/java/com/azure/storage/file/datalake/DataLakeFileAsyncClient.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,12 +329,14 @@ public Mono<Response<PathInfo>> uploadWithResponse(FileParallelUploadOptions opt
329329
// no specified length: use azure.core's converter
330330
if (data == null && options.getOptionalLength() == null) {
331331
// We can only buffer up to max int due to restrictions in ByteBuffer.
332-
int chunkSize = (int) Math.min(Integer.MAX_VALUE, validatedParallelTransferOptions.getBlockSizeLong());
332+
int chunkSize = (int) Math.min(Constants.MAX_INPUT_STREAM_CONVERTER_BUFFER_LENGTH,
333+
validatedParallelTransferOptions.getBlockSizeLong());
333334
data = FluxUtil.toFluxByteBuffer(options.getDataStream(), chunkSize);
334335
// specified length (legacy requirement): use custom converter. no marking because we buffer anyway.
335336
} else if (data == null) {
336337
// We can only buffer up to max int due to restrictions in ByteBuffer.
337-
int chunkSize = (int) Math.min(Integer.MAX_VALUE, validatedParallelTransferOptions.getBlockSizeLong());
338+
int chunkSize = (int) Math.min(Constants.MAX_INPUT_STREAM_CONVERTER_BUFFER_LENGTH,
339+
validatedParallelTransferOptions.getBlockSizeLong());
338340
data = Utility.convertStreamToByteBuffer(
339341
options.getDataStream(), options.getOptionalLength(), chunkSize, false);
340342
}

sdk/storage/azure-storage-file-share/src/main/java/com/azure/storage/file/share/ShareFileAsyncClient.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,12 +1513,14 @@ Mono<Response<ShareFileUploadInfo>> uploadWithResponse(ShareFileUploadOptions op
15131513
// no specified length: use azure.core's converter
15141514
if (data == null && options.getLength() == null) {
15151515
// We can only buffer up to max int due to restrictions in ByteBuffer.
1516-
int chunkSize = (int) Math.min(Integer.MAX_VALUE, validatedParallelTransferOptions.getBlockSizeLong());
1516+
int chunkSize = (int) Math.min(Constants.MAX_INPUT_STREAM_CONVERTER_BUFFER_LENGTH,
1517+
validatedParallelTransferOptions.getBlockSizeLong());
15171518
data = FluxUtil.toFluxByteBuffer(options.getDataStream(), chunkSize);
15181519
// specified length (legacy requirement): use custom converter. no marking because we buffer anyway.
15191520
} else if (data == null) {
15201521
// We can only buffer up to max int due to restrictions in ByteBuffer.
1521-
int chunkSize = (int) Math.min(Integer.MAX_VALUE, validatedParallelTransferOptions.getBlockSizeLong());
1522+
int chunkSize = (int) Math.min(Constants.MAX_INPUT_STREAM_CONVERTER_BUFFER_LENGTH,
1523+
validatedParallelTransferOptions.getBlockSizeLong());
15221524
data = Utility.convertStreamToByteBuffer(
15231525
options.getDataStream(), options.getLength(), chunkSize, false);
15241526
}

sdk/storage/azure-storage-perf/memory-stress-scenarios.ps1

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@ function Run-Scenario {
99
[Parameter(Mandatory=$true, Position=0)]
1010
[string] $HeapSize,
1111
[Parameter(Mandatory=$true, Position=1)]
12-
[string] $Scenario
12+
[string] $Scenario,
13+
[Parameter(Mandatory=$false, Position=2)]
14+
[string] $ExtraFlags
1315
)
1416
Write-Host "Executing '$Scenario' with '$HeapSize' heap"
15-
Invoke-Expression "& '$JavaPath' -Xms$HeapSize -Xmx$HeapSize -jar '$PerfJarPath' $Scenario"
17+
Invoke-Expression "& '$JavaPath' -Xms$HeapSize -Xmx$HeapSize '$ExtraFlags' -jar '$PerfJarPath' $Scenario"
1618
if ($LASTEXITCODE -ne 0) {
1719
Write-Host "Scenario failed, exiting"
1820
exit 1
@@ -31,6 +33,11 @@ Run-Scenario "50m" "uploadoutputstream --warmup 0 --duration 1 --size 1048576000
3133
Run-Scenario "50m" "uploadblobnolength --warmup 0 --duration 1 --size 1048576000 --transfer-single-upload-size 4194304 --transfer-block-size 1048576 --sync"
3234
Run-Scenario "50m" "uploadblob --warmup 0 --duration 1 --size 1048576000 --transfer-single-upload-size 4194304 --transfer-block-size 1048576 --sync"
3335
Run-Scenario "50m" "uploadblob --warmup 0 --duration 1 --size 1048576000 --transfer-single-upload-size 4194304 --transfer-block-size 1048576"
36+
# Jumbo blobs
37+
Run-Scenario "7g" "uploadblob --warmup 0 --duration 1 --size 8388608000 --transfer-block-size 2147483648 --transfer-concurrency 1" "-Dazure.core.perf.test.data.buffer.size=104857600"
38+
Run-Scenario "7g" "uploadblob --warmup 0 --duration 1 --size 8388608000 --transfer-block-size 2147483648 --transfer-concurrency 1 --sync" "-Dazure.core.perf.test.data.buffer.size=104857600"
39+
Run-Scenario "7g" "uploadblobnolength --warmup 0 --duration 1 --size 8388608000 --transfer-block-size 2147483648 --transfer-concurrency 1 --sync" "-Dazure.core.perf.test.data.buffer.size=104857600"
40+
Run-Scenario "7g" "uploadoutputstream --warmup 0 --duration 1 --size 8388608000 --transfer-block-size 2147483648 --transfer-concurrency 1 --sync" "-Dazure.core.perf.test.data.buffer.size=104857600"
3441

3542
# DataLake
3643
$env:STORAGE_CONNECTION_STRING=$env:STORAGE_DATA_LAKE_CONNECTION_STRING
@@ -40,3 +47,11 @@ Run-Scenario "300m" "uploadfiledatalake --warmup 0 --duration 1 --size 104857600
4047
# Small transfer options
4148
Run-Scenario "50m" "uploadfiledatalake --warmup 0 --duration 1 --size 1048576000 --transfer-single-upload-size 4194304 --transfer-block-size 1048576 --sync"
4249
Run-Scenario "50m" "uploadfiledatalake --warmup 0 --duration 1 --size 1048576000 --transfer-single-upload-size 4194304 --transfer-block-size 1048576"
50+
# Jumbo blobs
51+
Run-Scenario "7g" "uploadfiledatalake --warmup 0 --duration 1 --size 8388608000 --transfer-block-size 2147483648 --transfer-concurrency 1 --sync" "-Dazure.core.perf.test.data.buffer.size=104857600"
52+
Run-Scenario "7g" "uploadfiledatalake --warmup 0 --duration 1 --size 8388608000 --transfer-block-size 2147483648 --transfer-concurrency 1" "-Dazure.core.perf.test.data.buffer.size=104857600"
53+
54+
# Shares
55+
$env:STORAGE_CONNECTION_STRING=$env:PRIMARY_STORAGE_CONNECTION_STRING
56+
Run-Scenario "100m" "uploadfileshare --warmup 0 --duration 1 --size 1048576000 --sync"
57+
Run-Scenario "200m" "uploadfileshare --warmup 0 --duration 1 --size 1048576000"

sdk/storage/azure-storage-perf/src/main/java/com/azure/storage/blob/perf/UploadBlobNoLengthTest.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,14 @@ public class UploadBlobNoLengthTest extends BlobTestBase<StoragePerfStressOption
2222

2323
public UploadBlobNoLengthTest(StoragePerfStressOptions options) {
2424
super(options);
25-
inputStream = (RepeatingInputStream) createRandomInputStream(options.getSize());
26-
inputStream.mark(Integer.MAX_VALUE);
27-
byteBufferFlux = createRandomByteBufferFlux(options.getSize());
25+
if (options.isSync()) {
26+
inputStream = (RepeatingInputStream) createRandomInputStream(options.getSize());
27+
inputStream.mark(Long.MAX_VALUE);
28+
byteBufferFlux = null;
29+
} else {
30+
inputStream = null;
31+
byteBufferFlux = createRandomByteBufferFlux(options.getSize());
32+
}
2833
}
2934

3035
@Override

0 commit comments

Comments
 (0)