Skip to content

Commit 66aa691

Browse files
authored
Merge branch 'main' into esql_auto_partition
2 parents 334c1bc + 36280d2 commit 66aa691

File tree

2 files changed

+22
-22
lines changed

2 files changed

+22
-22
lines changed

docs/changelog/126009.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 126009
2+
summary: Change ModelLoaderUtils.split to return the correct number of chunks and ranges.
3+
area: Machine Learning
4+
type: bug
5+
issues:
6+
- 121799

x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtils.java

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -336,50 +336,44 @@ static InputStream getFileInputStream(URI uri) {
336336
* Split a stream of size {@code sizeInBytes} into {@code numberOfStreams} +1
337337
* ranges aligned on {@code chunkSizeBytes} boundaries. Each range contains a
338338
* whole number of chunks.
339-
* The first {@code numberOfStreams} ranges will be split evenly (in terms of
340-
* number of chunks not the byte size), the final range split
339+
* All ranges except the final range will be split approximately evenly
340+
* (in terms of number of chunks not the byte size), the final range split
341341
* is for the single final chunk and will be no more than {@code chunkSizeBytes}
342342
* in size. The separate range for the final chunk is because when streaming and
343343
* uploading a large model definition, writing the last part has to handled
344344
* as a special case.
345-
* Less ranges may be returned in case the stream size is too small.
345+
* Fewer ranges may be returned in case the stream size is too small.
346346
* @param sizeInBytes The total size of the stream
347347
* @param numberOfStreams Divide the bulk of the size into this many streams.
348348
* @param chunkSizeBytes The size of each chunk
349-
* @return List of {@code numberOfStreams} + 1 ranges.
349+
* @return List of {@code numberOfStreams} + 1 or fewer ranges.
350350
*/
351351
static List<RequestRange> split(long sizeInBytes, int numberOfStreams, long chunkSizeBytes) {
352352
int numberOfChunks = (int) ((sizeInBytes + chunkSizeBytes - 1) / chunkSizeBytes);
353+
354+
int numberOfRanges = numberOfStreams + 1;
353355
if (numberOfStreams > numberOfChunks) {
354-
numberOfStreams = numberOfChunks;
356+
numberOfRanges = numberOfChunks;
355357
}
356358
var ranges = new ArrayList<RequestRange>();
357359

358-
int baseChunksPerStream = numberOfChunks / numberOfStreams;
359-
int remainder = numberOfChunks % numberOfStreams;
360+
int baseChunksPerRange = (numberOfChunks - 1) / (numberOfRanges - 1);
361+
int remainder = (numberOfChunks - 1) % (numberOfRanges - 1);
360362
long startOffset = 0;
361363
int startChunkIndex = 0;
362364

363-
for (int i = 0; i < numberOfStreams - 1; i++) {
364-
int numChunksInStream = (i < remainder) ? baseChunksPerStream + 1 : baseChunksPerStream;
365-
long rangeEnd = startOffset + (numChunksInStream * chunkSizeBytes) - 1; // range index is 0 based
366-
ranges.add(new RequestRange(startOffset, rangeEnd, startChunkIndex, numChunksInStream));
367-
startOffset = rangeEnd + 1; // range is inclusive start and end
368-
startChunkIndex += numChunksInStream;
369-
}
365+
for (int i = 0; i < numberOfRanges - 1; i++) {
366+
int numChunksInRange = (i < remainder) ? baseChunksPerRange + 1 : baseChunksPerRange;
370367

371-
// Want the final range request to be a single chunk
372-
if (baseChunksPerStream > 1) {
373-
int numChunksExcludingFinal = baseChunksPerStream - 1;
374-
long rangeEnd = startOffset + (numChunksExcludingFinal * chunkSizeBytes) - 1;
375-
ranges.add(new RequestRange(startOffset, rangeEnd, startChunkIndex, numChunksExcludingFinal));
368+
long rangeEnd = startOffset + (((long) numChunksInRange) * chunkSizeBytes) - 1; // range index is 0 based
376369

377-
startOffset = rangeEnd + 1;
378-
startChunkIndex += numChunksExcludingFinal;
370+
ranges.add(new RequestRange(startOffset, rangeEnd, startChunkIndex, numChunksInRange));
371+
startOffset = rangeEnd + 1; // range is inclusive start and end
372+
startChunkIndex += numChunksInRange;
379373
}
380374

381375
// The final range is a single chunk the end of which should not exceed sizeInBytes
382-
long rangeEnd = Math.min(sizeInBytes, startOffset + (baseChunksPerStream * chunkSizeBytes)) - 1;
376+
long rangeEnd = Math.min(sizeInBytes, startOffset + (baseChunksPerRange * chunkSizeBytes)) - 1;
383377
ranges.add(new RequestRange(startOffset, rangeEnd, startChunkIndex, 1));
384378

385379
return ranges;

0 commit comments

Comments
 (0)