Skip to content

Commit fa92cfb

Browse files
authored
Fix s3 download ranges for chunks (#2860)
1 parent 54e199c commit fa92cfb

File tree

3 files changed

+23
-65
lines changed

3 files changed

+23
-65
lines changed

gems/aws-sdk-s3/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
Unreleased Changes
22
------------------
33

4+
* Issue - Fix multipart `download_file` so that it does not download bytes out of range (#2859).
5+
46
1.123.0 (2023-05-31)
57
------------------
68

gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,19 @@ def multipart_download
5858
resp = @client.head_object(@params.merge(part_number: 1))
5959
count = resp.parts_count
6060
if count.nil? || count <= 1
61-
resp.content_length < MIN_CHUNK_SIZE ?
62-
single_request :
61+
if resp.content_length <= MIN_CHUNK_SIZE
62+
single_request
63+
else
6364
multithreaded_get_by_ranges(construct_chunks(resp.content_length))
65+
end
6466
else
6567
# partNumber is an option
6668
resp = @client.head_object(@params)
67-
resp.content_length < MIN_CHUNK_SIZE ?
68-
single_request :
69+
if resp.content_length <= MIN_CHUNK_SIZE
70+
single_request
71+
else
6972
compute_mode(resp.content_length, count)
73+
end
7074
end
7175
end
7276

@@ -84,10 +88,11 @@ def construct_chunks(file_size)
8488
offset = 0
8589
default_chunk_size = compute_chunk(file_size)
8690
chunks = []
87-
while offset <= file_size
91+
while offset < file_size
8892
progress = offset + default_chunk_size
89-
chunks << "bytes=#{offset}-#{progress < file_size ? progress : file_size}"
90-
offset = progress + 1
93+
progress = file_size if progress > file_size
94+
chunks << "bytes=#{offset}-#{progress - 1}"
95+
offset = progress
9196
end
9297
chunks
9398
end
@@ -96,12 +101,9 @@ def compute_chunk(file_size)
96101
if @chunk_size && @chunk_size > file_size
97102
raise ArgumentError, ":chunk_size shouldn't exceed total file size."
98103
else
99-
chunk_size = @chunk_size || [
100-
(file_size.to_f / MAX_PARTS).ceil,
101-
MIN_CHUNK_SIZE
104+
@chunk_size || [
105+
(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE
102106
].max.to_i
103-
chunk_size -= 1 if file_size % chunk_size == 1
104-
chunk_size
105107
end
106108
end
107109

gems/aws-sdk-s3/spec/object/download_file_spec.rb

Lines changed: 7 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,10 @@ module S3
3636
)
3737
end
3838

39-
let(:boundary_obj) do
40-
S3::Object.new(
41-
bucket_name: 'bucket',
42-
key: 'boundary',
43-
client: client
44-
)
45-
end
46-
4739
let(:one_meg) { 1024 * 1024 }
4840
let(:small_file) { Tempfile.new('small-file') }
4941
let(:large_file) { Tempfile.new('large-file') }
5042
let(:single_part_file) { Tempfile.new('single-part-file') }
51-
let(:boundary_file) { Tempfile.new('range-boundary-file') }
5243
let(:version_id) { 'a-fake-version-id' }
5344

5445
before(:each) do
@@ -180,51 +171,14 @@ module S3
180171
)
181172
end
182173

183-
context 'file size is chunk size boundary' do
184-
let(:file_size) { 5 * one_meg + 1 }
185-
let(:chunk_size) { 5 * one_meg }
186-
187-
before :each do
188-
allow(client).to receive(:head_object).with({
189-
bucket: 'bucket',
190-
key: 'boundary',
191-
part_number: 1
192-
}).and_return(
193-
client.stub_data(
194-
:head_object,
195-
content_length: file_size
196-
)
197-
)
174+
it 'downloads the file in range chunks' do
175+
client.stub_responses(:get_object, ->(context) {
176+
ranges = context.params[:range].split('=').last.split('-')
177+
expect(ranges[1].to_i - ranges[0].to_i + 1).to eq(one_meg)
178+
{ content_range: "bytes #{ranges[0]}-#{ranges[1]}/#{20 * one_meg}" }
179+
})
198180

199-
client.stub_responses(:get_object, ->(context) {
200-
# S3 does not allow 1 byte ranges. This shouldn't be raised.
201-
if context.params[:range] == "bytes=#{file_size}-#{file_size}"
202-
'InvalidRange'
203-
else
204-
{ content_range: "bytes 0-#{chunk_size - 1}/#{file_size}" }
205-
end
206-
})
207-
end
208-
209-
it 'downloads the file with default chunk size' do
210-
expect(client).to receive(:head_object).with({
211-
bucket: 'bucket',
212-
key: 'boundary',
213-
part_number: 1
214-
}).exactly(1).times
215-
216-
boundary_obj.download_file(path)
217-
end
218-
219-
it 'downloads the file with provided chunk size' do
220-
expect(client).to receive(:head_object).with({
221-
bucket: 'bucket',
222-
key: 'boundary',
223-
part_number: 1
224-
}).exactly(1).times
225-
226-
boundary_obj.download_file(path, chunk_size: chunk_size)
227-
end
181+
large_obj.download_file(path, chunk_size: one_meg)
228182
end
229183
end
230184
end

0 commit comments

Comments
 (0)