Skip to content
This repository was archived by the owner on Oct 2, 2024. It is now read-only.

Commit b36b955

Browse files
authored
Merge pull request #35 from OneDrive/fragment-upload
Upload files >100mb
2 parents 307fdee + c28a964 commit b36b955

File tree

4 files changed

+498
-1
lines changed

4 files changed

+498
-1
lines changed
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# -*- coding: utf-8 -*-
2+
'''
3+
------------------------------------------------------------------------------
4+
Copyright (c) 2015 Microsoft Corporation
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in
14+
all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
THE SOFTWARE.
23+
------------------------------------------------------------------------------
24+
'''
25+
26+
import io
27+
import os
28+
29+
class FileSlice(io.RawIOBase):
    '''
    Read-only window over an already opened binary file handle.

    Reads and seeks are confined to the bytes from offset ``start``
    (inclusive) up to offset ``end`` (exclusive) of the underlying file.
    The slice keeps no position of its own: it relies on the position of the
    wrapped handle, so the handle should not be moved by other code while
    the slice is being read.
    '''

    def __init__(self, handle, start, end=None, length=None):
        '''
        Creates a new FileSlice on the file-like object handle and positions
        the handle at the first byte of the slice.

        Args:
            handle (file-like object): file handle object to create a view
                over. File should be open in binary mode and must support
                fileno(), tell(), seek() and read().
            start (int): offset of the first byte that can be read through
                the FileSlice.
            end (int): Optional. Offset just past the last readable byte
                (exclusive). It is capped at the size of the file.
            length (int): Optional. Number of bytes, counted from start,
                that can be read through the FileSlice.

        One of end or length must be provided. For backward compatibility a
        falsy end (0) combined with a length is treated as "length only".

        Raises:
            ValueError: if neither or both of end/length are given, if start
                or length is negative, if end lies before start, or if start
                lies past the end of the file.
        '''
        # Explicit exceptions instead of asserts: asserts vanish under -O,
        # and truthiness checks would reject a legitimate zero length.
        if end is None and length is None:
            raise ValueError("You need to provide one of end or length parameter")
        if end and length:
            raise ValueError("You need to proivde only one parameter: end or length, not both")
        if start < 0:
            raise ValueError("Start of the file smaller than 0")
        if length is not None and length < 0:
            raise ValueError("Length smaller than 0")

        if end:
            slice_end = end
        elif length is not None:
            slice_end = start + length
        else:
            # end == 0 was given explicitly without a length
            slice_end = end
        if slice_end < start:
            raise ValueError("End of the tile smaller than start")

        self._handle = handle
        self._start = start
        # never let the window extend beyond the real end of the file
        self._end = min(slice_end, os.fstat(handle.fileno()).st_size)
        self.seek(0)

    @property
    def _bytes_left(self):
        # Number of bytes between the handle's current position and the end
        # of the slice. Clamped at 0 so that a handle moved past the end by
        # someone else cannot produce a negative read size or length.
        return max(0, self._end - self._handle.tell())

    def close(self):
        # do nothing, someone else might want to process this file
        return

    @property
    def closed(self):
        return self._handle.closed

    def fileno(self):
        return self._handle.fileno()

    def flush(self):
        return self._handle.flush()

    def len(self):
        # this is provided for requests, so it will properly recognize the
        # size of the file (number of bytes still to be read)
        return self._bytes_left

    def __len__(self):
        # this is provided for requests, so it will properly recognize the
        # size of the file (number of bytes still to be read)
        return self.len()

    def isatty(self):
        return self._handle.isatty()

    def readable(self):
        return self._handle.readable()

    def _limit(self, size):
        # Translate a caller-supplied size into a byte count that never
        # crosses the end of the slice; None or a negative size mean
        # "everything that is left".
        remaining = self._bytes_left
        if size is None or size < 0:
            return remaining
        return min(size, remaining)

    def read(self, size=-1):
        '''Reads up to size bytes, or everything left in the slice.'''
        return self._handle.read(self._limit(size))

    def readall(self):
        '''Reads every byte left in the slice.'''
        return self._handle.read(self._bytes_left)

    def readinto(self, b):
        '''Fills buffer b without crossing the slice end; returns the count.'''
        remaining = self._bytes_left
        if len(b) > remaining:
            # the buffer is bigger than what is left: read only the
            # remainder and copy it to the front of the buffer
            data = self._handle.read(remaining)
            b[:len(data)] = data
            return len(data)
        return self._handle.readinto(b)

    def readline(self, size=-1):
        '''Reads one line, limited by size and by the end of the slice.'''
        return self._handle.readline(self._limit(size))

    def readlines(self, hint=-1):
        '''
        Reads the remaining lines of the slice into a list.

        Lines are collected through readline() so the last line is cut at
        the slice end. As in the io module, a positive hint stops reading
        once the total size of the collected lines reaches it.
        '''
        lines = []
        total = 0
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if hint is not None and hint > 0 and total >= hint:
                break
        return lines

    def seek(self, offset, whence=io.SEEK_SET):
        '''
        Moves the handle to a position expressed relative to the slice.

        Args:
            offset (int): offset relative to the slice start (SEEK_SET), the
                current position (SEEK_CUR) or the slice end (SEEK_END).
            whence (int): one of io.SEEK_SET, io.SEEK_CUR, io.SEEK_END.

        Returns:
            The new position relative to the slice start, or the unchanged
            result of the handle's seek() when that result is None.

        Raises:
            ValueError: for an unknown whence or a target outside the slice.
        '''
        if whence == io.SEEK_SET:
            desired_pos = self._start + offset
        elif whence == io.SEEK_CUR:
            desired_pos = self._handle.tell() + offset
        elif whence == io.SEEK_END:
            desired_pos = self._end + offset
        else:
            raise ValueError("Invalid whence value: %r" % (whence,))

        if desired_pos < self._start:
            raise ValueError("Seeking before the file slice")
        if desired_pos > self._end:
            raise ValueError("Seekeing past the end of file slice")

        ret = self._handle.seek(desired_pos, io.SEEK_SET)
        if ret is None:
            # pass a None result through instead of doing arithmetic on it
            return ret
        return ret - self._start

    def seekable(self):
        return self._handle.seekable()

    def tell(self):
        '''Returns the current position relative to the slice start.'''
        return self._handle.tell() - self._start

    def truncate(self, size=None):
        raise IOError("Operation not supported")

    def writable(self):
        return False

    def write(self, b):
        raise IOError("Operation not supported")

    def writelines(self, lines):
        raise IOError("Operation not supported")
161+

src/onedrivesdk/request_base.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def append_option(self, option):
113113
elif isinstance(option, QueryOption):
114114
self._query_options[option.key] = option.value
115115

116-
def send(self, content=None, path=None):
116+
def send(self, content=None, path=None, data=None):
117117
"""Send the request using the client specified
118118
at request initialization
119119
@@ -122,6 +122,8 @@ def send(self, content=None, path=None):
122122
that will be sent
123123
path (str): Defaults to None, the local path of the file which
124124
will be sent
125+
data (file object): Defaults to none, the file object of the
126+
file which will be sent
125127
126128
Returns:
127129
:class:`HttpResponse<onedrivesdk.http_response.HttpResponse>`:
@@ -141,6 +143,12 @@ def send(self, content=None, path=None):
141143
self._headers,
142144
self.request_url,
143145
path=path)
146+
elif data:
147+
response = self._client.http_provider.send(
148+
self.method,
149+
self._headers,
150+
self.request_url,
151+
data=data)
144152
else:
145153
content_dict = None
146154

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# -*- coding: utf-8 -*-
2+
'''
3+
------------------------------------------------------------------------------
4+
Copyright (c) 2015 Microsoft Corporation
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in
14+
all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
THE SOFTWARE.
23+
------------------------------------------------------------------------------
24+
'''
25+
26+
from ..error import OneDriveError
27+
from ..model.upload_session import UploadSession
28+
from ..model.item import Item
29+
from ..options import HeaderOption
30+
from ..request.item_request_builder import ItemRequestBuilder
31+
from ..request_builder_base import RequestBuilderBase
32+
from ..request_base import RequestBase
33+
from ..helpers.file_slice import FileSlice
34+
import asyncio
35+
import json
36+
import math
37+
import os
38+
import time
39+
40+
__PART_SIZE = 10 * 1024 * 1024 # size in bytes of each uploaded fragment (10 MiB, the recommended size). Should be a multiple of 320 * 1024
41+
# Files of up to this many bytes (100 MiB) are uploaded with the regular
# single-request upload; anything larger is uploaded in fragments.
__MAX_SINGLE_FILE_UPLOAD = 100 * 1024 * 1024
42+
43+
class ItemUploadFragment(RequestBase):
    """Request that uploads one fragment of a file to an upload session URL.

    The HTTP method is set to PUT; the methods keep the ``post`` naming used
    by their callers in this module.
    """

    def __init__(self, request_url, client, options, file_handle):
        """Stores the file-like object whose content is sent as the request body.

        Args:
            request_url (str): URL the fragment is sent to
            client: client object used to send the request
            options (list): options (e.g. headers) to include in the request
            file_handle (file-like object): object handed to ``send`` as the
                ``data`` argument
        """
        super(ItemUploadFragment, self).__init__(request_url, client, options)
        self.method = "PUT"
        self._file_handle = file_handle

    def post(self):
        """Sends the fragment and parses the JSON response

        Returns:
            :class:`UploadSession<onedrivesdk.model.upload_session.UploadSession>`:
                The response content wrapped in an UploadSession
        """
        response = self.send(data=self._file_handle)
        return UploadSession(json.loads(response.content))

    # NOTE(review): asyncio.coroutine was removed in Python 3.11; this
    # decorator only works on older interpreters.
    @asyncio.coroutine
    def post_async(self):
        """Sends the fragment using an asyncio coroutine

        The blocking :meth:`post` call is run in the default executor of the
        client's event loop.

        Yields:
            :class:`UploadSession<onedrivesdk.model.upload_session.UploadSession>`:
                The resulting entity from the operation
        """
        loop = self._client._loop
        result = yield from loop.run_in_executor(None, self.post)
        return result
71+
72+
class ItemUploadFragmentBuilder(RequestBuilderBase):
    """Builds upload requests for byte ranges ("fragments") of one local file.

    The file is opened when the builder is created. Use the builder as a
    context manager so the file is closed when the ``with`` block exits.
    """

    def __init__(self, request_url, client, content_local_path):
        """Opens the local file and records its total size.

        Args:
            request_url (str): URL the fragments are sent to
            client: client object used to send the requests
            content_local_path (str): path of the local file to upload
        """
        super(ItemUploadFragmentBuilder, self).__init__(request_url, client)
        self._method_options = {}
        self._file_handle = open(content_local_path, "rb")
        try:
            # Size is taken from the opened handle, so it always describes
            # the file that is actually going to be read.
            self._total_length = os.fstat(self._file_handle.fileno()).st_size
        except Exception:
            # do not leak the handle if the size cannot be determined
            self._file_handle.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Closes the file; returns None so exceptions are not suppressed.
        self._file_handle.close()

    def request(self, begin, length, options=None):
        """Builds the request for the ItemUploadFragment

        Args:
            begin (int): offset in the file of the first byte of the fragment
            length (int): number of bytes in the fragment
            options (list of :class:`Option<onedrivesdk.options.Option>`):
                Default to None, list of options to include in the request

        Returns:
            :class:`ItemUploadFragment<onedrivesdk.request.item_upload_fragment.ItemUploadFragment>`:
                The request
        """
        # Work on a copy so the caller's options are never modified.
        opts = list(options) if options else []

        self.content_type = "application/octet-stream"

        last_byte = begin + length - 1
        opts.append(HeaderOption("Content-Range", "bytes %d-%d/%d" % (begin, last_byte, self._total_length)))
        # Header values are text; newer versions of the requests library
        # reject non-string values, so the length is passed as a string.
        opts.append(HeaderOption("Content-Length", str(length)))

        file_slice = FileSlice(self._file_handle, begin, length=length)
        return ItemUploadFragment(self._request_url, self._client, opts, file_slice)

    def post(self, begin, length, options=None):
        """Builds the request for one fragment and sends it

        Args:
            begin (int): offset in the file of the first byte of the fragment
            length (int): number of bytes in the fragment
            options (list of :class:`Option<onedrivesdk.options.Option>`):
                Default to None, list of options to include in the request

        Returns:
            :class:`UploadSession<onedrivesdk.model.upload_session.UploadSession>`:
                The resulting UploadSession from the operation
        """
        return self.request(begin, length, options).post()

    # NOTE(review): asyncio.coroutine was removed in Python 3.11; this
    # decorator only works on older interpreters.
    @asyncio.coroutine
    def post_async(self, begin, length, options=None):
        """Builds the request for one fragment and sends it using an asyncio coroutine

        Args:
            begin (int): offset in the file of the first byte of the fragment
            length (int): number of bytes in the fragment
            options (list of :class:`Option<onedrivesdk.options.Option>`):
                Default to None, list of options to include in the request

        Yields:
            :class:`UploadSession<onedrivesdk.model.upload_session.UploadSession>`:
                The resulting UploadSession from the operation
        """
        entity = yield from self.request(begin, length, options).post_async()
        return entity
130+
131+
132+
def fragment_upload_async(self, local_path, conflict_behavior=None, upload_status=None):
    """Uploads a local file, using a fragmented upload session for large files.

    Files of at most ``__MAX_SINGLE_FILE_UPLOAD`` bytes are handed to the
    regular single-request upload. Larger files are uploaded through an
    upload session in fragments of ``__PART_SIZE`` bytes; this part runs
    synchronously and blocks until the last fragment has been sent.

    Args:
        local_path (str): The path to the local file to upload.
        conflict_behavior (str): conflict behavior if the file is already
            uploaded. Use None value if file should be replaced or "rename", if
            the new file should get a new name. Only used for the
            fragmented upload.
        upload_status (func): function(current_part, total_parts) called
            before each part is uploaded and once more with
            (total_parts, total_parts) when the job is completed

    Returns:
        For small files, whatever ``upload_async`` of the content request
        returns. For large files, the response of the last fragment upload.

    Raises:
        OneDriveError: if a fragment still fails after 5 attempts, or fails
            with a status other than 401, 500, 502, 503 or 504.
    """
    max_attempts = 5  # attempts per fragment before the error is re-raised
    retry_delay = 5   # seconds to wait after a transient server error

    file_size = os.stat(local_path).st_size
    if file_size <= __MAX_SINGLE_FILE_UPLOAD:
        # fallback to single shot upload if file is small enough
        return self.content.request().upload_async(local_path)

    # multipart upload needed for larger files
    if conflict_behavior:
        item = Item({'@name.conflictBehavior': conflict_behavior})
    else:
        item = Item({})

    session = self.create_session(item).post()

    with ItemUploadFragmentBuilder(session.upload_url, self._client, local_path) as upload_builder:
        # integer ceiling division: number of fragments needed for the file
        total_parts = (file_size + __PART_SIZE - 1) // __PART_SIZE
        for i in range(total_parts):
            if upload_status:
                upload_status(i, total_parts)

            begin = i * __PART_SIZE
            # the last fragment is usually shorter than __PART_SIZE
            length = min(__PART_SIZE, file_size - begin)
            tries = 0
            while True:
                tries += 1
                try:
                    resp = upload_builder.post(begin, length)
                    break
                except OneDriveError as exc:
                    if tries >= max_attempts:
                        # Give up. This limit also covers repeated 401
                        # responses, which would otherwise be retried forever.
                        raise
                    if exc.status_code in (500, 502, 503, 504):
                        # transient server error: wait, then retry
                        time.sleep(retry_delay)
                    elif exc.status_code == 401:
                        # access token rejected: refresh it, then retry
                        self._client.auth_provider.refresh_token()
                    else:
                        raise

    if upload_status:
        upload_status(total_parts, total_parts)  # job completed
    # return last response
    return resp
185+
186+
# Replace the standard ItemRequestBuilder.upload_async with fragment_upload_async, so large files are uploaded in fragments
187+
ItemRequestBuilder.upload_async = fragment_upload_async

0 commit comments

Comments
 (0)