Skip to content

Commit f9b2352

Browse files
authored
implemented chunked file upload v2 (#130)
There are two versions of the chunking API. Version 1 is the original version and version 2 was built as a backward compatible extension to support uploads directly to supporting target storages like S3. Version 2 is the recommended version to use. --- https://docs.nextcloud.com/server/latest/developer_manual/client_apis/WebDAV/chunking.html Signed-off-by: Alexander Piskun <[email protected]>
1 parent b59fd9e commit f9b2352

File tree

6 files changed

+77
-17
lines changed

6 files changed

+77
-17
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,17 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [0.2.2 - 2023-09-2x]
6+
7+
### Added
8+
9+
- FilesAPI: [Chunked v2 upload](https://docs.nextcloud.com/server/latest/developer_manual/client_apis/WebDAV/chunking.html#chunked-upload-v2) support, enabled by default.
10+
- New option to disable `chunked v2 upload` if there is a need for that: `CHUNKED_UPLOAD_V2`
11+
12+
### Changed
13+
14+
- Default `chunk_size` argument is now 5 MiB (5 * 1024 * 1024 bytes) instead of 4 MiB.
15+
516
## [0.2.1 - 2023-09-14]
617

718
### Added

nc_py_api/_session.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,14 @@ class RuntimeOptions:
5555
timeout: Optional[int]
5656
timeout_dav: Optional[int]
5757
_nc_cert: Union[str, bool]
58+
upload_chunk_v2: bool
5859

5960
def __init__(self, **kwargs):
6061
self.xdebug_session = kwargs.get("xdebug_session", options.XDEBUG_SESSION)
6162
self.timeout = kwargs.get("npa_timeout", options.NPA_TIMEOUT)
6263
self.timeout_dav = kwargs.get("npa_timeout_dav", options.NPA_TIMEOUT_DAV)
6364
self._nc_cert = kwargs.get("npa_nc_cert", options.NPA_NC_CERT)
65+
self.upload_chunk_v2 = kwargs.get("chunked_upload_v2", options.CHUNKED_UPLOAD_V2)
6466

6567
@property
6668
def nc_cert(self) -> Union[str, bool]:

nc_py_api/files/files.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def download2stream(self, path: Union[str, FsNode], fp, **kwargs) -> None:
146146
:param path: path to download file.
147147
:param fp: filename (string), pathlib.Path object or a file object.
148148
The object must implement the ``file.write`` method and be able to write binary data.
149-
:param kwargs: **chunk_size** an int value specifying chunk size to write. Default = **4Mb**
149+
:param kwargs: **chunk_size** an int value specifying chunk size to write. Default = **5Mb**
150150
"""
151151
path = path.user_path if isinstance(path, FsNode) else path
152152
if isinstance(fp, (str, Path)):
@@ -179,7 +179,7 @@ def download_directory_as_zip(
179179
result_path,
180180
"wb",
181181
) as fp:
182-
for data_chunk in response.iter_raw(chunk_size=kwargs.get("chunk_size", 4 * 1024 * 1024)):
182+
for data_chunk in response.iter_raw(chunk_size=kwargs.get("chunk_size", 5 * 1024 * 1024)):
183183
fp.write(data_chunk)
184184
return Path(result_path)
185185

@@ -201,14 +201,15 @@ def upload_stream(self, path: Union[str, FsNode], fp, **kwargs) -> FsNode:
201201
:param path: file's upload path.
202202
:param fp: filename (string), pathlib.Path object or a file object.
203203
The object must implement the ``file.read`` method providing data with str or bytes type.
204-
:param kwargs: **chunk_size** an int value specifying chunk size to read. Default = **4Mb**
204+
:param kwargs: **chunk_size** an int value specifying chunk size to read. Default = **5Mb**
205205
"""
206206
path = path.user_path if isinstance(path, FsNode) else path
207+
chunk_size = kwargs.get("chunk_size", 5 * 1024 * 1024)
207208
if isinstance(fp, (str, Path)):
208209
with builtins.open(fp, "rb") as f:
209-
return self.__upload_stream(path, f, **kwargs)
210+
return self.__upload_stream(path, f, chunk_size)
210211
elif hasattr(fp, "read"):
211-
return self.__upload_stream(path, fp, **kwargs)
212+
return self.__upload_stream(path, fp, chunk_size)
212213
else:
213214
raise TypeError("`fp` must be a path to file or an object with `read` method.")
214215

@@ -688,36 +689,48 @@ def __download2stream(self, path: str, fp, **kwargs) -> None:
688689
) as response: # type: ignore
689690
self._session.response_headers = response.headers
690691
check_error(response.status_code, f"download_stream: user={self._session.user}, path={path}")
691-
for data_chunk in response.iter_raw(chunk_size=kwargs.get("chunk_size", 4 * 1024 * 1024)):
692+
for data_chunk in response.iter_raw(chunk_size=kwargs.get("chunk_size", 5 * 1024 * 1024)):
692693
fp.write(data_chunk)
693694

694-
def __upload_stream(self, path: str, fp, **kwargs) -> FsNode:
695-
_dav_path = self._dav_get_obj_path(self._session.user, random_string(64), root_path="/uploads")
696-
response = self._session.dav("MKCOL", _dav_path)
695+
def __upload_stream(self, path: str, fp, chunk_size: int) -> FsNode:
696+
_dav_path = self._dav_get_obj_path(self._session.user, "nc-py-api-" + random_string(56), root_path="/uploads")
697+
_v2 = bool(self._session.cfg.options.upload_chunk_v2 and chunk_size >= 5 * 1024 * 1024)
698+
full_path = self._dav_get_obj_path(self._session.user, path)
699+
headers = {"Destination": self._session.cfg.dav_endpoint + full_path}
700+
if _v2:
701+
response = self._session.dav("MKCOL", _dav_path, headers=headers)
702+
else:
703+
response = self._session.dav("MKCOL", _dav_path)
697704
check_error(response.status_code)
698705
try:
699-
chunk_size = kwargs.get("chunk_size", 4 * 1024 * 1024)
700-
start_bytes = end_bytes = 0
706+
start_bytes = end_bytes = chunk_number = 0
701707
while True:
702708
piece = fp.read(chunk_size)
703709
if not piece:
704710
break
705711
end_bytes = start_bytes + len(piece)
706-
_filename = str(start_bytes).rjust(15, "0") + "-" + str(end_bytes).rjust(15, "0")
707-
response = self._session.dav("PUT", _dav_path + "/" + _filename, data=piece)
712+
if _v2:
713+
response = self._session.dav(
714+
"PUT", _dav_path + "/" + str(chunk_number), data=piece, headers=headers
715+
)
716+
else:
717+
_filename = str(start_bytes).rjust(15, "0") + "-" + str(end_bytes).rjust(15, "0")
718+
response = self._session.dav("PUT", _dav_path + "/" + _filename, data=piece)
708719
check_error(
709-
response.status_code, f"upload_stream: user={self._session.user}, path={path}, cur_size={end_bytes}"
720+
response.status_code,
721+
f"upload_stream(v={_v2}): user={self._session.user}, path={path}, cur_size={end_bytes}",
710722
)
711723
start_bytes = end_bytes
712-
full_path = self._dav_get_obj_path(self._session.user, path)
713-
headers = {"Destination": self._session.cfg.dav_endpoint + full_path}
724+
chunk_number += 1
725+
714726
response = self._session.dav(
715727
"MOVE",
716728
_dav_path + "/.file",
717729
headers=headers,
718730
)
719731
check_error(
720-
response.status_code, f"upload_stream: user={self._session.user}, path={path}, total_size={end_bytes}"
732+
response.status_code,
733+
f"upload_stream(v={_v2}): user={self._session.user}, path={path}, total_size={end_bytes}",
721734
)
722735
return FsNode(full_path.strip("/"), **self.__get_etag_fileid_from_response(response))
723736
finally:

nc_py_api/options.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,10 @@
3838
NPA_NC_CERT = False
3939
elif str_val.lower() not in ("true", "1"):
4040
NPA_NC_CERT = str_val
41+
42+
CHUNKED_UPLOAD_V2 = True
43+
"""Option to enable/disable **version 2** chunked upload(better Object Storages support).
44+
45+
Additional information can be found in Nextcloud documentation:
46+
`Chunked file upload V2
47+
<https://docs.nextcloud.com/server/latest/developer_manual/client_apis/WebDAV/chunking.html#chunked-upload-v2>`_"""

tests/actual_tests/files_test.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,15 @@ def test_file_upload_file(nc_any):
201201
assert nc_any.files.download("test_dir_tmp/test_file_upload_file") == content
202202

203203

204+
def test_file_upload_chunked_v2(nc_any):
205+
with NamedTemporaryFile() as tmp_file:
206+
tmp_file.seek(7 * 1024 * 1024)
207+
tmp_file.write(b"\0")
208+
tmp_file.flush()
209+
nc_any.files.upload_stream("test_dir_tmp/test_file_upload_chunked_v2", tmp_file.name)
210+
assert len(nc_any.files.download("test_dir_tmp/test_file_upload_chunked_v2")) == 7 * 1024 * 1024 + 1
211+
212+
204213
@pytest.mark.parametrize("file_name", ("chunked_zero", "chunked_zero/", "chunked_zero//"))
205214
def test_file_upload_chunked_zero_size(nc_any, file_name):
206215
nc_any.files.delete("/test_dir_tmp/test_file_upload_del", not_fail=True)

tests/actual_tests/options_test.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import sys
33
from subprocess import PIPE, run
4+
from unittest import mock
45

56
import nc_py_api
67

@@ -51,3 +52,20 @@ def test_xdebug_session(nc_any):
5152
nc_py_api.options.XDEBUG_SESSION = "12345"
5253
new_nc = nc_py_api.Nextcloud() if isinstance(nc_any, nc_py_api.Nextcloud) else nc_py_api.NextcloudApp()
5354
assert new_nc._session.adapter.cookies["XDEBUG_SESSION"] == "12345"
55+
56+
57+
@mock.patch("nc_py_api.options.CHUNKED_UPLOAD_V2", False)
58+
def test_chunked_upload(nc_any):
59+
new_nc = nc_py_api.Nextcloud() if isinstance(nc_any, nc_py_api.Nextcloud) else nc_py_api.NextcloudApp()
60+
assert new_nc._session.cfg.options.upload_chunk_v2 is False
61+
62+
63+
def test_chunked_upload2(nc_any):
64+
new_nc = (
65+
nc_py_api.Nextcloud(chunked_upload_v2=False)
66+
if isinstance(nc_any, nc_py_api.Nextcloud)
67+
else nc_py_api.NextcloudApp(chunked_upload_v2=False)
68+
)
69+
assert new_nc._session.cfg.options.upload_chunk_v2 is False
70+
new_nc = nc_py_api.Nextcloud() if isinstance(nc_any, nc_py_api.Nextcloud) else nc_py_api.NextcloudApp()
71+
assert new_nc._session.cfg.options.upload_chunk_v2 is True

0 commit comments

Comments
 (0)