|
1 | 1 | import configparser |
2 | 2 | import copy |
| 3 | +import datetime |
3 | 4 | import logging |
4 | 5 | import os |
5 | 6 | import pathlib |
@@ -97,6 +98,45 @@ class Config: |
97 | 98 | files_api_client_download_max_total_recovers = None |
98 | 99 | files_api_client_download_max_total_recovers_without_progressing = 1 |
99 | 100 |
|
| 101 | + # File multipart upload parameters |
| 102 | + # ---------------------- |
| 103 | + |
| 104 | + # Minimum input stream size (bytes) to use multipart / resumable uploads. |
| 105 | + # For small files it's more efficient to make one single-shot upload request. |
| 106 | + # When uploading a file, SDK will initially buffer this many bytes from input stream. |
| 107 | + # This parameter can be smaller or larger than multipart_upload_chunk_size. |
| 108 | + multipart_upload_min_stream_size: int = 5 * 1024 * 1024 |
| 109 | + |
| 110 | + # Maximum number of presigned URLs that can be requested at a time. |
| 111 | + # |
| 112 | + # The more URLs we request at once, the higher chance is that some of the URLs will expire |
| 113 | + # before we get to use them. We discover the presigned URL has expired *after* sending the |
| 114 | + # input stream partition to the server. So to retry the upload of this partition we must rewind |
| 115 | + # the stream back. In case of a non-seekable stream we cannot rewind, so we'll abort |
| 116 | + # the upload. To reduce the chance of this, we're requesting presigned URLs one by one |
| 117 | + # and using them immediately. |
| 118 | + multipart_upload_batch_url_count: int = 1 |
| 119 | + |
| 120 | + # Size of the chunk to use for multipart uploads. |
| 121 | + # |
| 122 | + # The smaller the chunk is, the lower the chance of network errors (or of the URL expiring), |
| 123 | + # but the more requests we'll make. |
| 124 | + # For AWS, minimum is 5 MiB: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html |
| 125 | + # For GCP, minimum is 256 KiB (and also recommended multiple is 256 KiB) |
| 126 | + # boto uses 8Mb: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig |
| 127 | + multipart_upload_chunk_size: int = 10 * 1024 * 1024 |
| 128 | + |
| 129 | + # use maximum duration of 1 hour |
| 130 | + multipart_upload_url_expiration_duration: datetime.timedelta = datetime.timedelta(hours=1) |
| 131 | + |
| 132 | + # This is not a "wall time" cutoff for the whole upload request, |
| 133 | + # but a maximum time between consecutive data reception events (even 1 byte) from the server |
| 134 | + multipart_upload_single_chunk_upload_timeout_seconds: float = 60 |
| 135 | + |
| 136 | + # Limit of retries during multipart upload. |
| 137 | + # Retry counter is reset when progressing along the stream. |
| 138 | + multipart_upload_max_retries = 3 |
| 139 | + |
100 | 140 | def __init__( |
101 | 141 | self, |
102 | 142 | *, |
|
0 commit comments