Skip to content

Commit d505433

Browse files
authored
Merge pull request #333 from reef-technologies/replication-monitoring
Replication monitoring
2 parents 6bf258d + a66bf29 commit d505433

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1693
-858
lines changed

CHANGELOG.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased]
88

9+
As in version 1.16.0, the replication API may still be unstable in this release,
10+
though no known major changes to it are planned at this point. Early adopters may find it
11+
feasible to rely on this implementation already.
12+
913
### Added
1014
* Add included_sources module, for keeping track of included modified third-party libraries
1115
* Add `include_existing_files` parameter to `ReplicationSetupHelper`
@@ -16,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1620
* Change the per-part retry limit from 5 to 20 for data transfer operations. Please note that the retry system is not considered to be a part of the public interface and is subject to change
1721
* Do not wait more than 64 seconds between retry attempts (unless server asks for it)
1822
* On longer failures wait an additional (random, up to 1s) amount of time to prevent client synchronization
23+
* Flatten `ReplicationConfiguration` interface
24+
* Reorder actions of `ReplicationSetupHelper` to avoid zombie rules
1925

2026
### Fixed
2127
* Fix: downloading compressed files and decompressing them on the fly now does not cause a TruncatedOutput error
@@ -26,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2632
### Infrastructure
2733
* Add 3.11.0-beta.1 to CI
2834
* Change Sphinx major version from 5 to 6
35+
* Extract folder/bucket scanning into a new `scan` module
2936
* Enable pip cache in CI
3037

3138
## [1.16.0] - 2022-04-27
@@ -109,7 +116,7 @@ Expect substantial amount of work on sdk interface:
109116

110117
### Changed
111118
* The `importlib-metadata` requirement is less strictly bound now (just >=3.3.0 for python > 3.5).
112-
* `B2Api` `update_file_legal_hold` and `update_file_retention_setting` now return the set values
119+
* `B2Api` `update_file_legal_hold` and `update_file_retention_setting` now return the set values
113120

114121
### Added
115122
* `BucketIdNotFound` thrown based on B2 cloud response
@@ -125,7 +132,7 @@ Expect substantial amount of work on sdk interface:
125132
## [1.11.0] - 2021-06-24
126133

127134
### Changed
128-
* apiver `v2` interface released. `from b2sdk.v2 import ...` is now the recommended import,
135+
* apiver `v2` interface released. `from b2sdk.v2 import ...` is now the recommended import,
129136
but `from b2sdk.v1 import ...` works as before
130137

131138
## [1.10.0] - 2021-06-23
@@ -151,7 +158,7 @@ Expect substantial amount of work on sdk interface:
151158
* Old buckets (from past tests) are cleaned up before running integration tests in a single thread
152159

153160
### Removed
154-
* Remove deprecated `SyncReport` methods
161+
* Remove deprecated `SyncReport` methods
155162

156163
## [1.9.0] - 2021-06-07
157164

@@ -166,7 +173,7 @@ Expect substantial amount of work on sdk interface:
166173
* `B2Api` unittests for v0, v1 and v2 are now common
167174
* `B2Api.cancel_large_file` returns a `FileIdAndName` object instead of a `FileVersion` object in v2
168175
* `FileVersion` has a mandatory `api` parameter in v2
169-
* `B2Folder` holds a handle to B2Api
176+
* `B2Folder` holds a handle to B2Api
170177
* `Bucket` unit tests for v1 and v2 are now common
171178

172179
### Fixed
@@ -195,7 +202,7 @@ Expect substantial amount of work on sdk interface:
195202
* Encryption settings, types and providers are now part of the public API
196203

197204
### Removed
198-
* Remove `Bucket.copy_file` and `Bucket.start_large_file`
205+
* Remove `Bucket.copy_file` and `Bucket.start_large_file`
199206
* Remove `FileVersionInfo.format_ls_entry` and `FileVersionInfo.format_folder_ls_entry`
200207

201208
## [1.7.0] - 2021-04-22

b2sdk/_v3/__init__.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,7 @@
164164
from b2sdk.sync.action import B2HideAction
165165
from b2sdk.sync.action import B2UploadAction
166166
from b2sdk.sync.action import LocalDeleteAction
167-
from b2sdk.sync.exception import EnvironmentEncodingError
168167
from b2sdk.sync.exception import IncompleteSync
169-
from b2sdk.sync.exception import InvalidArgument
170-
from b2sdk.sync.folder import AbstractFolder
171-
from b2sdk.sync.folder import B2Folder
172-
from b2sdk.sync.folder import LocalFolder
173-
from b2sdk.sync.folder_parser import parse_sync_folder
174-
from b2sdk.sync.path import AbstractSyncPath, B2SyncPath, LocalSyncPath
175168
from b2sdk.sync.policy import AbstractFileSyncPolicy
176169
from b2sdk.sync.policy import CompareVersionMode
177170
from b2sdk.sync.policy import NewerFileSyncMode
@@ -189,27 +182,43 @@
189182
from b2sdk.sync.policy_manager import POLICY_MANAGER
190183
from b2sdk.sync.report import SyncFileReporter
191184
from b2sdk.sync.report import SyncReport
192-
from b2sdk.sync.scan_policies import DEFAULT_SCAN_MANAGER
193-
from b2sdk.sync.scan_policies import IntegerRange
194-
from b2sdk.sync.scan_policies import RegexSet
195-
from b2sdk.sync.scan_policies import ScanPoliciesManager
196-
from b2sdk.sync.scan_policies import convert_dir_regex_to_dir_prefix_regex
197185
from b2sdk.sync.sync import KeepOrDeleteMode
198186
from b2sdk.sync.sync import Synchronizer
199-
from b2sdk.sync.sync import zip_folders
200187
from b2sdk.sync.encryption_provider import AbstractSyncEncryptionSettingsProvider
201188
from b2sdk.sync.encryption_provider import BasicSyncEncryptionSettingsProvider
202189
from b2sdk.sync.encryption_provider import ServerDefaultSyncEncryptionSettingsProvider
203190
from b2sdk.sync.encryption_provider import SERVER_DEFAULT_SYNC_ENCRYPTION_SETTINGS_PROVIDER
204191

192+
# scan
193+
194+
from b2sdk.scan.exception import EnvironmentEncodingError
195+
from b2sdk.scan.exception import InvalidArgument
196+
from b2sdk.scan.folder import AbstractFolder
197+
from b2sdk.scan.folder import B2Folder
198+
from b2sdk.scan.folder import LocalFolder
199+
from b2sdk.scan.folder_parser import parse_folder
200+
from b2sdk.scan.path import AbstractPath, B2Path, LocalPath
201+
from b2sdk.scan.policies import convert_dir_regex_to_dir_prefix_regex
202+
from b2sdk.scan.policies import DEFAULT_SCAN_MANAGER
203+
from b2sdk.scan.policies import IntegerRange
204+
from b2sdk.scan.policies import RegexSet
205+
from b2sdk.scan.policies import ScanPoliciesManager
206+
from b2sdk.scan.report import ProgressReport
207+
from b2sdk.scan.scan import zip_folders
208+
from b2sdk.scan.scan import AbstractScanResult
209+
from b2sdk.scan.scan import AbstractScanReport
210+
from b2sdk.scan.scan import CountAndSampleScanReport
211+
205212
# replication
206213

207214
from b2sdk.replication.setting import ReplicationConfigurationFactory
208215
from b2sdk.replication.setting import ReplicationConfiguration
209-
from b2sdk.replication.setting import ReplicationSourceConfiguration
210216
from b2sdk.replication.setting import ReplicationRule
211-
from b2sdk.replication.setting import ReplicationDestinationConfiguration
217+
from b2sdk.replication.types import ReplicationStatus
212218
from b2sdk.replication.setup import ReplicationSetupHelper
219+
from b2sdk.replication.monitoring import ReplicationScanResult
220+
from b2sdk.replication.monitoring import ReplicationReport
221+
from b2sdk.replication.monitoring import ReplicationMonitor
213222

214223
# other
215224

b2sdk/_v3/exception.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,14 @@
7171
from b2sdk.exception import SSECKeyError
7272
from b2sdk.exception import WrongEncryptionModeForBucketDefault
7373
from b2sdk.exception import interpret_b2_error
74-
from b2sdk.sync.exception import EmptyDirectory
75-
from b2sdk.sync.exception import EnvironmentEncodingError
7674
from b2sdk.sync.exception import IncompleteSync
77-
from b2sdk.sync.exception import InvalidArgument
78-
from b2sdk.sync.exception import NotADirectory
79-
from b2sdk.sync.exception import UnableToCreateDirectory
80-
from b2sdk.sync.exception import UnSyncableFilename
81-
from b2sdk.sync.exception import check_invalid_argument
75+
from b2sdk.scan.exception import UnableToCreateDirectory
76+
from b2sdk.scan.exception import EmptyDirectory
77+
from b2sdk.scan.exception import EnvironmentEncodingError
78+
from b2sdk.scan.exception import InvalidArgument
79+
from b2sdk.scan.exception import NotADirectory
80+
from b2sdk.scan.exception import UnsupportedFilename
81+
from b2sdk.scan.exception import check_invalid_argument
8282

8383
__all__ = (
8484
'AccessDenied',
@@ -144,7 +144,7 @@
144144
'UnknownHost',
145145
'UnrecognizedBucketType',
146146
'UnableToCreateDirectory',
147-
'UnSyncableFilename',
147+
'UnsupportedFilename',
148148
'UnsatisfiableRange',
149149
'UnusableFileName',
150150
'interpret_b2_error',

b2sdk/file_version.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,10 @@ class FileVersion(BaseFileVersion):
199199
'action',
200200
]
201201

202+
# defined at https://www.backblaze.com/b2/docs/files.html#httpHeaderSizeLimit
203+
DEFAULT_HEADERS_LIMIT = 7000
204+
ADVANCED_HEADERS_LIMIT = 2048
205+
202206
def __init__(
203207
self,
204208
api: 'B2Api',
@@ -282,6 +286,41 @@ def download(
282286
encryption=encryption,
283287
)
284288

289+
def _get_upload_headers(self) -> bytes:
290+
"""
291+
Return encoded http headers, as when sending an upload request to b2 http api.
292+
WARNING: the headers do not contain newlines between headers and spaces between
293+
key and value. This implementation is on par with ADVANCED_HEADERS_LIMIT
294+
and is reasonable only for `has_large_header` method
295+
"""
296+
headers = self.api.raw_api.get_upload_file_headers(
297+
upload_auth_token=self.api.account_info.get_account_auth_token(),
298+
file_name=self.file_name,
299+
content_length=self.size,
300+
content_type=self.content_type,
301+
content_sha1=self.content_sha1,
302+
file_infos=self.file_info,
303+
server_side_encryption=self.server_side_encryption,
304+
file_retention=self.file_retention,
305+
legal_hold=self.legal_hold,
306+
)
307+
308+
headers_str = ''.join(
309+
f'{key}{value}' for key, value in headers.items() if value is not None
310+
)
311+
return headers_str.encode('utf8')
312+
313+
@property
314+
def has_large_header(self) -> bool:
315+
"""
316+
Determine whether FileVersion's info fits header size limit defined by B2.
317+
This function makes sense only for "advanced" buckets, i.e. those which
318+
have Server-Side Encryption or File Lock enabled.
319+
320+
See https://www.backblaze.com/b2/docs/files.html#httpHeaderSizeLimit.
321+
"""
322+
return len(self._get_upload_headers()) > self.ADVANCED_HEADERS_LIMIT
323+
285324

286325
class DownloadVersion(BaseFileVersion):
287326
"""

b2sdk/raw_api.py

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,42 @@ def update_file_retention(
303303
):
304304
pass
305305

306+
@classmethod
307+
def get_upload_file_headers(
308+
cls,
309+
upload_auth_token: str,
310+
file_name: str,
311+
content_length: int,
312+
content_type: str,
313+
content_sha1: str,
314+
file_infos: dict,
315+
server_side_encryption: Optional[EncryptionSetting],
316+
file_retention: Optional[FileRetentionSetting],
317+
legal_hold: Optional[LegalHold],
318+
) -> dict:
319+
headers = {
320+
'Authorization': upload_auth_token,
321+
'Content-Length': str(content_length),
322+
'X-Bz-File-Name': b2_url_encode(file_name),
323+
'Content-Type': content_type,
324+
'X-Bz-Content-Sha1': content_sha1,
325+
}
326+
for k, v in file_infos.items():
327+
headers[FILE_INFO_HEADER_PREFIX + k] = b2_url_encode(v)
328+
if server_side_encryption is not None:
329+
assert server_side_encryption.mode in (
330+
EncryptionMode.NONE, EncryptionMode.SSE_B2, EncryptionMode.SSE_C
331+
)
332+
server_side_encryption.add_to_upload_headers(headers)
333+
334+
if legal_hold is not None:
335+
legal_hold.add_to_upload_headers(headers)
336+
337+
if file_retention is not None:
338+
file_retention.add_to_to_upload_headers(headers)
339+
340+
return headers
341+
306342
@abstractmethod
307343
def upload_file(
308344
self,
@@ -857,27 +893,17 @@ def upload_file(
857893
"""
858894
# Raise UnusableFileName if the file_name doesn't meet the rules.
859895
self.check_b2_filename(file_name)
860-
headers = {
861-
'Authorization': upload_auth_token,
862-
'Content-Length': str(content_length),
863-
'X-Bz-File-Name': b2_url_encode(file_name),
864-
'Content-Type': content_type,
865-
'X-Bz-Content-Sha1': content_sha1,
866-
}
867-
for k, v in file_infos.items():
868-
headers[FILE_INFO_HEADER_PREFIX + k] = b2_url_encode(v)
869-
if server_side_encryption is not None:
870-
assert server_side_encryption.mode in (
871-
EncryptionMode.NONE, EncryptionMode.SSE_B2, EncryptionMode.SSE_C
872-
)
873-
server_side_encryption.add_to_upload_headers(headers)
874-
875-
if legal_hold is not None:
876-
legal_hold.add_to_upload_headers(headers)
877-
878-
if file_retention is not None:
879-
file_retention.add_to_to_upload_headers(headers)
880-
896+
headers = self.get_upload_file_headers(
897+
upload_auth_token=upload_auth_token,
898+
file_name=file_name,
899+
content_length=content_length,
900+
content_type=content_type,
901+
content_sha1=content_sha1,
902+
file_infos=file_infos,
903+
server_side_encryption=server_side_encryption,
904+
file_retention=file_retention,
905+
legal_hold=legal_hold,
906+
)
881907
return self.b2_http.post_content_return_json(upload_url, headers, data_stream)
882908

883909
def upload_part(

0 commit comments

Comments
 (0)