1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Add `replication-pause` command
* Add `replication-unpause` command
* Add `--include-existing-files` to `replication-setup`
* Add `replication-inspect` command

### Fixed
* Fix `replication-setup` default priority setter
135 changes: 112 additions & 23 deletions b2/console_tool.py
@@ -74,6 +74,8 @@
parse_sync_folder,
ReplicationMonitor,
ProgressReport,
TwoWayReplicationCheckGenerator,
CheckState,
)
from b2sdk.v2.exception import (
B2Error,
@@ -190,6 +192,25 @@ def apply_or_none(fcn, value):
return fcn(value)


def to_human_readable(value: Any) -> str:
if isinstance(value, Enum):
return value.name

if isinstance(value, bool):
return 'Yes' if value else 'No'

if isinstance(value, Bucket):
return value.name

if isinstance(value, ApplicationKey):
return value.id_

if value is None:
return ''

return str(value)
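
A small usage sketch (not part of this diff) of the `to_human_readable` helper above. It assumes the function is importable from `b2.console_tool` once this change lands, and it only exercises the branches that need no b2sdk objects (the `Bucket` and `ApplicationKey` branches return `.name` and `.id_` respectively):

```python
from enum import Enum

from b2.console_tool import to_human_readable  # module path assumed from b2/console_tool.py


class DemoState(Enum):  # stand-in for any Enum, e.g. b2sdk's CheckState
    OK = 'ok'


assert to_human_readable(DemoState.OK) == 'OK'  # Enum -> its name
assert to_human_readable(True) == 'Yes'         # bool -> 'Yes'/'No'
assert to_human_readable(None) == ''            # None -> empty string
assert to_human_readable(42) == '42'            # anything else -> str(value)
```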


class DescriptionGetter:
def __init__(self, described_cls):
self.described_cls = described_cls
@@ -2568,11 +2589,94 @@ def alter_one_rule(cls, rule: ReplicationRule) -> Optional[ReplicationRule]:
return rule


@B2.register_subcommand
class ReplicationInspect(Command):
"""
Detect possible misconfigurations of replication by analyzing
replication rules, buckets and keys.

--output-format
"Console" output format is meant to be human-readable and is subject to change
in any further release. One should use "json" for reliable "no-breaking-changes"
output format.
"""

@classmethod
def _setup_parser(cls, parser):
super()._setup_parser(parser)
parser.add_argument('--source-profile', metavar='SOURCE_PROFILE')
parser.add_argument('--destination-profile', metavar='DESTINATION_PROFILE')
parser.add_argument('--source-bucket', metavar='SOURCE_BUCKET_NAME')
parser.add_argument('--destination-bucket', metavar='DESTINATION_BUCKET_NAME')
parser.add_argument('--rule', metavar='REPLICATION_RULE_NAME')
parser.add_argument('--file-name-prefix', metavar='FILE_NAME_PREFIX')
parser.add_argument('--show-all-checks', action='store_true')

parser.add_argument('--output-format', default='console', choices=('console', 'json'))

def run(self, args):
source_api = _get_b2api_for_profile(args.source_profile)
destination_api = _get_b2api_for_profile(args.destination_profile or args.source_profile)

troubleshooter = TwoWayReplicationCheckGenerator(
source_api=source_api,
destination_api=destination_api,
filter_source_bucket_name=args.source_bucket,
filter_destination_bucket_name=args.destination_bucket,
filter_replication_rule_name=args.rule,
file_name_prefix=args.file_name_prefix,
)

results = [check.as_dict() for check in troubleshooter.iter_checks()]

if args.output_format == 'json':
self._print_json(
[
{key: to_human_readable(value)
for key, value in result.items()} for result in results
]
)
elif args.output_format == 'console':
self._print_console(results, show_all_checks=args.show_all_checks)
else:
self._print_stderr(f'ERROR: format "{args.output_format}" is not supported')
return 1

return 0

def _print_console(self, results: List[dict], show_all_checks: bool = False) -> None:
for result in results:

# print keys starting with `_` as text before table
self._print('Configuration:')
for key, value in result.items():
if key.startswith('_'):
self._print(
' ' * 2 + key[1:].replace('_', ' ') + ': ' + to_human_readable(value)
)

            # print other keys as table rows
rows = {
key.replace('_', ' '): to_human_readable(value)
for key, value in result.items()
if not key.startswith('_') and (value != CheckState.OK or show_all_checks)
}.items()
self._print('Checks:')

key = None
for key, value in rows:
self._print(' ' * 2 + key + ': ' + value)
if not key: # loop was not entered

Suggested change
if not key: # loop was not entered
if key is None: # loop was not entered

because an empty key will mess with this

also PEP8 says if you want to check if something is None, use `if key is None:`, not `if not key:`
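
A quick standalone illustration (not from the diff) of the reviewer's point: an empty-string key is falsy, so `not key` would misreport that the loop never ran, whereas `key is None` distinguishes the two cases:

```python
key = None
for key, value in [('', 'OK')]:  # hypothetical row whose key happens to be an empty string
    pass                         # the loop body runs once

print(not key)       # True  -> the '-' placeholder would be printed even though a row was shown
print(key is None)   # False -> correctly reports that the loop was entered
```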

self._print(' ' * 2 + '-')

self._print('')
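
For illustration, a self-contained sketch (not part of this diff) of the console layout that `_print_console` above produces for a single check result. The sample keys follow the shape asserted in the integration test further down; the values are already human-readable strings here, while the real method converts enums via `to_human_readable` and, unless `--show-all-checks` is passed, hides checks whose state is `OK`:

```python
sample_result = {
    '_source_bucket': 'my-source-bucket',    # hypothetical bucket name
    '_source_rule_name': 'replication-one',  # hypothetical rule name
    'source_is_enabled': 'OK',
    'file_lock_match': 'NOT_OK',
}

# Keys starting with '_' are printed as configuration text before the table.
print('Configuration:')
for key, value in sample_result.items():
    if key.startswith('_'):
        print('  ' + key[1:].replace('_', ' ') + ': ' + value)

# Remaining keys are printed as check rows.
print('Checks:')
for key, value in sample_result.items():
    if not key.startswith('_'):
        print('  ' + key.replace('_', ' ') + ': ' + value)
```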


@B2.register_subcommand
class ReplicationStatus(Command):
"""
Inspects files in only source or both source and destination buckets
(potentially from different accounts) and provides detailed replication statistics.
Inspect files in only source or both source and destination buckets
(potentially from different accounts) and provide detailed replication statistics.

    Please be aware that only the latest file versions are inspected, so previous
    file versions are not represented in these statistics.
@@ -2649,13 +2753,14 @@ def run(self, args):
}

if args.output_format == 'json':
self.output_json(results)
self._print_json(results)
elif args.output_format == 'console':
self.output_console(results)
self._print_console(results)
elif args.output_format == 'csv':
self.output_csv(results)
else:
self._print_stderr(f'ERROR: format "{args.output_format}" is not supported')
return 1

return 0

@@ -2683,29 +2788,13 @@ def get_results_for_rule(
def filter_results_columns(cls, results: List[dict], columns: List[str]) -> List[dict]:
return [{key: result[key] for key in columns} for result in results]

@classmethod
def to_human_readable(cls, value: Any) -> str:
if isinstance(value, Enum):
return value.name

if isinstance(value, bool):
return 'Yes' if value else 'No'

if value is None:
return ''

return str(value)

def output_json(self, results: Dict[str, List[dict]]) -> None:
self._print_json(results)

def output_console(self, results: Dict[str, List[dict]]) -> None:
def _print_console(self, results: Dict[str, List[dict]]) -> None:
for rule_name, rule_results in results.items():
self._print(f'Replication "{rule_name}":')
rule_results = [
{
key.replace('_', '\n'): # split key to minimize column size
self.to_human_readable(value)
to_human_readable(value)
for key, value in result.items()
} for result in rule_results
]
@@ -2721,7 +2810,7 @@ def output_csv(self, results: Dict[str, List[dict]]) -> None:
'rule name': rule_name,
**{
key.replace('_', '\n'): # split key to minimize column size
self.to_human_readable(value)
to_human_readable(value)
for key, value in result.items()
},
} for result in rule_results
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
arrow>=1.0.2,<2.0.0
b2sdk==1.17.3
b2sdk==1.17.4
docutils==0.19
idna>=2.2.0; platform_system == 'Java'
importlib-metadata>=3.3.0; python_version < '3.8'
130 changes: 130 additions & 0 deletions test/integration/test_b2_command_line.py
@@ -2343,6 +2343,136 @@ def test_replication_monitoring(b2_tool, bucket_name, b2_api):
b2_api.clean_bucket(source_bucket_name)


def test_replication_troubleshooting(b2_tool, bucket_name, b2_api):
key_one_name = 'clt-testKey-01' + random_hex(6)
created_key_stdout = b2_tool.should_succeed(
[
'create-key',
key_one_name,
'listBuckets,readFiles',
]
)
key_one_id, _ = created_key_stdout.split()

key_two_name = 'clt-testKey-02' + random_hex(6)
created_key_stdout = b2_tool.should_succeed(
[
'create-key',
key_two_name,
'listBuckets,writeFiles',
]
)
key_two_id, _ = created_key_stdout.split()

# ---------------- add test data ----------------
destination_bucket_name = bucket_name
_ = b2_tool.should_succeed_json(
['upload-file', '--noProgress', '--quiet', destination_bucket_name, 'README.md', 'one/a']
)

# ---------------- set up replication destination ----------------

# update destination bucket info
destination_replication_configuration = {
'asReplicationSource': None,
'asReplicationDestination': {
'sourceToDestinationKeyMapping': {
key_one_id: key_two_id,
},
},
}
destination_replication_configuration_json = json.dumps(destination_replication_configuration)
destination_bucket = b2_tool.should_succeed_json(
[
'update-bucket',
destination_bucket_name,
'allPublic',
'--replication',
destination_replication_configuration_json,
]
)

# ---------------- set up replication source ----------------
source_replication_configuration = {
"asReplicationSource":
{
"replicationRules":
[
{
"destinationBucketId": destination_bucket['bucketId'],
"fileNamePrefix": "one/",
"includeExistingFiles": False,
"isEnabled": True,
"priority": 1,
"replicationRuleName": "replication-one"
}, {
"destinationBucketId": destination_bucket['bucketId'],
"fileNamePrefix": "two/",
"includeExistingFiles": False,
"isEnabled": True,
"priority": 2,
"replicationRuleName": "replication-two"
}
],
"sourceApplicationKeyId": key_one_id,
},
}
source_replication_configuration_json = json.dumps(source_replication_configuration)

# create a source bucket and set up replication to destination bucket
source_bucket_name = b2_tool.generate_bucket_name()
b2_tool.should_succeed(
[
'create-bucket',
source_bucket_name,
'allPublic',
'--fileLockEnabled',
'--replication',
source_replication_configuration_json,
*get_bucketinfo(),
]
)

# make test data
_ = b2_tool.should_succeed_json(
['upload-file', '--noProgress', '--quiet', source_bucket_name, 'CHANGELOG.md', 'one/a']
)

# run troubleshooter
troubleshooter_results_json = b2_tool.should_succeed_json(
[
'replication-inspect',
'--source-bucket',
source_bucket_name,
'--rule',
'replication-two',
'--output-format',
'json',
]
)

assert troubleshooter_results_json == [
{
"_destination_application_key": key_two_id,
"_destination_bucket": destination_bucket_name,
"_source_application_key": key_one_id,
"_source_bucket": source_bucket_name,
"_source_rule_name": "replication-two",
"destination_key_bucket_match": "OK",
"destination_key_capabilities": "OK",
"destination_key_exists": "OK",
"destination_key_name_prefix_match": "OK",
"file_lock_match": "NOT_OK",
"source_is_enabled": "OK",
"source_key_accepted_in_target_bucket": "OK",
"source_key_bucket_match": "OK",
"source_key_capabilities": "OK",
"source_key_exists": "OK",
"source_key_name_prefix_match": "OK"
}
]


def _assert_file_lock_configuration(
b2_tool,
file_id,