1 change: 1 addition & 0 deletions bzl/linting/pylintrc
@@ -144,6 +144,7 @@ disable=abstract-method,
too-many-arguments,
too-many-boolean-expressions,
too-many-branches,
too-many-function-args,
too-many-instance-attributes,
too-many-locals,
too-many-nested-blocks,
@@ -62,10 +62,6 @@ Bucket
- String
- The cloud storage region where the bucket is located.
- ``us-east-1``
* - ``check_key``
- Boolean
- Whether to validate access keys before allowing access to the bucket.
- ``False``
* - ``description``
- String
- Human-readable description of the bucket and its intended use.
4 changes: 4 additions & 0 deletions docs/user_guide/workflows/exit_codes.rst
@@ -71,6 +71,10 @@ user exit codes.
- Mount operation failed.
* - 2012
- Upload operation failed.
* - 2013
- Data authorization check failed.
* - 2014
- Data access is unauthorized.
* - 2020
- Invalid authentication token for connecting to the service.
* - 2021
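For illustration only (not part of this change): a minimal sketch of branching on the two new user exit codes, under the assumption that they surface as the process return code. The command and wrapper below are hypothetical.

import subprocess

# Hypothetical wrapper: run a workflow step and map the newly documented codes.
proc = subprocess.run(['osmo', 'workflow', 'run', 'my-workflow.yaml'])  # command is illustrative
if proc.returncode == 2013:
    print('Data authorization check failed')
elif proc.returncode == 2014:
    print('Data access is unauthorized')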
71 changes: 70 additions & 1 deletion src/cli/data.py
@@ -17,15 +17,18 @@
"""

import argparse
import json
import re
import shutil
import subprocess
import sys
from typing import IO, Iterable

import shtab

from src.lib.utils import client, client_configs, validation
from src.lib.data import storage
from src.lib.data.storage import constants
from src.lib.utils import client, client_configs, credentials, osmo_errors, validation


HELP_TEXT = """
Expand Down Expand Up @@ -179,6 +182,56 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Nam
)


def _run_check_command(service_client: client.ServiceClient, args: argparse.Namespace):
"""
Check access to a backend URI.
Args:
args: Parsed command line arguments.
"""
# pylint: disable=unused-argument
is_storage_profile = bool(re.fullmatch(constants.STORAGE_PROFILE_REGEX, args.remote_uri))

storage_backend = storage.construct_storage_backend(
uri=args.remote_uri,
profile=is_storage_profile,
cache_config=client_configs.get_cache_config(),
)

data_cred = credentials.get_static_data_credential_from_config(
storage_backend.profile,
args.config_file,
)

try:
match args.access_type:
case storage.AccessType.READ.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage.AccessType.READ,
)
case storage.AccessType.WRITE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage.AccessType.WRITE,
)
case storage.AccessType.DELETE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage.AccessType.DELETE,
)
case _:
storage_backend.data_auth(
data_cred=data_cred,
)

# Auth check passed
print(json.dumps({'status': 'pass'}))

except osmo_errors.OSMOCredentialError as err:
# Auth check failed (credentials issue)
print(json.dumps({'status': 'fail', 'error': str(err)}))
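For reference, the three explicit cases above differ only in which AccessType member they forward. An equivalent, more compact dispatch (an illustrative sketch, not part of this change, assuming args.access_type is either None or one of the member names exposed as choices) would be:

# Sketch: look the enum member up by name instead of matching each case.
access_type = storage.AccessType[args.access_type] if args.access_type else None
if access_type is None:
    storage_backend.data_auth(data_cred=data_cred)
else:
    storage_backend.data_auth(data_cred=data_cred, access_type=access_type)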


def setup_parser(parser: argparse._SubParsersAction):
"""
Dataset parser setup and run command based on parsing
@@ -285,3 +338,19 @@ def setup_parser(parser: argparse._SubParsersAction):
type=validation.is_regex,
help='Regex to filter which types of files to delete')
delete_parser.set_defaults(func=_run_delete_command)

check_parser = subparsers.add_parser(
'check',
help='Check access to a backend URI',
description='Check access to a backend URI',
)
check_parser.add_argument('remote_uri',
type=validation.is_storage_credential_path,
help='URI to check access to.')
check_parser.add_argument('--access-type', '-a',
choices=list(storage.AccessType.__members__.keys()),
help='Type of access to check against the backend URI.')
check_parser.add_argument('--config-file', '-c',
type=validation.valid_path,
help='Path to the config file to use for the access check.')
check_parser.set_defaults(func=_run_check_command)
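A hedged sketch of scripting against the new subcommand and parsing the JSON it prints (the `osmo` entry point follows the `osmo dataset inspect` example later in this diff; the URI is hypothetical):

import json
import subprocess

# Run 'data check' and inspect the printed status.
proc = subprocess.run(
    ['osmo', 'data', 'check', 's3://my-bucket/path', '--access-type', 'READ'],
    capture_output=True, text=True,
)
result = json.loads(proc.stdout)
if result['status'] != 'pass':
    raise RuntimeError(f"Access check failed: {result.get('error')}")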
121 changes: 98 additions & 23 deletions src/cli/dataset.py
@@ -30,14 +30,16 @@

import ijson
import shtab
from tqdm import tqdm # type: ignore
from tqdm import tqdm # type: ignore

from src.lib.data import (
dataset as dataset_lib,
storage as storage_lib,
)
from src.lib.utils import (
client,
client_configs,
credentials,
common,
osmo_errors,
validation,
@@ -369,8 +371,8 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Namespace):
prompt_info = f'the latest version of Dataset {dataset.name} from bucket ' +\
f'{dataset.bucket}'
confirm = common.prompt_user(f'Are you sure you want to mark {prompt_info} '
'as PENDING_DELETE? The storage objects will not be '
'deleted yet.')
'as PENDING_DELETE? The storage objects will not be '
'deleted yet.')
if not confirm:
return

@@ -390,7 +392,7 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Namespace):
f'All versions of {dataset.name} have been marked as PENDING_DELETE. '
'Do you want to delete the storage objects and wipe the dataset?\n'
'Note: Any concurrent uploads to this dataset may be affected.'
)
)
elif delete_objects and args.force:
confirm_delete_objects = True

@@ -413,7 +415,7 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Namespace):
print(json.dumps(json_output, indent=common.JSON_INDENT_SIZE))
else:
for version in delete_result['versions']:
print(f'Dataset {dataset.name} version ' +\
print(f'Dataset {dataset.name} version ' +
f'{version} bucket {dataset.bucket} has been marked as '
f'PENDING_DELETE.')
return
@@ -524,7 +526,7 @@ def _run_collect_command(service_client: client.ServiceClient, args: argparse.Namespace):
if not collection.bucket:
collection.bucket = dataset_lib.get_user_bucket(service_client)

payload = {'datasets': [common.DatasetStructure(dataset).to_dict()\
payload = {'datasets': [common.DatasetStructure(dataset).to_dict()
for dataset in args.datasets]}
service_client.request(
client.RequestMethod.POST,
@@ -549,9 +551,9 @@ def _run_recollect_command(service_client: client.ServiceClient, args: argparse.Namespace):

remove_datasets = []
if args.remove:
remove_datasets = [common.DatasetStructure(dataset).to_dict()\
remove_datasets = [common.DatasetStructure(dataset).to_dict()
for dataset in args.remove]
payload = {'add_datasets': [common.DatasetStructure(dataset).to_dict()\
payload = {'add_datasets': [common.DatasetStructure(dataset).to_dict()
for dataset in args.add],
'remove_datasets': remove_datasets}
result = service_client.request(
@@ -599,7 +601,7 @@ def _get_metadata_from_file(file_path: str) -> Dict | List:
if isinstance(content, list):
if not all(isinstance(x, (int, float)) for x in content) and \
not all(isinstance(x, str) for x in content):
raise osmo_errors.OSMOError('All elements in an array should be of same type: str or'\
raise osmo_errors.OSMOError('All elements in an array should be of same type: str or'
' numeric.')
elif isinstance(content, bool):
content = [str(content)]
@@ -743,7 +745,7 @@ def _run_rename_command(service_client: client.ServiceClient, args: argparse.Namespace):
client.RequestMethod.POST,
f'api/bucket/{old_dataset.bucket}/dataset/{old_dataset.name}/attribute',
params=params)
print(f'{old_dataset.name} has been renamed to {new_dataset.name} in bucket ' +\
print(f'{old_dataset.name} has been renamed to {new_dataset.name} in bucket ' +
f'{old_dataset.bucket}')


@@ -820,10 +822,10 @@ def _run_checksum_command(service_client: client.ServiceClient, args: argparse.Namespace):
for file in objects:
# Add Relative Path + checksum path_checksums
path_checksums.append(file[len(path.rsplit('/', 1)[0]) + 1:] +
' ' + common.etag_checksum(file))
' ' + common.etag_checksum(file))
file_size_uploaded = file_information.get(file, 0)
t.set_postfix(file_name=file.split('/')[-1],
file_size=f'{file_size_uploaded} B', refresh=True)
file_size=f'{file_size_uploaded} B', refresh=True)
t.update(file_size_uploaded)

path_checksums.sort()
@@ -954,6 +956,62 @@ def _run_migrate_command(service_client: client.ServiceClient, args: argparse.Namespace):
dataset_manager.migrate()


def _run_check_command(service_client: client.ServiceClient, args: argparse.Namespace):
"""
Check access to a dataset for various operations.
Args:
args: Parsed command line arguments.
"""
dataset = common.DatasetStructure(args.name)

if not dataset.bucket:
dataset.bucket = dataset_lib.get_user_bucket(service_client)

try:
location_result = service_client.request(
client.RequestMethod.GET,
dataset_lib.common.construct_location_api_path(dataset),
)

storage_backend = storage_lib.construct_storage_backend(
location_result['path'],
cache_config=client_configs.get_cache_config(),
)

data_cred = credentials.get_static_data_credential_from_config(
storage_backend.profile,
args.config_file,
)

match args.access_type:
case storage_lib.AccessType.WRITE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage_lib.AccessType.WRITE,
)
case storage_lib.AccessType.DELETE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage_lib.AccessType.DELETE,
)
case storage_lib.AccessType.READ.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage_lib.AccessType.READ,
)
case _:
storage_backend.data_auth(
data_cred=data_cred,
)

# Auth check passed
print(json.dumps({'status': 'pass'}))

except osmo_errors.OSMOCredentialError as err:
# Auth check failed (credentials issue)
print(json.dumps({'status': 'fail', 'error': str(err)}))


def setup_parser(parser: argparse._SubParsersAction):
"""
Dataset parser setup and run command based on parsing
@@ -1010,12 +1068,12 @@ def setup_parser(parser: argparse._SubParsersAction):
upload_parser.add_argument('--metadata', '-m',
nargs='+',
default=[],
help='Yaml files of metadata to '\
help='Yaml files of metadata to '
'assign to dataset version').complete = shtab.FILE
upload_parser.add_argument('--labels', '-l',
nargs='+',
default=[],
help='Yaml files of labels to '\
help='Yaml files of labels to '
'assign to dataset').complete = shtab.FILE
upload_parser.add_argument('--regex', '-x',
type=validation.is_regex,
@@ -1075,7 +1133,7 @@ def setup_parser(parser: argparse._SubParsersAction):
'[bucket/]DS[:tag/version].')
download_parser.add_argument('path', type=validation.valid_path,
help='Location where the dataset is downloaded to.').complete = \
shtab.FILE
shtab.FILE
download_parser.add_argument('--regex', '-x',
type=validation.is_regex,
help='Regex to filter which types of files to download')
@@ -1242,15 +1300,15 @@ def setup_parser(parser: argparse._SubParsersAction):
dest='set',
nargs='+',
default=[],
help='Set label for dataset in the form '\
'"<key>:<type>:<value>" where type is '\
'string or numeric'\
help='Set label for dataset in the form '
'"<key>:<type>:<value>" where type is '
'string or numeric'
'or the file-path').complete = shtab.FILE
label_parser.add_argument('--delete', '-d',
dest='delete',
nargs='+',
default=[],
help='Delete labels from dataset in the form "<key>"'\
help='Delete labels from dataset in the form "<key>"'
'or the file-path').complete = shtab.FILE
label_parser.add_argument('--format-type', '-t',
dest='format_type',
@@ -1276,7 +1334,7 @@ def setup_parser(parser: argparse._SubParsersAction):
default=[],
help='Set metadata from dataset in the form '
'"<key>:<type>:<value>" where type is '
'string or numeric'\
'string or numeric'
'or the file-path').complete = shtab.FILE
metadata_parser.add_argument('--delete', '-d',
dest='delete',
@@ -1332,9 +1390,9 @@ def setup_parser(parser: argparse._SubParsersAction):

# Handle 'inspect' command
inspect_parser = subparsers.add_parser('inspect',
help='Display Dataset Directory',
epilog='Ex. osmo dataset inspect DS1:latest ' +
'--format-type json')
help='Display Dataset Directory',
epilog='Ex. osmo dataset inspect DS1:latest ' +
'--format-type json')
inspect_parser.add_argument('name',
help='Dataset name. Specify bucket and ' +
'tag/version with [bucket/]DS[:tag/version].')
@@ -1381,3 +1439,20 @@ def setup_parser(parser: argparse._SubParsersAction):
migrate_parser.add_argument('--benchmark-out', '-b',
help='Path to folder where benchmark data will be written to.')
migrate_parser.set_defaults(func=_run_migrate_command)

# Handle 'check' command
check_parser = subparsers.add_parser(
'check',
help='Check access permissions for dataset operations',
description='Check access permissions for dataset operations',
)
check_parser.add_argument('name',
help='Dataset name. Specify bucket and tag/version with ' +
'[bucket/]DS[:tag/version].')
check_parser.add_argument('--access-type', '-a',
choices=list(storage_lib.AccessType.__members__.keys()),
help='Type of access to check for the dataset.')
check_parser.add_argument('--config-file', '-c',
type=validation.valid_path,
help='Path to the config file to use for the access check.')
check_parser.set_defaults(func=_run_check_command)
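For illustration, in the style of the epilogs used elsewhere in this file (the dataset name and bucket are hypothetical): Ex. osmo dataset check my-bucket/DS1:latest --access-type WRITE, which prints {"status": "pass"} when the configured credentials allow writes, or {"status": "fail", "error": "..."} otherwise.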