1 change: 1 addition & 0 deletions bzl/linting/pylintrc
@@ -144,6 +144,7 @@ disable=abstract-method,
too-many-arguments,
too-many-boolean-expressions,
too-many-branches,
too-many-function-args,
too-many-instance-attributes,
too-many-locals,
too-many-nested-blocks,
@@ -62,10 +62,6 @@ Bucket
- String
- The cloud storage region where the bucket is located.
- ``us-east-1``
* - ``check_key``
- Boolean
- Whether to validate access keys before allowing access to the bucket.
- ``False``
* - ``description``
- String
- Human-readable description of the bucket and its intended use.
4 changes: 4 additions & 0 deletions docs/user_guide/workflows/exit_codes.rst
@@ -71,6 +71,10 @@ user exit codes.
- Mount operation failed.
* - 2012
- Upload operation failed.
* - 2013
- Data authorization check failed.
* - 2014
- Data access is unauthorized.
* - 2020
- Invalid authentication token for connecting to the service.
* - 2021
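For illustration only (not part of this change): a minimal sketch of branching on the two new user exit codes, under the assumption that they surface as the process return code. The command and wrapper below are hypothetical.

import subprocess

# Hypothetical wrapper: run a workflow step and map the newly documented codes.
proc = subprocess.run(['osmo', 'workflow', 'run', 'my-workflow.yaml'])  # command is illustrative
if proc.returncode == 2013:
    print('Data authorization check failed')
elif proc.returncode == 2014:
    print('Data access is unauthorized')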
71 changes: 70 additions & 1 deletion src/cli/data.py
@@ -17,15 +17,18 @@
"""

import argparse
import json
import re
import shutil
import subprocess
import sys
from typing import IO, Iterable

import shtab

from src.lib.utils import client, client_configs, validation
from src.lib.data import storage
from src.lib.data.storage import constants
from src.lib.utils import client, client_configs, credentials, osmo_errors, validation


HELP_TEXT = """
Expand Down Expand Up @@ -179,6 +182,56 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Nam
)


def _run_check_command(service_client: client.ServiceClient, args: argparse.Namespace):
"""
Check access to a backend URI.
Args:
args: Parsed command line arguments.
"""
# pylint: disable=unused-argument
is_storage_profile = bool(re.fullmatch(constants.STORAGE_PROFILE_REGEX, args.remote_uri))

storage_backend = storage.construct_storage_backend(
uri=args.remote_uri,
profile=is_storage_profile,
cache_config=client_configs.get_cache_config(),
)

data_cred = credentials.get_static_data_credential_from_config(
storage_backend.profile,
args.config_file,
)

try:
match args.access_type:
case storage.AccessType.READ.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage.AccessType.READ,
)
case storage.AccessType.WRITE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage.AccessType.WRITE,
)
case storage.AccessType.DELETE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage.AccessType.DELETE,
)
case _:
storage_backend.data_auth(
data_cred=data_cred,
)

# Auth check passed
print(json.dumps({'status': 'pass'}))

except osmo_errors.OSMOCredentialError as err:
# Auth check failed (credentials issue)
print(json.dumps({'status': 'fail', 'error': str(err)}))
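For reference, the three explicit cases above differ only in which AccessType member they forward. An equivalent, more compact dispatch (an illustrative sketch, not part of this change, assuming args.access_type is either None or one of the member names exposed as choices) would be:

# Sketch: look the enum member up by name instead of matching each case.
access_type = storage.AccessType[args.access_type] if args.access_type else None
if access_type is None:
    storage_backend.data_auth(data_cred=data_cred)
else:
    storage_backend.data_auth(data_cred=data_cred, access_type=access_type)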


def setup_parser(parser: argparse._SubParsersAction):
"""
Dataset parser setup and run command based on parsing
@@ -285,3 +338,19 @@ def setup_parser(parser: argparse._SubParsersAction):
type=validation.is_regex,
help='Regex to filter which types of files to delete')
delete_parser.set_defaults(func=_run_delete_command)

check_parser = subparsers.add_parser(
'check',
help='Check access to a backend URI',
description='Check access to a backend URI',
)
check_parser.add_argument('remote_uri',
type=validation.is_storage_credential_path,
help='URI to check access to.')
check_parser.add_argument('--access-type', '-a',
choices=list(storage.AccessType.__members__.keys()),
help='Type of access to check against the backend URI.')
check_parser.add_argument('--config-file', '-c',
type=validation.valid_path,
help='Path to the config file to use for the access check.')
check_parser.set_defaults(func=_run_check_command)
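A hedged sketch of scripting against the new subcommand and parsing the JSON it prints (the `osmo` entry point follows the `osmo dataset inspect` example later in this diff; the URI is hypothetical):

import json
import subprocess

# Run 'data check' and inspect the printed status.
proc = subprocess.run(
    ['osmo', 'data', 'check', 's3://my-bucket/path', '--access-type', 'READ'],
    capture_output=True, text=True,
)
result = json.loads(proc.stdout)
if result['status'] != 'pass':
    raise RuntimeError(f"Access check failed: {result.get('error')}")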
121 changes: 98 additions & 23 deletions src/cli/dataset.py
@@ -30,14 +30,16 @@

import ijson
import shtab
from tqdm import tqdm # type: ignore
from tqdm import tqdm # type: ignore

from src.lib.data import (
dataset as dataset_lib,
storage as storage_lib,
)
from src.lib.utils import (
client,
client_configs,
credentials,
common,
osmo_errors,
validation,
@@ -369,8 +371,8 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Namespace):
prompt_info = f'the latest version of Dataset {dataset.name} from bucket ' +\
f'{dataset.bucket}'
confirm = common.prompt_user(f'Are you sure you want to mark {prompt_info} '
'as PENDING_DELETE? The storage objects will not be '
'deleted yet.')
'as PENDING_DELETE? The storage objects will not be '
'deleted yet.')
if not confirm:
return

@@ -390,7 +392,7 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Namespace):
f'All versions of {dataset.name} have been marked as PENDING_DELETE. '
'Do you want to delete the storage objects and wipe the dataset?\n'
'Note: Any concurrent uploads to this dataset may be affected.'
)
)
elif delete_objects and args.force:
confirm_delete_objects = True

@@ -413,7 +415,7 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Namespace):
print(json.dumps(json_output, indent=common.JSON_INDENT_SIZE))
else:
for version in delete_result['versions']:
print(f'Dataset {dataset.name} version ' +\
print(f'Dataset {dataset.name} version ' +
f'{version} bucket {dataset.bucket} has been marked as '
f'PENDING_DELETE.')
return
@@ -524,7 +526,7 @@ def _run_collect_command(service_client: client.ServiceClient, args: argparse.Namespace):
if not collection.bucket:
collection.bucket = dataset_lib.get_user_bucket(service_client)

payload = {'datasets': [common.DatasetStructure(dataset).to_dict()\
payload = {'datasets': [common.DatasetStructure(dataset).to_dict()
for dataset in args.datasets]}
service_client.request(
client.RequestMethod.POST,
@@ -549,9 +551,9 @@ def _run_recollect_command(service_client: client.ServiceClient, args: argparse.Namespace):

remove_datasets = []
if args.remove:
remove_datasets = [common.DatasetStructure(dataset).to_dict()\
remove_datasets = [common.DatasetStructure(dataset).to_dict()
for dataset in args.remove]
payload = {'add_datasets': [common.DatasetStructure(dataset).to_dict()\
payload = {'add_datasets': [common.DatasetStructure(dataset).to_dict()
for dataset in args.add],
'remove_datasets': remove_datasets}
result = service_client.request(
@@ -599,7 +601,7 @@ def _get_metadata_from_file(file_path: str) -> Dict | List:
if isinstance(content, list):
if not all(isinstance(x, (int, float)) for x in content) and \
not all(isinstance(x, str) for x in content):
raise osmo_errors.OSMOError('All elements in an array should be of same type: str or'\
raise osmo_errors.OSMOError('All elements in an array should be of same type: str or'
' numeric.')
elif isinstance(content, bool):
content = [str(content)]
@@ -743,7 +745,7 @@ def _run_rename_command(service_client: client.ServiceClient, args: argparse.Namespace):
client.RequestMethod.POST,
f'api/bucket/{old_dataset.bucket}/dataset/{old_dataset.name}/attribute',
params=params)
print(f'{old_dataset.name} has been renamed to {new_dataset.name} in bucket ' +\
print(f'{old_dataset.name} has been renamed to {new_dataset.name} in bucket ' +
f'{old_dataset.bucket}')


@@ -820,10 +822,10 @@ def _run_checksum_command(service_client: client.ServiceClient, args: argparse.Namespace):
for file in objects:
# Add Relative Path + checksum path_checksums
path_checksums.append(file[len(path.rsplit('/', 1)[0]) + 1:] +
' ' + common.etag_checksum(file))
' ' + common.etag_checksum(file))
file_size_uploaded = file_information.get(file, 0)
t.set_postfix(file_name=file.split('/')[-1],
file_size=f'{file_size_uploaded} B', refresh=True)
file_size=f'{file_size_uploaded} B', refresh=True)
t.update(file_size_uploaded)

path_checksums.sort()
@@ -954,6 +956,62 @@ def _run_migrate_command(service_client: client.ServiceClient, args: argparse.Namespace):
dataset_manager.migrate()


def _run_check_command(service_client: client.ServiceClient, args: argparse.Namespace):
"""
Check access to a dataset for various operations.
Args:
args: Parsed command line arguments.
"""
dataset = common.DatasetStructure(args.name)

if not dataset.bucket:
dataset.bucket = dataset_lib.get_user_bucket(service_client)

try:
location_result = service_client.request(
client.RequestMethod.GET,
dataset_lib.common.construct_location_api_path(dataset),
)

storage_backend = storage_lib.construct_storage_backend(
location_result['path'],
cache_config=client_configs.get_cache_config(),
)

data_cred = credentials.get_static_data_credential_from_config(
storage_backend.profile,
args.config_file,
)

match args.access_type:
case storage_lib.AccessType.WRITE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage_lib.AccessType.WRITE,
)
case storage_lib.AccessType.DELETE.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage_lib.AccessType.DELETE,
)
case storage_lib.AccessType.READ.name:
storage_backend.data_auth(
data_cred=data_cred,
access_type=storage_lib.AccessType.READ,
)
case _:
storage_backend.data_auth(
data_cred=data_cred,
)

# Auth check passed
print(json.dumps({'status': 'pass'}))

except osmo_errors.OSMOCredentialError as err:
# Auth check failed (credentials issue)
print(json.dumps({'status': 'fail', 'error': str(err)}))


def setup_parser(parser: argparse._SubParsersAction):
"""
Dataset parser setup and run command based on parsing
@@ -1010,12 +1068,12 @@ def setup_parser(parser: argparse._SubParsersAction):
upload_parser.add_argument('--metadata', '-m',
nargs='+',
default=[],
help='Yaml files of metadata to '\
help='Yaml files of metadata to '
'assign to dataset version').complete = shtab.FILE
upload_parser.add_argument('--labels', '-l',
nargs='+',
default=[],
help='Yaml files of labels to '\
help='Yaml files of labels to '
'assign to dataset').complete = shtab.FILE
upload_parser.add_argument('--regex', '-x',
type=validation.is_regex,
@@ -1075,7 +1133,7 @@ def setup_parser(parser: argparse._SubParsersAction):
'[bucket/]DS[:tag/version].')
download_parser.add_argument('path', type=validation.valid_path,
help='Location where the dataset is downloaded to.').complete = \
shtab.FILE
shtab.FILE
download_parser.add_argument('--regex', '-x',
type=validation.is_regex,
help='Regex to filter which types of files to download')
@@ -1242,15 +1300,15 @@ def setup_parser(parser: argparse._SubParsersAction):
dest='set',
nargs='+',
default=[],
help='Set label for dataset in the form '\
'"<key>:<type>:<value>" where type is '\
'string or numeric'\
help='Set label for dataset in the form '
'"<key>:<type>:<value>" where type is '
'string or numeric'
'or the file-path').complete = shtab.FILE
label_parser.add_argument('--delete', '-d',
dest='delete',
nargs='+',
default=[],
help='Delete labels from dataset in the form "<key>"'\
help='Delete labels from dataset in the form "<key>"'
'or the file-path').complete = shtab.FILE
label_parser.add_argument('--format-type', '-t',
dest='format_type',
@@ -1276,7 +1334,7 @@ def setup_parser(parser: argparse._SubParsersAction):
default=[],
help='Set metadata from dataset in the form '
'"<key>:<type>:<value>" where type is '
'string or numeric'\
'string or numeric'
'or the file-path').complete = shtab.FILE
metadata_parser.add_argument('--delete', '-d',
dest='delete',
@@ -1332,9 +1390,9 @@ def setup_parser(parser: argparse._SubParsersAction):

# Handle 'inspect' command
inspect_parser = subparsers.add_parser('inspect',
help='Display Dataset Directory',
epilog='Ex. osmo dataset inspect DS1:latest ' +
'--format-type json')
help='Display Dataset Directory',
epilog='Ex. osmo dataset inspect DS1:latest ' +
'--format-type json')
inspect_parser.add_argument('name',
help='Dataset name. Specify bucket and ' +
'tag/version with [bucket/]DS[:tag/version].')
@@ -1381,3 +1439,20 @@ def setup_parser(parser: argparse._SubParsersAction):
migrate_parser.add_argument('--benchmark-out', '-b',
help='Path to folder where benchmark data will be written to.')
migrate_parser.set_defaults(func=_run_migrate_command)

# Handle 'check' command
check_parser = subparsers.add_parser(
'check',
help='Check access permissions for dataset operations',
description='Check access permissions for dataset operations',
)
check_parser.add_argument('name',
help='Dataset name. Specify bucket and tag/version with ' +
'[bucket/]DS[:tag/version].')
check_parser.add_argument('--access-type', '-a',
choices=list(storage_lib.AccessType.__members__.keys()),
help='Type of access to check for the dataset.')
check_parser.add_argument('--config-file', '-c',
type=validation.valid_path,
help='Path to the config file to use for the access check.')
check_parser.set_defaults(func=_run_check_command)
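For illustration, in the style of the epilogs used elsewhere in this file (the dataset name and bucket are hypothetical): Ex. osmo dataset check my-bucket/DS1:latest --access-type WRITE, which prints {"status": "pass"} when the configured credentials allow writes, or {"status": "fail", "error": "..."} otherwise.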