Commit 94b1827

Perform Client Side Data Auth Check In the Event of Environment Based Auth (#177)
* Data/Dataset Auth Check CLIs
* Remove auth check from data service
* Use auth check CLIs in ctrl
* Add exit code to docs
* Fix build issues
* Fix lint
* Ctrl to use user config when validating data auth
* Use the correct CLI argument type
* Fix lint
* Use profile when looking up data credential from config
1 parent b0837da commit 94b1827
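Taken together, the changes move the data-authorization check from the data service into the client: both the `data` and `dataset` CLIs gain a `check` subcommand that validates credentials against the storage backend and prints a JSON pass/fail result. Below is a minimal sketch of how the ctrl side might consume it; the wiring is an assumption (this commit only shows the CLI side), the `osmo` entry point is inferred from the `osmo dataset inspect` epilog in this diff, and the exit codes come from the docs change below.

```python
import json
import subprocess
import sys

# Hypothetical ctrl-side helper; names and wiring are assumptions, not part of this commit.
DATA_AUTH_CHECK_FAILED = 2013    # from docs/user_guide/workflows/exit_codes.rst
DATA_ACCESS_UNAUTHORIZED = 2014  # from docs/user_guide/workflows/exit_codes.rst

def preflight_data_auth(remote_uri: str, access_type: str = 'READ') -> None:
    """Run the client-side auth check and exit with the documented codes on failure."""
    proc = subprocess.run(
        ['osmo', 'data', 'check', remote_uri, '--access-type', access_type],
        capture_output=True, text=True,
    )
    if proc.returncode != 0:
        sys.exit(DATA_AUTH_CHECK_FAILED)    # the check itself could not run
    result = json.loads(proc.stdout)
    if result['status'] != 'pass':
        sys.exit(DATA_ACCESS_UNAUTHORIZED)  # credentials rejected by the backend
```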

15 files changed: +352 additions, -143 deletions

bzl/linting/pylintrc

Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ disable=abstract-method,
         too-many-arguments,
         too-many-boolean-expressions,
         too-many-branches,
+        too-many-function-args,
         too-many-instance-attributes,
         too-many-locals,
         too-many-nested-blocks,

docs/deployment_guide/references/configs_definitions/dataset.rst

Lines changed: 0 additions & 4 deletions
@@ -62,10 +62,6 @@ Bucket
      - String
      - The cloud storage region where the bucket is located.
      - ``us-east-1``
-   * - ``check_key``
-     - Boolean
-     - Whether to validate access keys before allowing access to the bucket.
-     - ``False``
    * - ``description``
      - String
      - Human-readable description of the bucket and its intended use.

docs/user_guide/workflows/exit_codes.rst

Lines changed: 4 additions & 0 deletions
@@ -71,6 +71,10 @@ user exit codes.
      - Mount operation failed.
    * - 2012
      - Upload operation failed.
+   * - 2013
+     - Data authorization check failed.
+   * - 2014
+     - Data access is unauthorized.
    * - 2020
      - Invalid authentication token for connecting to the service.
    * - 2021

src/cli/data.py

Lines changed: 70 additions & 1 deletion
@@ -17,15 +17,18 @@
 """

 import argparse
+import json
+import re
 import shutil
 import subprocess
 import sys
 from typing import IO, Iterable

 import shtab

-from src.lib.utils import client, client_configs, validation
 from src.lib.data import storage
+from src.lib.data.storage import constants
+from src.lib.utils import client, client_configs, credentials, osmo_errors, validation


 HELP_TEXT = """
@@ -179,6 +182,56 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Nam
     )


+def _run_check_command(service_client: client.ServiceClient, args: argparse.Namespace):
+    """
+    Check the access to a backend URI
+    Args:
+        args : Parsed command line arguments.
+    """
+    # pylint: disable=unused-argument
+    is_storage_profile = bool(re.fullmatch(constants.STORAGE_PROFILE_REGEX, args.remote_uri))
+
+    storage_backend = storage.construct_storage_backend(
+        uri=args.remote_uri,
+        profile=is_storage_profile,
+        cache_config=client_configs.get_cache_config(),
+    )
+
+    data_cred = credentials.get_static_data_credential_from_config(
+        storage_backend.profile,
+        args.config_file,
+    )
+
+    try:
+        match args.access_type:
+            case storage.AccessType.READ.name:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                    access_type=storage.AccessType.READ,
+                )
+            case storage.AccessType.WRITE.name:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                    access_type=storage.AccessType.WRITE,
+                )
+            case storage.AccessType.DELETE.name:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                    access_type=storage.AccessType.DELETE,
+                )
+            case _:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                )
+
+        # Auth check passed
+        print(json.dumps({'status': 'pass'}))
+
+    except osmo_errors.OSMOCredentialError as err:
+        # Auth check failed (credentials issue)
+        print(json.dumps({'status': 'fail', 'error': str(err)}))
+
+
 def setup_parser(parser: argparse._SubParsersAction):
     """
     Dataset parser setup and run command based on parsing
@@ -285,3 +338,19 @@ def setup_parser(parser: argparse._SubParsersAction):
                                type=validation.is_regex,
                                help='Regex to filter which types of files to delete')
     delete_parser.set_defaults(func=_run_delete_command)
+
+    check_parser = subparsers.add_parser(
+        'check',
+        help='Check the access to a backend URI',
+        description='Check the access to a backend URI',
+    )
+    check_parser.add_argument('remote_uri',
+                              type=validation.is_storage_credential_path,
+                              help='URI where access will be checked to.')
+    check_parser.add_argument('--access-type', '-a',
+                              choices=list(storage.AccessType.__members__.keys()),
+                              help='Access type to check access to the backend URI.')
+    check_parser.add_argument('--config-file', '-c',
+                              type=validation.valid_path,
+                              help='Path to the config file to use for the access check.')
+    check_parser.set_defaults(func=_run_check_command)
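Since `_run_check_command` ignores its `service_client` argument (note the `pylint: disable=unused-argument` in the diff), the new command can also be exercised directly with a bare `argparse.Namespace`. The sketch below is illustrative only: the URI is a placeholder, and `config_file=None` mirrors what argparse passes when `--config-file` is omitted.

```python
import argparse

from src.cli import data

# Placeholder URI; anything accepted by validation.is_storage_credential_path works.
args = argparse.Namespace(
    remote_uri='s3://example-bucket/path',
    access_type='READ',  # one of storage.AccessType.__members__, or None for the default check
    config_file=None,    # argparse default when --config-file is not given
)

# service_client is unused by this command, so None stands in for it here.
# Prints {"status": "pass"} or {"status": "fail", "error": "..."} as JSON.
data._run_check_command(None, args)
```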

src/cli/dataset.py

Lines changed: 98 additions & 23 deletions
@@ -30,14 +30,16 @@

 import ijson
 import shtab
-from tqdm import tqdm # type: ignore
+from tqdm import tqdm  # type: ignore

 from src.lib.data import (
     dataset as dataset_lib,
     storage as storage_lib,
 )
 from src.lib.utils import (
     client,
+    client_configs,
+    credentials,
     common,
     osmo_errors,
     validation,
@@ -369,8 +371,8 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Nam
         prompt_info = f'the latest version of Dataset {dataset.name} from bucket ' +\
                       f'{dataset.bucket}'
         confirm = common.prompt_user(f'Are you sure you want to mark {prompt_info} '
-                                 'as PENDING_DELETE? The storage objects will not be '
-                                 'deleted yet.')
+                                     'as PENDING_DELETE? The storage objects will not be '
+                                     'deleted yet.')
         if not confirm:
             return

@@ -390,7 +392,7 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Nam
             f'All versions of {dataset.name} has been marked as PENDING_DELETE.'
             'Do you want to delete the storage objects and wipe the dataset?\n'
             'Note: Any concurrent uploads to this dataset may be effected.'
-            )
+        )
     elif delete_objects and args.force:
         confirm_delete_objects = True

@@ -413,7 +415,7 @@ def _run_delete_command(service_client: client.ServiceClient, args: argparse.Nam
         print(json.dumps(json_output, indent=common.JSON_INDENT_SIZE))
     else:
         for version in delete_result['versions']:
-            print(f'Dataset {dataset.name} version ' +\
+            print(f'Dataset {dataset.name} version ' +
                   f'{version} bucket {dataset.bucket} has been marked as '
                   f'PENDING_DELETE.')
     return
@@ -524,7 +526,7 @@ def _run_collect_command(service_client: client.ServiceClient, args: argparse.Na
     if not collection.bucket:
         collection.bucket = dataset_lib.get_user_bucket(service_client)

-    payload = {'datasets': [common.DatasetStructure(dataset).to_dict()\
+    payload = {'datasets': [common.DatasetStructure(dataset).to_dict()
                             for dataset in args.datasets]}
     service_client.request(
         client.RequestMethod.POST,
@@ -549,9 +551,9 @@ def _run_recollect_command(service_client: client.ServiceClient, args: argparse.

     remove_datasets = []
     if args.remove:
-        remove_datasets = [common.DatasetStructure(dataset).to_dict()\
+        remove_datasets = [common.DatasetStructure(dataset).to_dict()
                            for dataset in args.remove]
-    payload = {'add_datasets': [common.DatasetStructure(dataset).to_dict()\
+    payload = {'add_datasets': [common.DatasetStructure(dataset).to_dict()
                                 for dataset in args.add],
                'remove_datasets': remove_datasets}
     result = service_client.request(
@@ -599,7 +601,7 @@ def _get_metadata_from_file(file_path: str) -> Dict | List:
     if isinstance(content, list):
         if not all(isinstance(x, (int, float)) for x in content) and \
                 not all(isinstance(x, str) for x in content):
-            raise osmo_errors.OSMOError('All elements in an array should be of same type: str or'\
+            raise osmo_errors.OSMOError('All elements in an array should be of same type: str or'
                                         ' numeric.')
     elif isinstance(content, bool):
         content = [str(content)]
@@ -743,7 +745,7 @@ def _run_rename_command(service_client: client.ServiceClient, args: argparse.Nam
         client.RequestMethod.POST,
         f'api/bucket/{old_dataset.bucket}/dataset/{old_dataset.name}/attribute',
         params=params)
-    print(f'{old_dataset.name} has been renamed to {new_dataset.name} in bucket ' +\
+    print(f'{old_dataset.name} has been renamed to {new_dataset.name} in bucket ' +
           f'{old_dataset.bucket}')


@@ -820,10 +822,10 @@ def _run_checksum_command(service_client: client.ServiceClient, args: argparse.N
         for file in objects:
             # Add Relative Path + checksum path_checksums
             path_checksums.append(file[len(path.rsplit('/', 1)[0]) + 1:] +
-                                 ' ' + common.etag_checksum(file))
+                                  ' ' + common.etag_checksum(file))
             file_size_uploaded = file_information.get(file, 0)
             t.set_postfix(file_name=file.split('/')[-1],
-                         file_size=f'{file_size_uploaded} B', refresh=True)
+                          file_size=f'{file_size_uploaded} B', refresh=True)
             t.update(file_size_uploaded)

     path_checksums.sort()
@@ -954,6 +956,62 @@ def _run_migrate_command(service_client: client.ServiceClient, args: argparse.Na
     dataset_manager.migrate()


+def _run_check_command(service_client: client.ServiceClient, args: argparse.Namespace):
+    """
+    Check the access to a dataset for various operations
+    Args:
+        args: Parsed command line arguments.
+    """
+    dataset = common.DatasetStructure(args.name)
+
+    if not dataset.bucket:
+        dataset.bucket = dataset_lib.get_user_bucket(service_client)
+
+    try:
+        location_result = service_client.request(
+            client.RequestMethod.GET,
+            dataset_lib.common.construct_location_api_path(dataset),
+        )
+
+        storage_backend = storage_lib.construct_storage_backend(
+            location_result['path'],
+            cache_config=client_configs.get_cache_config(),
+        )
+
+        data_cred = credentials.get_static_data_credential_from_config(
+            storage_backend.profile,
+            args.config_file,
+        )
+
+        match args.access_type:
+            case storage_lib.AccessType.WRITE.name:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                    access_type=storage_lib.AccessType.WRITE,
+                )
+            case storage_lib.AccessType.DELETE.name:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                    access_type=storage_lib.AccessType.DELETE,
+                )
+            case storage_lib.AccessType.READ.name:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                    access_type=storage_lib.AccessType.READ,
+                )
+            case _:
+                storage_backend.data_auth(
+                    data_cred=data_cred,
+                )
+
+        # Auth check passed
+        print(json.dumps({'status': 'pass'}))
+
+    except osmo_errors.OSMOCredentialError as err:
+        # Auth check failed (credentials issue)
+        print(json.dumps({'status': 'fail', 'error': str(err)}))
+
+
 def setup_parser(parser: argparse._SubParsersAction):
     """
     Dataset parser setup and run command based on parsing
@@ -1010,12 +1068,12 @@ def setup_parser(parser: argparse._SubParsersAction):
     upload_parser.add_argument('--metadata', '-m',
                                nargs='+',
                                default=[],
-                               help='Yaml files of metadata to '\
+                               help='Yaml files of metadata to '
                                     'assign to dataset version').complete = shtab.FILE
     upload_parser.add_argument('--labels', '-l',
                                nargs='+',
                                default=[],
-                               help='Yaml files of labels to '\
+                               help='Yaml files of labels to '
                                     'assign to dataset').complete = shtab.FILE
     upload_parser.add_argument('--regex', '-x',
                                type=validation.is_regex,
@@ -1075,7 +1133,7 @@ def setup_parser(parser: argparse._SubParsersAction):
                                 '[bucket/]DS[:tag/version].')
     download_parser.add_argument('path', type=validation.valid_path,
                                  help='Location where the dataset is downloaded to.').complete = \
-        shtab.FILE
+                                 shtab.FILE
     download_parser.add_argument('--regex', '-x',
                                  type=validation.is_regex,
                                  help='Regex to filter which types of files to download')
@@ -1242,15 +1300,15 @@ def setup_parser(parser: argparse._SubParsersAction):
                               dest='set',
                               nargs='+',
                               default=[],
-                              help='Set label for dataset in the form '\
-                                   '"<key>:<type>:<value>" where type is '\
-                                   'string or numeric'\
+                              help='Set label for dataset in the form '
+                                   '"<key>:<type>:<value>" where type is '
+                                   'string or numeric'
                                    'or the file-path').complete = shtab.FILE
     label_parser.add_argument('--delete', '-d',
                               dest='delete',
                               nargs='+',
                               default=[],
-                              help='Delete labels from dataset in the form "<key>"'\
+                              help='Delete labels from dataset in the form "<key>"'
                                    'or the file-path').complete = shtab.FILE
     label_parser.add_argument('--format-type', '-t',
                               dest='format_type',
@@ -1276,7 +1334,7 @@ def setup_parser(parser: argparse._SubParsersAction):
                                  default=[],
                                  help='Set metadata from dataset in the form '
                                       '"<key>:<type>:<value>" where type is '
-                                      'string or numeric'\
+                                      'string or numeric'
                                       'or the file-path').complete = shtab.FILE
     metadata_parser.add_argument('--delete', '-d',
                                  dest='delete',
@@ -1332,9 +1390,9 @@ def setup_parser(parser: argparse._SubParsersAction):

     # Handle 'inspect' command
     inspect_parser = subparsers.add_parser('inspect',
-                                       help='Display Dataset Directory',
-                                       epilog='Ex. osmo dataset inspect DS1:latest ' +
-                                              '--format-type json')
+                                           help='Display Dataset Directory',
+                                           epilog='Ex. osmo dataset inspect DS1:latest ' +
+                                                  '--format-type json')
     inspect_parser.add_argument('name',
                                 help='Dataset name. Specify bucket and ' +
                                      'tag/version with [bucket/]DS[:tag/version].')
@@ -1381,3 +1439,20 @@ def setup_parser(parser: argparse._SubParsersAction):
     migrate_parser.add_argument('--benchmark-out', '-b',
                                 help='Path to folder where benchmark data will be written to.')
     migrate_parser.set_defaults(func=_run_migrate_command)
+
+    # Handle 'check' command (add after migrate_parser in setup_parser function)
+    check_parser = subparsers.add_parser(
+        'check',
+        help='Check access permissions for dataset operations',
+        description='Check access permissions for dataset operations',
+    )
+    check_parser.add_argument('name',
+                              help='Dataset name. Specify bucket and tag/version with ' +
+                                   '[bucket/]DS[:tag/version].')
+    check_parser.add_argument('--access-type', '-a',
+                              choices=list(storage_lib.AccessType.__members__.keys()),
+                              help='Access type to check access to the dataset.')
+    check_parser.add_argument('--config-file', '-c',
+                              type=validation.valid_path,
+                              help='Path to the config file to use for the access check.')
+    check_parser.set_defaults(func=_run_check_command)
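The dataset-level variant differs from `data check` only in how it finds the backend: it resolves the dataset's storage location through the service (`construct_location_api_path`) before running the same `data_auth` call. As a usage sketch (again assuming the `osmo` entry point and a placeholder dataset name), a caller could pre-flight every access type before a migration:

```python
import json
import subprocess

# Hypothetical pre-flight over the three AccessType members exposed by the parser choices.
for access_type in ('READ', 'WRITE', 'DELETE'):
    proc = subprocess.run(
        ['osmo', 'dataset', 'check', 'my-bucket/DS1:latest', '--access-type', access_type],
        capture_output=True, text=True, check=True,
    )
    # Each run prints a one-line JSON result on stdout.
    print(access_type, json.loads(proc.stdout)['status'])
```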
