Skip to content

Commit 435ecbd

Browse files
authored
Merge pull request #796 from hubmapconsortium/karlburke/NewProvMetadataEndpoint
Align prov-metadata endpoint with existing API structure
2 parents 32decab + f1cea35 commit 435ecbd

File tree

3 files changed

+196
-712
lines changed

3 files changed

+196
-712
lines changed

src/app.py

Lines changed: 196 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@
3838
from schema.schema_constants import TriggerTypeEnum
3939
from metadata_constraints import get_constraints, constraints_json_is_valid
4040
# from lib.ontology import initialize_ubkg, init_ontology, Ontology, UbkgSDK
41-
from dev_entity_worker import EntityWorker
42-
import dev_entity_exceptions as entityEx
4341

4442
# HuBMAP commons
4543
from hubmap_commons import string_helper
@@ -248,23 +246,6 @@ def http_internal_server_error(e):
248246
except Exception as s3exception:
249247
logger.critical(s3exception, exc_info=True)
250248

251-
####################################################################################################
252-
## Initialize a "worker" for the service.
253-
## For initial transition to "worker" usage, pass in globals of app.py which would eventually
254-
## be only in the worker and not in app.py.
255-
####################################################################################################
256-
entity_worker = None
257-
try:
258-
entity_worker = EntityWorker( app_config=app.config
259-
, schema_mgr = schema_manager
260-
, memcached_client_instance = memcached_client_instance
261-
, neo4j_driver_instance = neo4j_driver_instance)
262-
if not isinstance(entity_worker, EntityWorker):
263-
raise Exception("Error instantiating a EntityWorker during startup.")
264-
logger.info("EntityWorker instantiated using app.cfg setting.")
265-
except Exception as e:
266-
logger.critical(f"Unable to instantiate a EntityWorker during startup.")
267-
logger.error(e, exc_info=True)
268249

269250
####################################################################################################
270251
## REFERENCE DOI Redirection
@@ -632,6 +613,112 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
632613
entity_visibility = DataVisibilityEnum.PUBLIC
633614
return entity_visibility
634615

616+
'''
617+
Retrieve the metadata information for certain data associated with an entity. This method supports
618+
Dataset entities, and can get the associated data for organs, samples, or donors.
619+
620+
Get associated data dict based upon the user's authorization. The associated data may be
621+
filtered down if credentials were not presented for full access.
622+
623+
Parameters
624+
----------
625+
dataset_dict : dict
626+
A dictionary containing all the properties of the target entity.
627+
dataset_visibility : DataVisibilityEnum
628+
An indication of whether the entity itself is public or not, so the associated data can
629+
be filtered to match the entity dictionary before being returned.
630+
valid_user_token : str
631+
Either the valid current token for an authenticated user or None.
632+
request : flask.Request
632+
The incoming HTTP request, used to check whether the user is in the HuBMAP-READ Globus Group.
634+
associated_data : str
635+
A string indicating the associated property to be retrieved, which must be from
636+
the values supported by this method.
637+
638+
Returns
639+
-------
640+
list
641+
A list of dictionaries, one per associated entity, each containing the properties of that entity.
642+
'''
643+
def _get_dataset_associated_data(dataset_dict, dataset_visibility, valid_user_token, request, associated_data: str):
644+
645+
# Confirm the associated data requested is supported by this method.
646+
retrievable_associations = ['organs', 'samples', 'donors']
647+
if associated_data.lower() not in retrievable_associations:
648+
bad_request_error( f"Dataset associated data cannot be retrieved for"
649+
f" {associated_data}, only"
650+
f" {COMMA_SEPARATOR.join(retrievable_associations)}.")
651+
652+
# Confirm the dictionary passed in is for a Dataset entity.
653+
if not schema_manager.entity_type_instanceof(dataset_dict['entity_type'], 'Dataset'):
654+
bad_request_error( f"'{dataset_dict['entity_type']}' for"
655+
f" uuid={dataset_dict['uuid']} is not a Dataset or Publication,"
656+
f" so '{associated_data}' can not be retrieved for it.")
657+
# Set up fields to be excluded when retrieving the entities associated with
658+
# the Dataset. Organs are one kind of Sample.
659+
if associated_data.lower() in ['organs', 'samples']:
660+
fields_to_exclude = schema_manager.get_fields_to_exclude('Sample')
661+
elif associated_data.lower() in ['donors']:
662+
fields_to_exclude = schema_manager.get_fields_to_exclude('Donor')
663+
else:
664+
logger.error( f"Expected associated data type to be verified, but got"
665+
f" associated_data.lower()={associated_data.lower()}.")
666+
internal_server_error(f"Unexpected error retrieving '{associated_data}' for a Dataset")
667+
668+
public_entity = (dataset_visibility is DataVisibilityEnum.PUBLIC)
669+
670+
# Set a variable reflecting the user's authorization by being in the HuBMAP-READ Globus Group
671+
user_authorized = user_in_hubmap_read_group(request=request)
672+
673+
# For non-public documents, reject the request if the user is not authorized
674+
if not public_entity:
675+
if valid_user_token is None:
676+
forbidden_error( f"{dataset_dict['entity_type']} for"
677+
f" {dataset_dict['uuid']} is not"
678+
f" accessible without presenting a token.")
679+
if not user_authorized:
680+
forbidden_error( f"The requested Dataset has non-public data."
681+
f" A Globus token with access permission is required.")
682+
683+
# By now, either the entity is public accessible or the user has the correct access level
684+
if associated_data.lower() == 'organs':
685+
associated_entities = app_neo4j_queries.get_associated_organs_from_dataset(neo4j_driver_instance,
686+
dataset_dict['uuid'])
687+
elif associated_data.lower() == 'samples':
688+
associated_entities = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance,
689+
dataset_dict['uuid'])
690+
elif associated_data.lower() == 'donors':
691+
associated_entities = app_neo4j_queries.get_associated_donors_from_dataset(neo4j_driver_instance,
692+
dataset_dict['uuid'])
693+
else:
694+
logger.error( f"Expected associated data type to be verified, but got"
695+
f" associated_data.lower()={associated_data.lower()} while retrieving from Neo4j.")
696+
internal_server_error(f"Unexpected error retrieving '{associated_data}' from the data store")
697+
698+
# If there are zero items in the list of associated_entities, return an empty list rather than retrieving.
699+
if len(associated_entities) < 1:
700+
return []
701+
702+
# Use the internal token to query the target entity to assure it is returned. This way public
703+
# entities can be accessed even if valid_user_token is None.
704+
internal_token = auth_helper_instance.getProcessSecret()
705+
complete_entities_list = schema_manager.get_complete_entities_list( token=internal_token
706+
, entities_list=associated_entities)
707+
# Final result after normalization
708+
final_result = schema_manager.normalize_entities_list_for_response(entities_list=complete_entities_list)
709+
710+
# For public entities, limit the fields in the response unless the authorization presented in the
711+
# Request allows the user to see all properties.
712+
if public_entity and not user_authorized:
713+
filtered_entities_list = []
714+
for entity in final_result:
715+
final_entity_dict = schema_manager.exclude_properties_from_response(excluded_fields=fields_to_exclude
716+
, output_dict=entity)
717+
filtered_entities_list.append(final_entity_dict)
718+
final_result = filtered_entities_list
719+
720+
return final_result
721+
635722
'''
636723
Retrieve the full provenance metadata information of a given entity by id, as
637724
produced for metadata.json files.
@@ -644,11 +731,11 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
644731
645732
An HTTP 400 Response is returned for reasons described in the error message, such as
646733
requesting data for a non-Dataset.
647-
734+
648735
An HTTP 401 Response is returned when a token is presented that is not valid.
649736
650737
An HTTP 403 Response is returned if user is not authorized to access the Dataset, as described above.
651-
738+
652739
An HTTP 404 Response is returned if the requested Dataset is not found.
653740
654741
Parameters
@@ -661,39 +748,95 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
661748
json
662749
Valid JSON for the full provenance metadata of the requested Dataset
663750
'''
664-
@app.route('/datasets/<id>/prov-metadata', methods = ['GET'])
665-
def get_provenance_metadata_by_id_for_auth_level(id:Annotated[str, 32]) -> str:
751+
@app.route('/datasets/<id>/prov-metadata', methods=['GET'])
752+
def get_provenance_metadata_by_id_for_auth_level(id):
753+
# Token is not required, but if an invalid token is provided,
754+
# we need to tell the client with a 401 error
755+
validate_token_if_auth_header_exists(request)
666756

667-
try:
668-
# Get the user's token from the Request for later authorization to access non-public entities.
669-
# If an invalid token is presented, reject with an HTTP 401 Response.
670-
# N.B. None is a "valid" user_token which may be adequate for access to public data.
671-
user_token = entity_worker.get_request_auth_token(request=request)
672-
673-
# Get the user's token from the Request for later authorization to access non-public entities.
674-
user_info = entity_worker.get_request_user_info_with_groups(request=request)
675-
676-
# Retrieve the expanded metadata for the entity. If authorization of token or group membership
677-
# does not allow access to the entity, exceptions will be raised describing the problem.
678-
expanded_entity_metadata = entity_worker.get_expanded_dataset_metadata( dataset_id=id
679-
, valid_user_token=user_token
680-
, user_info=user_info)
681-
return jsonify(expanded_entity_metadata)
682-
except entityEx.EntityBadRequestException as e_400:
683-
return jsonify({'error': e_400.message}), 400
684-
except entityEx.EntityUnauthorizedException as e_401:
685-
return jsonify({'error': e_401.message}), 401
686-
except entityEx.EntityForbiddenException as e_403:
687-
return jsonify({'error': e_403.message}), 403
688-
except entityEx.EntityNotFoundException as e_404:
689-
return jsonify({'error': e_404.message}), 404
690-
except entityEx.EntityServerErrorException as e_500:
691-
logger.exception(f"An unexpected error occurred during provenance metadata retrieval.")
692-
return jsonify({'error': e_500.message}), 500
693-
except Exception as e:
694-
default_msg = 'An unexpected error occurred retrieving provenance metadata'
695-
logger.exception(default_msg)
696-
return jsonify({'error': default_msg}), 500
757+
# Use the internal token to query the target entity
758+
# since public entities don't require user token
759+
token = get_internal_token()
760+
761+
# The argument id that shadows Python's built-in id should be an identifier for a Dataset.
762+
# Get the entity dict from cache if exists
763+
# Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
764+
dataset_dict = query_target_entity(id, token)
765+
normalized_entity_type = dataset_dict['entity_type']
766+
767+
# A bit of validation
768+
if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
769+
bad_request_error(f"Unable to get the provenance metatdata for this: {normalized_entity_type},"
770+
" supported entity types: Dataset, Publication")
771+
772+
# Get the generated complete entity result from cache if exists
773+
# Otherwise re-generate on the fly
774+
complete_dict = schema_manager.get_complete_entity_result(token=token
775+
, entity_dict=dataset_dict)
776+
777+
# Determine if the entity is publicly visible based on its data only.
778+
# To verify if a Collection is public, it is necessary to have its Datasets, which
779+
# are populated as triggered data. So pull back the complete entity for
780+
# _get_entity_visibility() to check.
781+
entity_scope = _get_entity_visibility( normalized_entity_type=normalized_entity_type
782+
,entity_dict=complete_dict)
783+
public_entity = (entity_scope is DataVisibilityEnum.PUBLIC)
784+
785+
# Set a variable reflecting the user's authorization by being in the HuBMAP-READ Globus Group
786+
user_authorized = user_in_hubmap_read_group(request=request)
787+
788+
# Get user token from Authorization header
789+
user_token = get_user_token(request)
790+
791+
# For non-public documents, reject the request if the user is not authorized
792+
if not public_entity:
793+
if user_token is None:
794+
forbidden_error( f"{normalized_entity_type} for {complete_dict['uuid']} is not"
795+
f" accessible without presenting a token.")
796+
if not user_authorized:
797+
forbidden_error( f"The requested {normalized_entity_type} has non-public data."
798+
f" A Globus token with access permission is required.")
799+
800+
# We'll need to return all the properties including those generated by
801+
# `on_read_trigger` to have a complete result e.g., the 'next_revision_uuid' and
802+
# 'previous_revision_uuid' being used below.
803+
# Collections, however, will filter out only public properties for return.
804+
805+
# Also normalize the result based on schema
806+
final_result = schema_manager.normalize_entity_result_for_response(complete_dict)
807+
808+
# Identify fields to exclude from non-authorized responses for the entity type.
809+
fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type)
810+
811+
# Response with the dict
812+
if public_entity and not user_authorized:
813+
final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
814+
815+
# Retrieve the associated data for the entity, and add it to the expanded dictionary.
816+
associated_organ_list = _get_dataset_associated_data( dataset_dict=final_result
817+
, dataset_visibility=entity_scope
818+
, valid_user_token=user_token
819+
, request=request
820+
, associated_data='Organs')
821+
final_result['organs'] = associated_organ_list
822+
823+
associated_sample_list = _get_dataset_associated_data( dataset_dict=final_result
824+
, dataset_visibility=entity_scope
825+
, valid_user_token=user_token
826+
, request=request
827+
, associated_data='Samples')
828+
final_result['samples'] = associated_sample_list
829+
830+
associated_donor_list = _get_dataset_associated_data( dataset_dict=final_result
831+
, dataset_visibility=entity_scope
832+
, valid_user_token=user_token
833+
, request=request
834+
, associated_data='Donors')
835+
836+
final_result['donors'] = associated_donor_list
837+
838+
# Return JSON for the dictionary containing the entity metadata as well as metadata for the associated data.
839+
return jsonify(final_result)
697840

698841
"""
699842
Retrieve the metadata information of a given entity by id

src/dev_entity_exceptions.py

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)