3838from schema .schema_constants import TriggerTypeEnum
3939from metadata_constraints import get_constraints , constraints_json_is_valid
4040# from lib.ontology import initialize_ubkg, init_ontology, Ontology, UbkgSDK
41- from dev_entity_worker import EntityWorker
42- import dev_entity_exceptions as entityEx
4341
4442# HuBMAP commons
4543from hubmap_commons import string_helper
@@ -248,23 +246,6 @@ def http_internal_server_error(e):
248246except Exception as s3exception :
249247 logger .critical (s3exception , exc_info = True )
250248
####################################################################################################
## Build the service-wide "worker" object.
## During the transition to worker usage, the worker is handed the globals of app.py which are
## meant to eventually live only in the worker rather than in app.py.
####################################################################################################
entity_worker = None
try:
    # Name lookups for the arguments happen inside the try, so a missing global is
    # logged below rather than stopping module import.
    entity_worker = EntityWorker(
        app_config=app.config,
        schema_mgr=schema_manager,
        memcached_client_instance=memcached_client_instance,
        neo4j_driver_instance=neo4j_driver_instance,
    )
    if isinstance(entity_worker, EntityWorker):
        logger.info("EntityWorker instantiated using app.cfg setting.")
    else:
        raise Exception("Error instantiating a EntityWorker during startup.")
except Exception as e:
    # Startup continues after a failure; the problem is only recorded in the log.
    logger.critical("Unable to instantiate a EntityWorker during startup.")
    logger.error(e, exc_info=True)
268249
269250####################################################################################################
270251## REFERENCE DOI Redirection
@@ -632,6 +613,112 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
632613 entity_visibility = DataVisibilityEnum .PUBLIC
633614 return entity_visibility
634615
'''
Retrieve the metadata for the entities associated with a Dataset. This method supports
Dataset entities, and can get the associated organs, samples, or donors.

The associated data returned depends upon the user's authorization. When the Dataset is
public and the request does not carry HuBMAP-READ group membership, the fields marked for
exclusion by the schema are removed from every associated entity before it is returned.
When the Dataset is not public, the request is rejected unless a token is presented and
the user is in the HuBMAP-READ group.

Parameters
----------
dataset_dict : dict
    A dictionary containing the properties of the target Dataset. Only the
    'entity_type' and 'uuid' keys are read here.
dataset_visibility : DataVisibilityEnum
    An indication of whether the Dataset itself is public or not, so the associated data
    can be filtered to match the entity dictionary before being returned.
valid_user_token : str
    Either the valid current token for an authenticated user or None.
request : request object
    The incoming HTTP request, passed to user_in_hubmap_read_group() to determine
    whether the user is authorized to see non-public data.
associated_data : str
    A string indicating the associated property to be retrieved, which must be one of
    'organs', 'samples', or 'donors'. Matching is case-insensitive.

Returns
-------
list
    A list of dictionaries, one per associated entity, normalized for the response.
    An empty list is returned when the Dataset has no entities of the requested kind.
'''
def _get_dataset_associated_data(dataset_dict, dataset_visibility, valid_user_token, request, associated_data: str):

    # Normalize the requested association once, rather than at every comparison.
    association = associated_data.lower()

    # For each supported association: the Neo4j query which retrieves it, and the entity
    # type whose excluded fields apply to the results. Organs are one kind of Sample.
    association_handlers = {
        'organs': (app_neo4j_queries.get_associated_organs_from_dataset, 'Sample'),
        'samples': (app_neo4j_queries.get_associated_samples_from_dataset, 'Sample'),
        'donors': (app_neo4j_queries.get_associated_donors_from_dataset, 'Donor'),
    }

    # Confirm the associated data requested is supported by this method.
    if association not in association_handlers:
        bad_request_error(f"Dataset associated data cannot be retrieved for"
                          f" {associated_data}, only"
                          f" {COMMA_SEPARATOR.join(association_handlers)}.")

    # Confirm the dictionary passed in is for a Dataset entity.
    if not schema_manager.entity_type_instanceof(dataset_dict['entity_type'], 'Dataset'):
        bad_request_error(f"'{dataset_dict['entity_type']}' for"
                          f" uuid={dataset_dict['uuid']} is not a Dataset or Publication,"
                          f" so '{associated_data}' can not be retrieved for it.")

    # NOTE(review): bad_request_error() is presumed to abort the request - confirm. This
    # guard only triggers if it returned instead, and keeps that case from continuing with
    # an unsupported association.
    handler = association_handlers.get(association)
    if handler is None:
        logger.error(f"Expected associated data type to be verified, but got"
                     f" associated_data.lower()={association}.")
        internal_server_error(f"Unexpected error retrieving '{associated_data}' for a Dataset")
    query_associated_entities, excluded_fields_entity_type = handler

    # Set up fields to be excluded when retrieving the entities associated with the Dataset.
    fields_to_exclude = schema_manager.get_fields_to_exclude(excluded_fields_entity_type)

    public_entity = (dataset_visibility is DataVisibilityEnum.PUBLIC)

    # Set a variable reflecting the user's authorization by being in the HuBMAP-READ Globus Group
    user_authorized = user_in_hubmap_read_group(request=request)

    # For non-public documents, reject the request if the user is not authorized
    if not public_entity:
        if valid_user_token is None:
            forbidden_error(f"{dataset_dict['entity_type']} for"
                            f" {dataset_dict['uuid']} is not"
                            f" accessible without presenting a token.")
        if not user_authorized:
            forbidden_error(f"The requested Dataset has non-public data."
                            f" A Globus token with access permission is required.")

    # By now, either the entity is public accessible or the user has the correct access level
    associated_entities = query_associated_entities(neo4j_driver_instance, dataset_dict['uuid'])

    # If nothing is associated, return an empty list rather than retrieving. The truthiness
    # check also covers a query which returns None instead of an empty list.
    if not associated_entities:
        return []

    # Use the internal token to query the target entities to assure they are returned. This
    # way public entities can be accessed even if valid_user_token is None.
    internal_token = auth_helper_instance.getProcessSecret()
    complete_entities_list = schema_manager.get_complete_entities_list(token=internal_token,
                                                                       entities_list=associated_entities)
    # Final result after normalization
    final_result = schema_manager.normalize_entities_list_for_response(entities_list=complete_entities_list)

    # For public entities, limit the fields in the response unless the authorization presented
    # in the Request allows the user to see all properties.
    if public_entity and not user_authorized:
        final_result = [
            schema_manager.exclude_properties_from_response(excluded_fields=fields_to_exclude,
                                                            output_dict=entity)
            for entity in final_result
        ]

    return final_result
721+
635722'''
636723Retrieve the full provenance metadata information of a given entity by id, as
637724produced for metadata.json files.
@@ -644,11 +731,11 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
644731
645732An HTTP 400 Response is returned for reasons described in the error message, such as
646733requesting data for a non-Dataset.
647-
734+
648735An HTTP 401 Response is returned when a token is presented that is not valid.
649736
650737An HTTP 403 Response is returned if user is not authorized to access the Dataset, as described above.
651-
738+
652739An HTTP 404 Response is returned if the requested Dataset is not found.
653740
654741Parameters
@@ -661,39 +748,95 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
661748json
662749 Valid JSON for the full provenance metadata of the requested Dataset
663750'''
@app.route('/datasets/<id>/prov-metadata', methods=['GET'])
def get_provenance_metadata_by_id_for_auth_level(id):
    # A token is optional for this endpoint, but when an invalid one is presented
    # the client must be told so with a 401 error.
    validate_token_if_auth_header_exists(request)

    # Public entities do not require a user token, so the target entity is
    # always looked up using the internal token.
    internal_token = get_internal_token()

    # The argument id (which shadows Python's built-in id) should identify a Dataset.
    # The entity dict comes from cache when present, otherwise from uuid-api and neo4j.
    dataset_dict = query_target_entity(id, internal_token)
    normalized_entity_type = dataset_dict['entity_type']

    # Only Dataset entities (and their subtypes) have provenance metadata.
    is_dataset = schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset')
    if not is_dataset:
        bad_request_error(f"Unable to get the provenance metatdata for this: {normalized_entity_type},"
                          " supported entity types: Dataset, Publication")

    # The complete entity result comes from cache when present, otherwise it is
    # re-generated on the fly.
    complete_entity = schema_manager.get_complete_entity_result(token=internal_token,
                                                                entity_dict=dataset_dict)

    # Visibility is determined from the entity's own data, using the complete entity
    # so that triggered data is available to _get_entity_visibility().
    entity_scope = _get_entity_visibility(normalized_entity_type=normalized_entity_type,
                                          entity_dict=complete_entity)
    is_public = entity_scope is DataVisibilityEnum.PUBLIC

    # Whether the user is authorized by being in the HuBMAP-READ Globus Group.
    in_read_group = user_in_hubmap_read_group(request=request)

    # User token from the Authorization header.
    # NOTE(review): the check below presumes get_user_token() returns None when no
    # token was presented - confirm against its implementation.
    user_token = get_user_token(request)

    # Non-public documents require both a token and HuBMAP-READ membership.
    if not is_public:
        if user_token is None:
            forbidden_error(f"{normalized_entity_type} for {complete_entity['uuid']} is not"
                            f" accessible without presenting a token.")
        if not in_read_group:
            forbidden_error(f"The requested {normalized_entity_type} has non-public data."
                            f" A Globus token with access permission is required.")

    # All properties, including those generated by `on_read_trigger`, are kept so the
    # result is complete, and the result is normalized based on the schema.
    response_dict = schema_manager.normalize_entity_result_for_response(complete_entity)

    # Fields to exclude from non-authorized responses for the entity type.
    excluded_fields = schema_manager.get_fields_to_exclude(normalized_entity_type)

    # A public entity viewed without HuBMAP-READ membership only exposes public fields.
    if is_public and not in_read_group:
        response_dict = schema_manager.exclude_properties_from_response(excluded_fields, response_dict)

    # Retrieve each kind of associated data in turn and attach it to the expanded
    # dictionary under the lower-cased name of the association.
    for association in ('Organs', 'Samples', 'Donors'):
        associated_list = _get_dataset_associated_data(dataset_dict=response_dict,
                                                       dataset_visibility=entity_scope,
                                                       valid_user_token=user_token,
                                                       request=request,
                                                       associated_data=association)
        response_dict[association.lower()] = associated_list

    # JSON for the entity metadata together with the metadata of its associated data.
    return jsonify(response_dict)
697840
698841"""
699842Retrieve the metadata information of a given entity by id
0 commit comments