Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
ee75d15
Initial commit introducing /prov-metadata endpoint, EntityWorker, and…
kburke Jan 14, 2025
b0e10df
Misc cleanup of initial commit, and revised Smart API YAML for new /p…
kburke Jan 15, 2025
4f39cf9
Misc cleanup of initial commit, and revised Smart API YAML for new /p…
kburke Jan 15, 2025
8e90a09
Merge remote-tracking branch 'origin/main' into karlburke/NewProvMeta…
kburke Jan 16, 2025
07f9c09
Final comments before PR.
kburke Jan 16, 2025
476bce8
Remove print statements in init() method. Remove support for filterin…
kburke Jan 16, 2025
b16a7ca
Final PR comment requests. Align endpoint name to /datasets/<id>/pro…
kburke Jan 16, 2025
d184617
Merge remote-tracking branch 'origin/main' into karlburke/NewProvMeta…
kburke Jan 17, 2025
e66bc72
Correct various "authorization" code to enable appropriate exclusion …
kburke Jan 17, 2025
0b9ed14
Merge remote-tracking branch 'origin/main' into karlburke/NewProvMeta…
kburke Jan 22, 2025
c367217
Initial changes for https://github.com/hubmapconsortium/entity-api/is…
kburke Jan 22, 2025
fd59a8b
Merge remote-tracking branch 'origin/main' into karlburke/NewProvMeta…
kburke Jan 22, 2025
139ff51
Code review requests for excluding nested fields. Missed content plu…
kburke Jan 22, 2025
9c3e602
Remove reference to submission_id in Collection.datasets YAML
kburke Jan 23, 2025
95806ca
Limit provenance metadata support to only Datasets. Error immediatel…
kburke Jan 23, 2025
dcc4b1b
Replace usage of shadowed id variable with the local entity_id of the…
kburke Jan 23, 2025
9e4ee8c
Remove remnant of property support.
kburke Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion entity-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,30 @@ components:
specimen_tumor_distance_unit:
type: string
description: ""
DatasetProvMetadata:
type: object
allOf:
- $ref: '#/components/schemas/Dataset'
- type: object
properties:
samples:
type: array
items:
$ref: '#/components/schemas/Sample'
readOnly: true
description: 'List of Samples not of sub-type organ for the dataset'
organs:
type: array
items:
$ref: '#/components/schemas/Sample'
readOnly: true
description: 'List of Samples of sub-type organ for the dataset'
donors:
type: array
items:
$ref: '#/components/schemas/Donor'
readOnly: true
description: 'List of Donors for the dataset'
File:
type: object
properties:
Expand Down Expand Up @@ -1924,7 +1948,31 @@ paths:
description: The target entity could not be found
'500':
description: Internal error
'/datasets/{id}/latest-revision':
# GET /datasets/{id}/prov-metadata
# The documented responses mirror the status codes the endpoint handler in
# src/app.py can return: 200, 400, 401, 403, 404 and 500.
'/datasets/{id}/prov-metadata':
  get:
    summary: 'Returns full provenance metadata for a Dataset, which can be used when publishing the Dataset.'
    parameters:
      - name: id
        in: path
        description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID (32 digit hexadecimal number)
        required: true
        schema:
          type: string
    responses:
      '200':
        description: Full provenance information for the given dataset as JSON in the Response body.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DatasetProvMetadata'
      '400':
        description: The request is invalid, for example the given id does not identify a Dataset
      '401':
        description: The user's token has expired or the user did not supply a valid token
      '403':
        description: The user is not authorized to use this method
      '404':
        description: The target entity could not be found
      '500':
        description: Internal error
'/datasets/{id}/latest-revision':
get:
summary: 'Retrieve the latest (newest) revision of a given Dataset. Public/Consortium access rules apply - if no token/consortium access then must be for a public dataset and the returned Dataset must be the latest public version. If the given dataset itself is the latest revision, meaning it has no next revisions, this dataset gets returned.'
parameters:
Expand Down Expand Up @@ -2812,3 +2860,4 @@ paths:
description: The given dataset is unpublished and the user does not have the authorization to view it.
'500':
description: Internal error

96 changes: 85 additions & 11 deletions src/app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
import collections
from typing import Callable, List, Optional
from typing import Callable, List, Optional, Annotated
from datetime import datetime
from flask import Flask, g, jsonify, abort, request, Response, redirect, make_response
from neo4j.exceptions import TransactionError
Expand Down Expand Up @@ -38,8 +38,8 @@
from schema.schema_constants import TriggerTypeEnum
from metadata_constraints import get_constraints, constraints_json_is_valid
# from lib.ontology import initialize_ubkg, init_ontology, Ontology, UbkgSDK


from dev_entity_worker import EntityWorker
import dev_entity_exceptions as entityEx

# HuBMAP commons
from hubmap_commons import string_helper
Expand Down Expand Up @@ -248,6 +248,24 @@ def http_internal_server_error(e):
except Exception as s3exception:
logger.critical(s3exception, exc_info=True)

####################################################################################################
## Initialize a "worker" for the service.
## For initial transition to "worker" usage, pass in globals of app.py which would eventually
## be only in the worker and not in app.py.
####################################################################################################
# Start from None so the module-level name exists even when construction
# fails below; the failure is logged and startup continues.
entity_worker = None
try:
    entity_worker = EntityWorker(
        app_config=app.config,
        schema_mgr=schema_manager,
        memcached_client_instance=memcached_client_instance,
        neo4j_driver_instance=neo4j_driver_instance,
    )
    # Sanity check on the constructed object; a failure here is routed
    # through the same logging path as a constructor exception.
    if not isinstance(entity_worker, EntityWorker):
        raise Exception("Error instantiating a EntityWorker during startup.")
    logger.info("EntityWorker instantiated using app.cfg setting.")
except Exception as worker_init_error:
    # Deliberately not re-raised: the problem is reported in the log and the
    # rest of module initialisation proceeds.
    logger.critical("Unable to instantiate a EntityWorker during startup.")
    logger.error(worker_init_error, exc_info=True)

####################################################################################################
## REFERENCE DOI Redirection
####################################################################################################
Expand Down Expand Up @@ -614,6 +632,69 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
entity_visibility = DataVisibilityEnum.PUBLIC
return entity_visibility

'''
Retrieve the full provenance metadata information of a given entity by id, as
produced for metadata.json files.

This endpoint is publicly accessible. Without presenting a token, only data for
published Datasets may be requested.

When a valid token is presented, a member of the HuBMAP-Read Globus group is authorized to
access any Dataset. Otherwise, only access to published Datasets is authorized.

An HTTP 400 Response is returned for reasons described in the error message, such as
requesting data for a non-Dataset.

An HTTP 401 Response is returned when a token is presented that is not valid.

An HTTP 403 Response is returned if user is not authorized to access the Dataset, as described above.

An HTTP 404 Response is returned if the requested Dataset is not found.

Parameters
----------
id : str
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity

Returns
-------
json
Valid JSON for the full provenance metadata of the requested Dataset
'''
@app.route('/datasets/<id>/prov-metadata', methods = ['GET'])
def get_provenance_metadata_by_id_for_auth_level(id: Annotated[str, 32]) -> 'Response | tuple[Response, int]':
    """Return the expanded provenance metadata of a Dataset as JSON.

    The work is delegated to the module-level ``entity_worker``. The
    exceptions from ``dev_entity_exceptions`` that it raises are translated
    here into HTTP error responses, so knowledge of HTTP status codes stays
    in the endpoint.

    Parameters
    ----------
    id : str
        The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of the target Dataset.
        The name shadows the ``id`` builtin but has to match the ``<id>``
        variable of the route, which Flask passes as a keyword argument.

    Returns
    -------
    Response or (Response, int)
        On success, the JSON response produced by ``jsonify``. On failure, a
        tuple of a JSON body ``{'error': <message>}`` and the HTTP status
        code 400, 401, 403, 404 or 500.
    """
    try:
        # Get the user's token from the Request for later authorization to access non-public entities.
        # If an invalid token is presented, reject with an HTTP 401 Response.
        # N.B. None is a "valid" user_token which may be adequate for access to public data.
        user_token = entity_worker.get_request_auth_token(request=request)

        # Get the user's info from the Request (presumably including group
        # memberships, per the worker method name) for later authorization.
        user_info = entity_worker.get_request_user_info_with_groups(request=request)

        # Retrieve the expanded metadata for the entity. If authorization of token or group membership
        # does not allow access to the entity, exceptions will be raised describing the problem.
        expanded_entity_metadata = entity_worker.get_expanded_dataset_metadata(
            dataset_id=id,
            valid_user_token=user_token,
            user_info=user_info,
        )
        return jsonify(expanded_entity_metadata)
    except entityEx.EntityBadRequestException as e_400:
        return jsonify({'error': e_400.message}), 400
    except entityEx.EntityUnauthorizedException as e_401:
        return jsonify({'error': e_401.message}), 401
    except entityEx.EntityForbiddenException as e_403:
        return jsonify({'error': e_403.message}), 403
    except entityEx.EntityNotFoundException as e_404:
        return jsonify({'error': e_404.message}), 404
    except entityEx.EntityServerErrorException as e_500:
        # The worker-supplied message is returned to the caller; the
        # traceback goes to the log only.
        logger.exception("An unexpected error occurred during provenance metadata retrieval.")
        return jsonify({'error': e_500.message}), 500
    except Exception as e:
        # Anything unanticipated (including entity_worker being None after a
        # failed startup) is reported with a generic message so that internal
        # details are not exposed in the response.
        default_msg = 'An unexpected error occurred retrieving provenance metadata'
        logger.exception(default_msg)
        return jsonify({'error': default_msg}), 500

"""
Retrieve the metadata information of a given entity by id

Expand Down Expand Up @@ -715,13 +796,6 @@ def get_entity_by_id(id):
# Response with the dict
if public_entity and not user_in_hubmap_read_group(request):
final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
if normalized_entity_type == 'Collection':
for i, dataset in enumerate(final_result.get('datasets', [])):
if _get_entity_visibility(normalized_entity_type='Dataset', entity_dict=dataset) != DataVisibilityEnum.PUBLIC or user_in_hubmap_read_group(request):
# If the dataset is non-public, or if the user has read-group access, there is no need to remove fields, continue to the next dataset
continue
dataset_excluded_fields = schema_manager.get_fields_to_exclude('Dataset')
final_result.get('datasets')[i] = schema_manager.exclude_properties_from_response(dataset_excluded_fields, dataset)
return jsonify(final_result)

"""
Expand Down Expand Up @@ -5690,7 +5764,7 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met
# Without token, the user can only access public collections, modify the collection result
# by only returning public datasets attached to this collection
if isinstance(user_token, Response):
forbidden_error(f"{normalized_entity_type} for {id} is not accessible without presenting a token.")
forbidden_error(f"{normalized_entity_type} for {entity_id} is not accessible without presenting a token.")
else:
# When the groups token is valid, but the user doesn't belong to HuBMAP-READ group
# Or the token is valid but doesn't contain group information (auth token or transfer token)
Expand Down
44 changes: 44 additions & 0 deletions src/dev_entity_exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Exceptions used internally by the service, typically for anticipated exceptions.
# Knowledge of Flask, HTTP codes, and formatting of the Response should be
# closer to the endpoint @app.route() methods rather than throughout service.
class EntityConfigurationException(Exception):
    """Raised when problems loading the service configuration are encountered.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception``, so ``str(exc)`` yields the same text.
    """

    def __init__(self, message='There were problems loading the configuration for the service.'):
        super().__init__(message)
        self.message = message

class EntityRequestAuthorizationException(Exception):
    """Raised for problems with the authorization info on a Request.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception``, so ``str(exc)`` yields the same text.
    """

    def __init__(self, message='Request authorization problem.'):
        super().__init__(message)
        self.message = message

class EntityUnauthorizedException(Exception):
    """Raised when authorization for a resource fails.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception``, so ``str(exc)`` yields the same text.
    """

    def __init__(self, message='Authorization failed.'):
        super().__init__(message)
        self.message = message

class EntityForbiddenException(Exception):
    """Raised when access to a resource is forbidden.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception``, so ``str(exc)`` yields the same text.
    """

    def __init__(self, message='Access forbidden.'):
        super().__init__(message)
        self.message = message

class EntityNotFoundException(Exception):
    """Raised when entity retrieval returns no results.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception``, so ``str(exc)`` yields the same text.
    """

    def __init__(self, message='Not found.'):
        super().__init__(message)
        self.message = message

class EntityBadRequestException(Exception):
    """Raised when entity retrieval is flagged as a bad request.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception``, so ``str(exc)`` yields the same text.
    """

    def __init__(self, message='Bad request.'):
        super().__init__(message)
        self.message = message

class EntityServerErrorException(Exception):
    """Raised when entity retrieval causes an internal server error.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception``, so ``str(exc)`` yields the same text.
    """

    def __init__(self, message='Internal server error.'):
        super().__init__(message)
        self.message = message
Loading
Loading