Skip to content
Merged
42 changes: 32 additions & 10 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@
MEMCACHED_MODE = False
MEMCACHED_PREFIX = 'NONE'

# Read the secret key which may be submitted in HTTP Request Headers to override the lockout of
# updates to entities with characteristics prohibiting their modification.
LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']

# Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)

Expand Down Expand Up @@ -1363,8 +1367,29 @@ def update_entity(id):
# Normalize user provided entity_type
normalized_entity_type = schema_manager.normalize_entity_type(entity_dict['entity_type'])

# Note, we don't support entity level validators on entity update via PUT
# Only entity create via POST is supported at the entity level
# Execute entity level validator defined in schema yaml before entity modification.
lockout_overridden = False
try:
schema_manager.execute_entity_level_validator(validator_type='before_entity_update_validator'
, normalized_entity_type=normalized_entity_type
, request=request
, existing_entity_dict=entity_dict)
except schema_errors.MissingApplicationHeaderException as e:
bad_request_error(e)
except schema_errors.InvalidApplicationHeaderException as e:
bad_request_error(e)
except schema_errors.LockedEntityUpdateException as leue:
# HTTP header names are case-insensitive, and request.headers.get() returns None if the header doesn't exist
locked_entity_update_header = request.headers.get(SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER)
if locked_entity_update_header and (LOCKED_ENTITY_UPDATE_OVERRIDE_KEY == locked_entity_update_header):
lockout_overridden = True
logger.info(f"For {entity_dict['entity_type']} {entity_dict['uuid']}"
f" update prohibited due to {str(leue)},"
f" but being overridden by valid {SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER} in request.")
else:
forbidden_error(leue)
except Exception as e:
internal_server_error(e)

# Validate request json against the yaml schema
# Pass in the entity_dict for missing required key check, this is different from creating new entity
Expand All @@ -1383,6 +1408,9 @@ def update_entity(id):
ValueError) as e:
bad_request_error(e)

# Proceed with per-entity updates after passing any entity-level or property-level validations which
# would have locked out updates.
#
# Sample, Dataset, and Upload: additional validation, update entity, after_update_trigger
# Collection and Donor: update entity
if normalized_entity_type == 'Sample':
Expand Down Expand Up @@ -1467,13 +1495,6 @@ def update_entity(id):
if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status:
after_update(normalized_entity_type, user_token, merged_updated_dict)
elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'):
entity_visibility = _get_entity_visibility( normalized_entity_type=normalized_entity_type
,entity_dict=entity_dict)
# Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI.
if entity_visibility == DataVisibilityEnum.PUBLIC:
logger.info(f"Attempt to update {normalized_entity_type} with id={id} which has visibility {entity_visibility}.")
bad_request_error(f"Cannot update {normalized_entity_type} due '{entity_visibility.value}' visibility.")

# Generate 'before_update_trigger' data and update the entity details in Neo4j
merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict)

Expand Down Expand Up @@ -1539,7 +1560,8 @@ def update_entity(id):
# Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou
# return jsonify(normalized_complete_dict)

return jsonify({'message': f"{normalized_entity_type} of {id} has been updated"})
override_msg = 'Lockout overridden. ' if lockout_overridden else ''
return jsonify({'message': f"{override_msg}{normalized_entity_type} of {id} has been updated"})


"""
Expand Down
5 changes: 5 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ NEO4J_URI = 'bolt://hubmap-neo4j-localhost:7687'
NEO4J_USERNAME = 'neo4j'
NEO4J_PASSWORD = '123'

# Secret value presented with the request header value named by
# SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER, expected to be of the form
# X-HuBMAP-Update-Override: <LOCKED_ENTITY_UPDATE_OVERRIDE_KEY value which follows>
LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = 'set during deployment'

# Set MEMCACHED_MODE to False to disable the caching for local development
MEMCACHED_MODE = True
MEMCACHED_SERVER = 'host:11211'
Expand Down
28 changes: 25 additions & 3 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
# - trigger types: before_create_trigger|after_create_trigger|before_update_trigger|after_update_trigger|on_read_trigger|on_index_trigger, one property can have none (default) or more than one triggers
# - updated_peripherally: a temporary measure to correctly handle any attributes which are potentially updated by multiple triggers

# Entity level validator:
# - types: before_entity_create_validator, a single validation method needed for creating or updating the entity
# Entity level validators:
# - types: before_entity_create_validator - validation method needed for creating an entity.
# before_entity_update_validator - validation method needed for updating an entity.

# Property level validators:
# - types: before_property_create_validators|before_property_update_validators, a list of validation methods
Expand Down Expand Up @@ -192,6 +193,9 @@ shared_entity_properties: &shared_entity_properties
ENTITIES:
############################################# Collection #############################################
Collection:
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
excluded_properties_from_public_response:
- datasets:
- lab_dataset_id
Expand Down Expand Up @@ -304,6 +308,9 @@ ENTITIES:
Dataset:
# Only allowed applications can create new Dataset via POST
before_entity_create_validator: validate_application_header_before_entity_create
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
# Dataset can be either derivation source or target
excluded_properties_from_public_response:
- lab_dataset_id
Expand Down Expand Up @@ -659,6 +666,9 @@ ENTITIES:
superclass: Dataset
# Only allowed applications can create new Publication via POST
before_entity_create_validator: validate_application_header_before_entity_create
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
# Publications can be either derivation source or target
derivation:
source: true
Expand Down Expand Up @@ -763,6 +773,9 @@ ENTITIES:
- lab_donor_id
- submission_id
- label
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
properties:
<<: *shared_properties
<<: *shared_entity_properties
Expand Down Expand Up @@ -896,6 +909,9 @@ ENTITIES:
- lab_id
# Both Sample and Donor ancestors of a Sample must have these fields removed
- submission_id
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
properties:
<<: *shared_properties
<<: *shared_entity_properties
Expand Down Expand Up @@ -1120,7 +1136,10 @@ ENTITIES:
Upload:
# Only allowed applications can create new Upload via POST
before_entity_create_validator: validate_application_header_before_entity_create
# Upload requires an ancestor of Lab, and and has no allowed decesndents
# No before_entity_update_validator needed for Upload because the entity is
# always considered "non-public", and therefore not blocked from update/PUT.
#
# Upload requires a Lab entity as an ancestor, and has no allowed descendants
derivation:
source: false
target: false # Set to false since the schema doesn't handle Lab currently
Expand Down Expand Up @@ -1268,5 +1287,8 @@ ENTITIES:
derivation:
source: false
target: false
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
properties:
<<: *shared_collection_properties
1 change: 1 addition & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class SchemaConstants(object):
COMPONENT_DATASET = 'component-dataset'
INGEST_PIPELINE_APP = 'ingest-pipeline'
HUBMAP_APP_HEADER = 'X-Hubmap-Application'
LOCKED_ENTITY_UPDATE_HEADER = 'X-HuBMAP-Update-Override'
INTERNAL_TRIGGER = 'X-Internal-Trigger'
DATASET_STATUS_PUBLISHED = 'published'

Expand Down
3 changes: 3 additions & 0 deletions src/schema/schema_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,7 @@ class MissingApplicationHeaderException(Exception):
pass

class InvalidApplicationHeaderException(Exception):
    """Signals an application-header problem detected during entity-level validation.

    In the code visible alongside this class, execute_entity_level_validator()
    re-raises this exception and update_entity() answers it with a
    bad-request error.
    NOTE(review): presumably raised when the application header is present but
    its value is not an allowed one -- the raising validator is not visible
    here; confirm.
    """
    pass

class LockedEntityUpdateException(Exception):
    """Signals that an update was refused because the target entity is locked.

    update_entity() answers this with a forbidden error unless the request
    carries the lockout-override header with the configured key, in which
    case the update is allowed to proceed.
    NOTE(review): presumably raised by validate_entity_not_locked_before_update
    -- the raising code is not visible here; confirm.
    """
    pass
60 changes: 40 additions & 20 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1166,13 +1166,15 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
Parameters
----------
validator_type : str
One of the validator types: before_entity_create_validator
One of the validator types recognized by the validate_entity_level_validator_type() method.
normalized_entity_type : str
One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload, Publication
request: Flask request object
The instance of Flask request passed in from application request
existing_entity_dict : dict
The dictionary for an entity, retrieved from Neo4j, for use during update/PUT validations
"""
def execute_entity_level_validator(validator_type, normalized_entity_type, request):
def execute_entity_level_validator(validator_type, normalized_entity_type, request, existing_entity_dict=None):
global _schema

# A bit of validation
Expand All @@ -1183,23 +1185,41 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque

for key in entity:
if validator_type == key:
validator_method_name = entity[validator_type]
if isinstance(entity[validator_type], str):
validator_method_names = [entity[validator_type]]
else:
# default to expecting a list when not a str
validator_method_names = entity[validator_type]

try:
# Get the target validator method defined in the schema_validators.py module
validator_method_to_call = getattr(schema_validators, validator_method_name)

logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}")

validator_method_to_call(normalized_entity_type, request)
except schema_errors.MissingApplicationHeaderException as e:
raise schema_errors.MissingApplicationHeaderException(e)
except schema_errors.InvalidApplicationHeaderException as e:
raise schema_errors.InvalidApplicationHeaderException(e)
except Exception as e:
msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
for validator_method_name in validator_method_names:
try:
# Get the target validator method defined in the schema_validators.py module
validator_method_to_call = getattr(schema_validators, validator_method_name)

logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}")

# Create a dictionary to hold data needed by any entity validator, which must be populated
# with validator-specific requirements when the method to be called is determined.
options_dict = {}
if existing_entity_dict is None:
# Execute the entity-level validation for create/POST
options_dict['http_request'] = request
validator_method_to_call(options_dict)
else:
# Execute the entity-level validation for update/PUT
options_dict['existing_entity_dict']= existing_entity_dict
validator_method_to_call(options_dict)
except schema_errors.MissingApplicationHeaderException as e:
raise schema_errors.MissingApplicationHeaderException(e)
except schema_errors.InvalidApplicationHeaderException as e:
raise schema_errors.InvalidApplicationHeaderException(e)
except schema_errors.LockedEntityUpdateException as leue:
raise leue
except Exception as e:
msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
raise e


"""
Expand Down Expand Up @@ -1360,10 +1380,10 @@ def validate_trigger_type(trigger_type:TriggerTypeEnum):
Parameters
----------
validator_type : str
One of the validator types: before_entity_create_validator
Name of an entity-level validator type, which must be listed in accepted_validator_types and found in this schema manager module.
"""
def validate_entity_level_validator_type(validator_type):
accepted_validator_types = ['before_entity_create_validator']
accepted_validator_types = ['before_entity_create_validator', 'before_entity_update_validator']
separator = ', '

if validator_type.lower() not in accepted_validator_types:
Expand Down
Loading