Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.4.7
2.4.8
10 changes: 8 additions & 2 deletions entity-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ components:
- block
- section
- suspension
description: 'A code representing the type of specimen. Must be an organ, block, section, or suspension'
description: 'A code representing the type of specimen. Must be an organ, block, section, or suspension, in all lower case.'
protocol_url:
type: string
description: 'The protocols.io doi url pointing to the protocol under which the sample was obtained and/or prepared.'
Expand Down Expand Up @@ -482,7 +482,7 @@ components:
- TR
- UR
- UT
description: 'Organ code specifier, only set if sample_category == organ. Valid values found in: [organ types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml)'
description: 'Organ code specifier, only set if sample_category == organ. Valid values found in: [organ types](https://ontology.api.hubmapconsortium.org/organs/by-code?application_context=HUBMAP)'
organ_other:
type: string
description: The organ type provided by the user if "other" organ type is selected
Expand Down Expand Up @@ -903,6 +903,12 @@ components:
$ref: '#/components/schemas/Dataset'
readOnly: true
description: 'The datasets that are contained in this Upload.'
anticipated_complete_upload_month:
type: string
description: 'The month that the Upload is anticipated to have all required data uploaded, in the format YYYY-MM.'
anticipated_dataset_count:
type: integer
description: 'The total number of datasets that this Upload will eventually contain.'
Collection:
type: object
properties:
Expand Down
54 changes: 42 additions & 12 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@
MEMCACHED_MODE = False
MEMCACHED_PREFIX = 'NONE'

# Read the secret key which may be submitted in HTTP Request Headers to override the lockout of
# updates to entities with characteristics prohibiting their modification.
LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']

# Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)

Expand Down Expand Up @@ -1272,7 +1276,15 @@ def create_multiple_samples(count):
# No need to log the validation errors
bad_request_error(str(e))

# `direct_ancestor_uuid` is required on create
try:
schema_manager.execute_property_level_validators('before_property_create_validators', normalized_entity_type, request, {}, json_data_dict)
# Currently only ValueError
except ValueError as e:
bad_request_error(e)
except schema_errors.UnimplementedValidatorException as uve:
internal_server_error(uve)

# `direct_ancestor_uuid` is required on create for a Sample.
# Check existence of the direct ancestor (either another Sample or Donor)
direct_ancestor_dict = query_target_entity(json_data_dict['direct_ancestor_uuid'], user_token)

Expand All @@ -1288,7 +1300,7 @@ def create_multiple_samples(count):
if ('organ' not in json_data_dict) or (not json_data_dict['organ']):
bad_request_error("A valid organ code is required since the direct ancestor is a Donor")

# Generate 'before_create_triiger' data and create the entity details in Neo4j
# Generate 'before_create_trigger' data and create the entity details in Neo4j
generated_ids_dict_list = create_multiple_samples_details(request, normalized_entity_type, user_token, json_data_dict, count)

# Also index each new Sample node in elasticsearch via search-api
Expand Down Expand Up @@ -1355,8 +1367,29 @@ def update_entity(id):
# Normalize user provided entity_type
normalized_entity_type = schema_manager.normalize_entity_type(entity_dict['entity_type'])

# Note, we don't support entity level validators on entity update via PUT
# Only entity create via POST is supported at the entity level
# Execute entity level validator defined in schema yaml before entity modification.
lockout_overridden = False
try:
schema_manager.execute_entity_level_validator(validator_type='before_entity_update_validator'
, normalized_entity_type=normalized_entity_type
, request=request
, existing_entity_dict=entity_dict)
except schema_errors.MissingApplicationHeaderException as e:
bad_request_error(e)
except schema_errors.InvalidApplicationHeaderException as e:
bad_request_error(e)
except schema_errors.LockedEntityUpdateException as leue:
# HTTP header names are case-insensitive, and request.headers.get() returns None if the header doesn't exist
locked_entity_update_header = request.headers.get(SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER)
if locked_entity_update_header and (LOCKED_ENTITY_UPDATE_OVERRIDE_KEY == locked_entity_update_header):
lockout_overridden = True
logger.info(f"For {entity_dict['entity_type']} {entity_dict['uuid']}"
f" update prohibited due to {str(leue)},"
f" but being overridden by valid {SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER} in request.")
else:
forbidden_error(leue)
except Exception as e:
internal_server_error(e)

# Validate request json against the yaml schema
# Pass in the entity_dict for missing required key check, this is different from creating new entity
Expand All @@ -1375,6 +1408,9 @@ def update_entity(id):
ValueError) as e:
bad_request_error(e)

# Proceed with per-entity updates after passing any entity-level or property-level validations which
# would have locked out updates.
#
# Sample, Dataset, and Upload: additional validation, update entity, after_update_trigger
# Collection and Donor: update entity
if normalized_entity_type == 'Sample':
Expand Down Expand Up @@ -1459,13 +1495,6 @@ def update_entity(id):
if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status:
after_update(normalized_entity_type, user_token, merged_updated_dict)
elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'):
entity_visibility = _get_entity_visibility( normalized_entity_type=normalized_entity_type
,entity_dict=entity_dict)
# Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI.
if entity_visibility == DataVisibilityEnum.PUBLIC:
logger.info(f"Attempt to update {normalized_entity_type} with id={id} which has visibility {entity_visibility}.")
bad_request_error(f"Cannot update {normalized_entity_type} due '{entity_visibility.value}' visibility.")

# Generate 'before_update_trigger' data and update the entity details in Neo4j
merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict)

Expand Down Expand Up @@ -1531,7 +1560,8 @@ def update_entity(id):
# Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou
# return jsonify(normalized_complete_dict)

return jsonify({'message': f"{normalized_entity_type} of {id} has been updated"})
override_msg = 'Lockout overridden. ' if lockout_overridden else ''
return jsonify({'message': f"{override_msg}{normalized_entity_type} of {id} has been updated"})


"""
Expand Down
5 changes: 5 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ NEO4J_URI = 'bolt://hubmap-neo4j-localhost:7687'
NEO4J_USERNAME = 'neo4j'
NEO4J_PASSWORD = '123'

# Secret value presented with the request header value named by
# SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER, expected to be of the form
# X-HuBMAP-Update-Override: <LOCKED_ENTITY_UPDATE_OVERRIDE_KEY value which follows>
LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = 'set during deployment'

# Set MEMCACHED_MODE to False to disable the caching for local development
MEMCACHED_MODE = True
MEMCACHED_SERVER = 'host:11211'
Expand Down
46 changes: 43 additions & 3 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
# - trigger types: before_create_trigger|after_create_trigger|before_update_trigger|after_update_trigger|on_read_trigger|on_index_trigger, one property can have none (default) or more than one triggers
# - updated_peripherally: a temporary measure to correctly handle any attributes which are potentially updated by multiple triggers

# Entity level validator:
# - types: before_entity_create_validator, a single validation method needed for creating or updating the entity
# Entity level validators:
# - types: before_entity_create_validator - validation method needed for creating an entity.
# before_entity_update_validator - validation method needed for updating an entity.

# Property level validators:
# - types: before_property_create_validators|before_property_update_validators, a list of validation methods
Expand Down Expand Up @@ -192,6 +193,9 @@ shared_entity_properties: &shared_entity_properties
ENTITIES:
############################################# Collection #############################################
Collection:
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
excluded_properties_from_public_response:
- datasets:
- lab_dataset_id
Expand Down Expand Up @@ -304,6 +308,9 @@ ENTITIES:
Dataset:
# Only allowed applications can create new Dataset via POST
before_entity_create_validator: validate_application_header_before_entity_create
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
# Dataset can be either derivation source or target
excluded_properties_from_public_response:
- lab_dataset_id
Expand Down Expand Up @@ -659,6 +666,9 @@ ENTITIES:
superclass: Dataset
# Only allowed applications can create new Publication via POST
before_entity_create_validator: validate_application_header_before_entity_create
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
# Publications can be either derivation source or target
derivation:
source: true
Expand Down Expand Up @@ -763,6 +773,9 @@ ENTITIES:
- lab_donor_id
- submission_id
- label
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
properties:
<<: *shared_properties
<<: *shared_entity_properties
Expand Down Expand Up @@ -896,6 +909,9 @@ ENTITIES:
- lab_id
# Both Sample and Donor ancestors of a Sample must have these fields removed
- submission_id
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
properties:
<<: *shared_properties
<<: *shared_entity_properties
Expand Down Expand Up @@ -1120,7 +1136,10 @@ ENTITIES:
Upload:
# Only allowed applications can create new Upload via POST
before_entity_create_validator: validate_application_header_before_entity_create
# Upload requires an ancestor of Lab, and and has no allowed decesndents
# No before_entity_update_validator needed for Upload because the entity is
# always considered "non-public", and therefore not blocked from update/PUT.
#
# Upload requires a Lab entity as an ancestor, and has no allowed descendants
derivation:
source: false
target: false # Set to false since the schema doesn't handle Lab currently
Expand Down Expand Up @@ -1241,6 +1260,24 @@ ENTITIES:
type: string
indexed: true
description: The organ code representing the organ type that the data contained in the upload will be registered/associated with.
anticipated_complete_upload_month:
type: string
indexed: true
description: The specific month the Upload is anticipated to have all required data uploaded, in the format YYYY-MM.
required_on_create: false
before_property_create_validators:
- validate_anticipated_complete_date
before_property_update_validators:
- validate_anticipated_complete_date
anticipated_dataset_count:
type: integer
indexed: true
description: The total number of datasets that this Upload will eventually contain.
required_on_create: false
before_property_create_validators:
- validate_anticipated_dataset_count
before_property_update_validators:
- validate_anticipated_dataset_count

############################################# EPICollection #############################################
Epicollection:
Expand All @@ -1250,5 +1287,8 @@ ENTITIES:
derivation:
source: false
target: false
before_entity_update_validator:
# Halt modification of entities which are "locked", such as a Dataset with status == 'Published'
- validate_entity_not_locked_before_update
properties:
<<: *shared_collection_properties
1 change: 1 addition & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class SchemaConstants(object):
COMPONENT_DATASET = 'component-dataset'
INGEST_PIPELINE_APP = 'ingest-pipeline'
HUBMAP_APP_HEADER = 'X-Hubmap-Application'
LOCKED_ENTITY_UPDATE_HEADER = 'X-HuBMAP-Update-Override'
INTERNAL_TRIGGER = 'X-Internal-Trigger'
DATASET_STATUS_PUBLISHED = 'published'

Expand Down
3 changes: 3 additions & 0 deletions src/schema/schema_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,7 @@ class MissingApplicationHeaderException(Exception):
pass

class InvalidApplicationHeaderException(Exception):
pass

class LockedEntityUpdateException(Exception):
pass
60 changes: 40 additions & 20 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1166,13 +1166,15 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
Parameters
----------
validator_type : str
One of the validator types: before_entity_create_validator
One of the validator types recognized by the validate_entity_level_validator_type() method.
normalized_entity_type : str
One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload, Publication
request: Flask request object
The instance of Flask request passed in from application request
existing_entity_dict : dict
The dictionary for an entity, retrieved from Neo4j, for use during update/PUT validations
"""
def execute_entity_level_validator(validator_type, normalized_entity_type, request):
def execute_entity_level_validator(validator_type, normalized_entity_type, request, existing_entity_dict=None):
global _schema

# A bit of validation
Expand All @@ -1183,23 +1185,41 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque

for key in entity:
if validator_type == key:
validator_method_name = entity[validator_type]
if isinstance(entity[validator_type], str):
validator_method_names = [entity[validator_type]]
else:
# default to expecting a list when not a str
validator_method_names = entity[validator_type]

try:
# Get the target validator method defined in the schema_validators.py module
validator_method_to_call = getattr(schema_validators, validator_method_name)

logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}")

validator_method_to_call(normalized_entity_type, request)
except schema_errors.MissingApplicationHeaderException as e:
raise schema_errors.MissingApplicationHeaderException(e)
except schema_errors.InvalidApplicationHeaderException as e:
raise schema_errors.InvalidApplicationHeaderException(e)
except Exception as e:
msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
for validator_method_name in validator_method_names:
try:
# Get the target validator method defined in the schema_validators.py module
validator_method_to_call = getattr(schema_validators, validator_method_name)

logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}")

# Create a dictionary to hold data needed by any entity validator, which must be populated
# with validator specific requirements when the method to be called is determined.
options_dict = {}
if existing_entity_dict is None:
# Execute the entity-level validation for create/POST
options_dict['http_request'] = request
validator_method_to_call(options_dict)
else:
# Execute the entity-level validation for update/PUT
options_dict['existing_entity_dict']= existing_entity_dict
validator_method_to_call(options_dict)
except schema_errors.MissingApplicationHeaderException as e:
raise schema_errors.MissingApplicationHeaderException(e)
except schema_errors.InvalidApplicationHeaderException as e:
raise schema_errors.InvalidApplicationHeaderException(e)
except schema_errors.LockedEntityUpdateException as leue:
raise leue
except Exception as e:
msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
raise e


"""
Expand Down Expand Up @@ -1360,10 +1380,10 @@ def validate_trigger_type(trigger_type:TriggerTypeEnum):
Parameters
----------
validator_type : str
One of the validator types: before_entity_create_validator
Name of an entity-level validator type, which must be listed in accepted_validator_types and found in this schema manager module.
"""
def validate_entity_level_validator_type(validator_type):
accepted_validator_types = ['before_entity_create_validator']
accepted_validator_types = ['before_entity_create_validator', 'before_entity_update_validator']
separator = ', '

if validator_type.lower() not in accepted_validator_types:
Expand Down
Loading