diff --git a/src/app.py b/src/app.py index 777043a2..eb5a53c1 100644 --- a/src/app.py +++ b/src/app.py @@ -3743,6 +3743,16 @@ def sankey_data(): HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type' HEADER_DATASET_STATUS = 'dataset_status' + public_only = False + + # Token is not required, but if an invalid token provided, + # we need to tell the client with a 401 error + validate_token_if_auth_header_exists(request) + try: + token = get_user_token(request, non_public_access_required=True) + except Exception: + public_only = True + # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description # because that would require using a urllib request for each dataset organ_types_dict = schema_manager.get_organ_types() @@ -3761,15 +3771,17 @@ def sankey_data(): logger.info(f'Sankey data cache not found or expired. Making a new data fetch at time {datetime.now()}') # Call to app_neo4j_queries to prepare and execute the database query - sankey_info = app_neo4j_queries.get_sankey_info(neo4j_driver_instance) + sankey_info = app_neo4j_queries.get_sankey_info(neo4j_driver_instance, public_only) for dataset in sankey_info: internal_dict = collections.OrderedDict() internal_dict[HEADER_DATASET_GROUP_NAME] = dataset[HEADER_DATASET_GROUP_NAME] - - organ_code = dataset[HEADER_ORGAN_TYPE].upper() - validate_organ_code(organ_code) - - internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[organ_code].lower() + organ_list = [] + for organ in dataset[HEADER_ORGAN_TYPE]: + organ_code = organ.upper() + validate_organ_code(organ_code) + organ_type = organ_types_dict[organ_code].lower() + organ_list.append(organ_type) + internal_dict[HEADER_ORGAN_TYPE] = organ_list internal_dict[HEADER_DATASET_DATASET_TYPE] = dataset[HEADER_DATASET_DATASET_TYPE] @@ -4005,8 +4017,12 @@ def multiple_components(): validate_token_if_auth_header_exists(request) # Get user token from Authorization header user_token = get_user_token(request) + # Create a dictionary 
as required to use an entity validator. Ignore the + # options_dict['existing_entity_dict'] support for PUT requests, since this + # @app.route() only supports POST. + options_dict = {'http_request': request} try: - schema_validators.validate_application_header_before_entity_create("Dataset", request) + schema_validators.validate_application_header_before_entity_create(options_dict=options_dict) except Exception as e: bad_request_error(str(e)) require_json(request) @@ -5326,37 +5342,54 @@ def require_json(request): """ def delete_cache(id): if MEMCACHED_MODE: - # First delete the target entity cache entity_dict = query_target_entity(id, get_internal_token()) entity_uuid = entity_dict['uuid'] - - # Delete the cache of all the descendants - descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Collection, delete the cache for each of its associated - # Datasets and Publications (via [:IN_COLLECTION]) as well as just Publications (via [:USES_DATA]) + entity_type = entity_dict['entity_type'] + descendant_uuids = [] + collection_dataset_uuids = [] + upload_dataset_uuids = [] + collection_uuids = [] + dataset_upload_dict = {} + publication_collection_dict = {} + + # Determine the associated cache keys based on the entity type + # To reduce unnecessary Neo4j lookups that may cause timeout on the PUT call + + # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants + if entity_type in ['Donor', 'Sample', 'Dataset', 'Publication']: + descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Collection/Epicollection, delete the cache for each of its associated datasets (via [:IN_COLLECTION]) + if schema_manager.entity_type_instanceof(entity_type, 'Collection'): + collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Upload, delete the cache 
for each of its associated Datasets (via [:IN_UPLOAD]) + if entity_type == 'Upload': + upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Dataset, delete the associated Collections cache and single Upload cache + if entity_type == 'Dataset': + collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') + dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) + + # For Publication, delete cache of the associated collection # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. # Still keep it in the code until further decision - Zhou - collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD] relationship) - upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Datasets/Publication, delete the associated Collections cache, Upload cache - collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') - collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) - upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) - + if entity_type == 'Publication': + publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) + # We only use uuid in the cache key across all the cache types uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids - # It's possible no linked collection or upload - if collection_dict: - uuids_list.append(collection_dict['uuid']) + # It's possible the target 
dataset has no linked upload + if dataset_upload_dict: + uuids_list.append(dataset_upload_dict['uuid']) - if upload_dict: - uuids_list.append(upload_dict['uuid']) + # It's possible the target publication has no associated collection + if publication_collection_dict: + uuids_list.append(publication_collection_dict['uuid']) + # Final batch delete schema_manager.delete_memcached_cache(uuids_list) diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index b127c122..220df6b4 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -877,11 +877,18 @@ def get_all_dataset_samples(neo4j_driver, dataset_uuid): neo4j_driver : neo4j.Driver object The neo4j database connection pool """ -def get_sankey_info(neo4j_driver): - query = (f"MATCH (ds:Dataset)<-[]-(a)<-[]-(:Sample)" - # specimen_type -> sample_category 12/15/2022 - f"MATCH (donor)-[:ACTIVITY_INPUT]->(oa)-[:ACTIVITY_OUTPUT]->(organ:Sample {{sample_category:'organ'}})-[*]->(ds)" - f"RETURN distinct ds.group_name, organ.organ, ds.dataset_type, ds.status, ds. 
uuid order by ds.group_name") +def get_sankey_info(neo4j_driver, public_only): + public_only_query = " " + if public_only: + public_only_query = f"AND toLower(ds.status) = 'published' " + query = (f"MATCH (donor:Donor)-[:ACTIVITY_INPUT]->(organ_activity:Activity)-[:ACTIVITY_OUTPUT]-> " + f"(organ:Sample {{sample_category:'organ'}})-[*]->(a:Activity)-[:ACTIVITY_OUTPUT]->(ds:Dataset) " + f"WHERE toLower(a.creation_action) = 'create dataset activity' " + f"AND NOT (ds)<-[:REVISION_OF]-(:Entity) " + f"{public_only_query} " + f"RETURN DISTINCT ds.group_name, COLLECT(DISTINCT organ.organ), ds.dataset_type, ds.status, ds.uuid " + f"ORDER BY ds.group_name") + logger.info("======get_sankey_info() query======") logger.info(query) with neo4j_driver.session() as session: diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 81272c03..244277f6 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -24,10 +24,10 @@ Parameters ---------- -normalized_type : str - One of the types defined in the schema yaml: Dataset, Upload -request: Flask request - The instance of Flask request passed in from application request +options_dict : dict + A dictionary of data needed by this entity-level validator based upon the create/POST or + update/PUT actions. The dictionary will always have 'http_request' and will have + 'existing_entity_dict' for a PUT request. """ def validate_application_header_before_entity_create(options_dict): if 'http_request' in options_dict: @@ -52,10 +52,10 @@ def validate_application_header_before_entity_create(options_dict): Parameters ---------- -normalized_type : str - One of the types defined in the schema yaml: Dataset, Upload -request: Flask request - The instance of Flask request passed in from application request +options_dict : dict + A dictionary of data needed by this entity-level validator based upon the create/POST or + update/PUT actions. 
The dictionary will always have 'http_request' and will have + 'existing_entity_dict' for a PUT request. """ def validate_entity_not_locked_before_update(options_dict): if 'existing_entity_dict' in options_dict: