2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-2.4.2
+2.4.3
7 changes: 0 additions & 7 deletions src/app.py
@@ -797,13 +797,6 @@ def get_entity_by_id(id):
     # Response with the dict
     if public_entity and not user_in_hubmap_read_group(request):
         final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
-    if normalized_entity_type == 'Collection':
-        for i, dataset in enumerate(final_result.get('datasets', [])):
-            if _get_entity_visibility(normalized_entity_type='Dataset', entity_dict=dataset) != DataVisibilityEnum.PUBLIC or user_in_hubmap_read_group(request):
-                # If the dataset is non-public, or if the user has read-group access, there is no need to remove fields, continue to the next dataset
-                continue
-            dataset_excluded_fields = schema_manager.get_fields_to_exclude('Dataset')
-            final_result.get('datasets')[i] = schema_manager.exclude_properties_from_response(dataset_excluded_fields, dataset)
     return jsonify(final_result)

 """
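The deleted block above is the imperative version of the Collection dataset pruning; this PR moves that logic into the nested `excluded_properties_from_public_response` entries in `provenance_schema.yaml` (see below), so the single `exclude_properties_from_response()` call that remains also reaches the nested datasets. A minimal, self-contained sketch of the effect, assuming a spec shaped like the new Collection YAML block; `prune_public_collection` is a hypothetical flattened stand-in, not the actual helper:

```python
# Hypothetical flattened stand-in for schema_manager.exclude_properties_from_response()
def prune_public_collection(collection, spec):
    # spec mirrors the new Collection rules in provenance_schema.yaml:
    # [{'datasets': ['lab_dataset_id']}, {'metadata': ['lab_id']}]
    for rule in spec:
        for key, fields in rule.items():
            targets = collection.get(key, [])
            # 'datasets' is a list of dicts; 'metadata' is a single dict
            targets = targets if isinstance(targets, list) else [targets]
            for item in targets:
                for field in fields:
                    item.pop(field, None)
    return collection

collection = {'datasets': [{'lab_dataset_id': 'X-1', 'title': 'kept'}],
              'metadata': {'lab_id': 'L-1', 'funding': 'kept'}}
spec = [{'datasets': ['lab_dataset_id']}, {'metadata': ['lab_id']}]
print(prune_public_collection(collection, spec))
# {'datasets': [{'title': 'kept'}], 'metadata': {'funding': 'kept'}}
```

Note the per-dataset visibility check from the old loop is not reproduced here; the sketch only shows the field-removal shape, while the endpoint-level `public_entity and not user_in_hubmap_read_group(request)` guard continues to decide whether pruning happens at all.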
9 changes: 0 additions & 9 deletions src/dev_entity_worker.py
@@ -329,15 +329,6 @@ def _get_entity_by_id_for_auth_level(self, entity_id:Annotated[str, 32], valid_u
         #if public_entity and not user_in_hubmap_read_group(request):
         if public_entity and not user_authorized:
             final_result = self.schemaMgr.exclude_properties_from_response(fields_to_exclude, final_result)
-        if normalized_entity_type == 'Collection':
-            for i, dataset in enumerate(final_result.get('datasets', [])):
-                if self._get_entity_visibility(entity_dict=dataset) != DataVisibilityEnum.PUBLIC \
-                        or user_authorized:  # or user_in_hubmap_read_group(request):
-                    # If the dataset is non-public, or if the user has read-group access, there is
-                    # no need to remove fields, continue to the next dataset
-                    continue
-                dataset_excluded_fields = self.schemaMgr.get_fields_to_exclude('Dataset')
-                final_result.get('datasets')[i] = self.schemaMgr.exclude_properties_from_response(dataset_excluded_fields, dataset)
         return final_result

 '''
30 changes: 26 additions & 4 deletions src/schema/provenance_schema.yaml
@@ -192,6 +192,11 @@ shared_entity_properties: &shared_entity_properties
 ENTITIES:
 ############################################# Collection #############################################
     Collection:
+        excluded_properties_from_public_response:
+            - datasets:
+                - lab_dataset_id
+            - metadata:
+                - lab_id
         # Collection can not be derivation source or target
         derivation:
             source: false
@@ -304,6 +309,15 @@ ENTITIES:
             - lab_dataset_id
             - metadata:
                 - lab_id
+            - direct_ancestors:
+                # Sample ancestors of a Dataset must have these fields removed
+                - lab_tissue_sample_id
+                - submission_id
+                # Dataset ancestors of a Dataset must have these fields removed
+                - lab_dataset_id
+                # Both Sample and Dataset ancestors of a Dataset must have these fields removed
+                - metadata:
+                    - lab_id
         derivation:
             source: true
             target: true
@@ -667,10 +681,6 @@ ENTITIES:
                 description: "The activity that was performed."
         dataset_type:
             before_create_trigger: set_publication_dataset_type
-            before_property_create_validators:
-                - validate_recognized_dataset_type
-            before_property_update_validators:
-                - validate_recognized_dataset_type
             type: string
             generated: true
             immutable: true
@@ -874,6 +884,18 @@ ENTITIES:
         excluded_properties_from_public_response:
             - lab_tissue_sample_id
             - submission_id
+            - metadata:
+                - lab_id
+            - direct_ancestor:
+                # Donor ancestors of a Sample must have these fields removed
+                - lab_donor_id
+                - label
+                # Sample ancestors of a Sample must have these fields removed
+                - lab_tissue_sample_id
+                - metadata:
+                    - lab_id
+                # Both Sample and Donor ancestors of a Sample must have these fields removed
+                - submission_id
         properties:
             <<: *shared_properties
             <<: *shared_entity_properties
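Read with PyYAML, each `excluded_properties_from_public_response` entry is either a bare field name or a one-key mapping that descends into a nested object (or into every element when that object is a list, as with a Dataset's `direct_ancestors`). A small sketch of the structure the Sample block above yields; the loader code is illustrative, not part of the PR:

```python
import yaml  # PyYAML

snippet = """
excluded_properties_from_public_response:
    - lab_tissue_sample_id
    - submission_id
    - metadata:
        - lab_id
    - direct_ancestor:
        - lab_donor_id
        - label
        - lab_tissue_sample_id
        - metadata:
            - lab_id
        - submission_id
"""

spec = yaml.safe_load(snippet)['excluded_properties_from_public_response']
print(spec[0])  # 'lab_tissue_sample_id'        -> top-level field to drop
print(spec[2])  # {'metadata': ['lab_id']}      -> drop lab_id inside metadata
print(spec[3])  # {'direct_ancestor': [..., {'metadata': ['lab_id']}, ...]}
```

Note the asymmetry the spec encodes: a Dataset carries a `direct_ancestors` list while a Sample carries a single `direct_ancestor` object; the updated `delete_nested_field()` in `schema_manager.py` (below) handles both shapes.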
2 changes: 1 addition & 1 deletion src/schema/schema_constants.py
@@ -22,7 +22,7 @@ class SchemaConstants(object):
 
     DOI_BASE_URL = 'https://doi.org/'
 
-    DATASETS_OMITTED_FIELDS = ['ingest_metadata', 'metadata', 'files']
+    OMITTED_FIELDS = ['ingest_metadata', 'files']
 
     # Define an enumeration to classify an entity's visibility, which can be combined with
     # authorization info when verifying operations on a request.
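`metadata` drops out of the blanket list because the new per-entity YAML rules above prune metadata sub-fields (such as `metadata.lab_id`) selectively instead of discarding all metadata. The constant is interpolated straight into Cypher text, which works because Python's list repr is also a valid Cypher list literal; a tiny illustration, not code from the PR:

```python
fields_to_omit = ['ingest_metadata', 'files']  # SchemaConstants.OMITTED_FIELDS
query = f"RETURN apoc.map.removeKeys(properties(e), {fields_to_omit}) AS e"
print(query)
# RETURN apoc.map.removeKeys(properties(e), ['ingest_metadata', 'files']) AS e
```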
17 changes: 15 additions & 2 deletions src/schema/schema_manager.py
@@ -306,13 +306,26 @@ def delete_nested_field(data, nested_path):
                     if isinstance(value, list):
                         for nested_field in value:
                             if isinstance(nested_field, dict):
-                                delete_nested_field(data[key], nested_field)
+                                if isinstance(data[key], list):
+                                    for item in data[key]:
+                                        delete_nested_field(item, nested_field)
+                                else:
+                                    delete_nested_field(data[key], nested_field)
+                            elif isinstance(data[key], list):
+                                for item in data[key]:
+                                    if nested_field in item:
+                                        del item[nested_field]
                             elif nested_field in data[key]:
                                 del data[key][nested_field]
                     elif isinstance(value, dict):
                         delete_nested_field(data[key], value)
         elif nested_path in data:
-            del data[nested_path]
+            if isinstance(data[nested_path], list):
+                for item in data[nested_path]:
+                    if nested_path in item:
+                        del item[nested_path]
+            else:
+                del data[nested_path]
 
     for field in excluded_fields:
         delete_nested_field(output_dict, field)
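Since the hunk only shows the changed branches, here is a self-contained reconstruction of the updated helper that can be run against a sample payload. The `exclude_properties_from_response()` scaffolding around it (deep copy, top-level loop) is paraphrased from context, so treat this as a sketch rather than the exact source:

```python
import copy

def exclude_properties_from_response(excluded_fields, entity_dict):
    """Return a copy of entity_dict with the excluded fields pruned, following
    the nested spec format used in provenance_schema.yaml."""
    def delete_nested_field(data, nested_path):
        if isinstance(nested_path, dict):
            for key, value in nested_path.items():
                if key not in data:
                    continue
                if isinstance(value, list):
                    for nested_field in value:
                        if isinstance(nested_field, dict):
                            # The spec descends again, e.g. {'metadata': ['lab_id']}
                            if isinstance(data[key], list):
                                for item in data[key]:
                                    delete_nested_field(item, nested_field)
                            else:
                                delete_nested_field(data[key], nested_field)
                        elif isinstance(data[key], list):
                            # e.g. a Dataset's 'direct_ancestors' list of dicts
                            for item in data[key]:
                                if nested_field in item:
                                    del item[nested_field]
                        elif nested_field in data[key]:
                            del data[key][nested_field]
                elif isinstance(value, dict):
                    delete_nested_field(data[key], value)
        elif nested_path in data:
            if isinstance(data[nested_path], list):
                for item in data[nested_path]:
                    if nested_path in item:
                        del item[nested_path]
            else:
                del data[nested_path]

    output_dict = copy.deepcopy(entity_dict)
    for field in excluded_fields:
        delete_nested_field(output_dict, field)
    return output_dict

# Worked example using the new Sample rules added above
sample = {'submission_id': 'HBM123.ABCD',
          'direct_ancestor': {'lab_donor_id': 'D-44',
                              'metadata': {'lab_id': 'L-9', 'age': '40'}}}
spec = ['submission_id',
        {'direct_ancestor': ['lab_donor_id', {'metadata': ['lab_id']}]}]
print(exclude_properties_from_response(spec, sample))
# -> {'direct_ancestor': {'metadata': {'age': '40'}}}
```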
24 changes: 12 additions & 12 deletions src/schema/schema_neo4j_queries.py
@@ -164,7 +164,7 @@ def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type):
 """
 def get_children(neo4j_driver, uuid, property_key = None):
     results = []
-
+    fields_to_omit = SchemaConstants.OMITTED_FIELDS
     if property_key:
         query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT]->(:Activity)-[:ACTIVITY_OUTPUT]->(child:Entity) "
                  # The target entity can't be a Lab
@@ -178,7 +178,7 @@ def get_children(neo4j_driver, uuid, property_key = None):
                  f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' "
                  # COLLECT() returns a list
                  # apoc.coll.toSet() returns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(child)) AS {record_field_name}")
+                 f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(child), apoc.map.removeKeys(properties(child), {fields_to_omit})))) AS {record_field_name}")
 
     logger.info("======get_children() query======")
     logger.info(query)
@@ -193,7 +193,7 @@ def get_children(neo4j_driver, uuid, property_key = None):
     else:
         # Convert the list of nodes to a list of dicts
         results = nodes_to_dicts(record[record_field_name])
-
     return results


@@ -216,7 +216,7 @@ def get_children(neo4j_driver, uuid, property_key = None):
 """
 def get_parents(neo4j_driver, uuid, property_key = None):
     results = []
-
+    fields_to_omit = SchemaConstants.OMITTED_FIELDS
     if property_key:
         query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
                  # Filter out the Lab entities
@@ -230,7 +230,7 @@ def get_parents(neo4j_driver, uuid, property_key = None):
                  f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
                  # COLLECT() returns a list
                  # apoc.coll.toSet() returns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(parent)) AS {record_field_name}")
+                 f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(parent), apoc.map.removeKeys(properties(parent), {fields_to_omit})))) AS {record_field_name}")
 
     logger.info("======get_parents() query======")
     logger.info(query)
@@ -380,7 +380,7 @@ def get_tuplets(neo4j_driver, uuid, property_key=None):
 """
 def get_ancestors(neo4j_driver, uuid, property_key = None):
     results = []
-
+    fields_to_omit = SchemaConstants.OMITTED_FIELDS
     if property_key:
         query = (f"MATCH (e:Entity)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(ancestor:Entity) "
                  # Filter out the Lab entities
@@ -394,7 +394,7 @@ def get_ancestors(neo4j_driver, uuid, property_key = None):
                  f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
                  # COLLECT() returns a list
                  # apoc.coll.toSet() returns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(ancestor)) AS {record_field_name}")
+                 f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(ancestor), apoc.map.removeKeys(properties(ancestor), {fields_to_omit})))) AS {record_field_name}")
 
     logger.info("======get_ancestors() query======")
     logger.info(query)
@@ -431,7 +431,7 @@ def get_ancestors(neo4j_driver, uuid, property_key = None):
 """
 def get_descendants(neo4j_driver, uuid, property_key = None):
     results = []
-
+    fields_to_omit = SchemaConstants.OMITTED_FIELDS
     if property_key:
         query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(descendant:Entity) "
                  # The target entity can't be a Lab
@@ -445,7 +445,7 @@ def get_descendants(neo4j_driver, uuid, property_key = None):
                  f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' "
                  # COLLECT() returns a list
                  # apoc.coll.toSet() returns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(descendant)) AS {record_field_name}")
+                 f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(descendant), apoc.map.removeKeys(properties(descendant), {fields_to_omit})))) AS {record_field_name}")
 
     logger.info("======get_descendants() query======")
     logger.info(query)
@@ -460,7 +460,7 @@ def get_descendants(neo4j_driver, uuid, property_key = None):
     else:
         # Convert the list of nodes to a list of dicts
        results = nodes_to_dicts(record[record_field_name])
-
     return results


@@ -1185,7 +1185,7 @@ def get_dataset_upload(neo4j_driver, uuid):
 def get_collection_datasets(neo4j_driver, uuid):
     results = []
 
-    fields_to_omit = SchemaConstants.DATASETS_OMITTED_FIELDS
+    fields_to_omit = SchemaConstants.OMITTED_FIELDS
     query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) "
              f"WHERE c.uuid = '{uuid}' "
              f"RETURN COLLECT(apoc.create.vNode(labels(e), apoc.map.removeKeys(properties(e), {fields_to_omit}))) AS {record_field_name}")
@@ -1391,7 +1391,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list):
 """
 def get_upload_datasets(neo4j_driver, uuid, property_key = None):
     results = []
-    fields_to_omit = SchemaConstants.DATASETS_OMITTED_FIELDS
+    fields_to_omit = SchemaConstants.OMITTED_FIELDS
     if property_key:
         query = (f"MATCH (e:Dataset)-[:IN_UPLOAD]->(s:Upload) "
                  f"WHERE s.uuid = '{uuid}' "
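Every hunk in this file applies the same pattern: rather than returning stored nodes, each traversal returns virtual nodes (`apoc.create.vNode`) whose properties have already been stripped with `apoc.map.removeKeys`, so the omitted fields never cross the wire. Both are standard APOC functions. A standalone sketch of the shape, with the driver plumbing simplified and the query parameterized here for illustration (the PR itself interpolates the uuid into the f-string):

```python
from neo4j import GraphDatabase  # standard Neo4j Python driver; APOC required server-side

fields_to_omit = ['ingest_metadata', 'files']  # SchemaConstants.OMITTED_FIELDS

# Python's list repr doubles as a Cypher list literal when formatted in.
query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT]->(:Activity)-[:ACTIVITY_OUTPUT]->(child:Entity) "
         f"WHERE e.uuid = $uuid AND e.entity_type <> 'Lab' "
         f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(child), "
         f"apoc.map.removeKeys(properties(child), {fields_to_omit})))) AS children")

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
with driver.session() as session:
    record = session.run(query, uuid="some-entity-uuid").single()
    # Virtual nodes come back as ordinary Node objects, minus the omitted keys
    children = [dict(node.items()) for node in record["children"]]
print(children)
```

Trimming inside Cypher replaces the old arrangement where `DATASETS_OMITTED_FIELDS` applied only to collection and upload datasets; with the rename to `OMITTED_FIELDS`, the same trimming now covers children, parents, ancestors, and descendants as well.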
3 changes: 2 additions & 1 deletion src/schema/schema_validators.py
@@ -68,7 +68,8 @@ def validate_recognized_dataset_type(property_key, normalized_entity_type, reque
     # those square brackets are acceptable at the end of the string. Simply validate the start.
     proposed_dataset_type_prefix = re.sub(pattern='(\S)\s\[.*\]$', repl=r'\1', string=new_data_dict['dataset_type'])
     target_list = schema_manager.get_dataset_type_valueset_list()
-
+    # TODO: This is a temporary bypass because the UBKG does not support Publication as a dataset_type yet. Remove once it's added.
+    target_list.append("Publication")
     if proposed_dataset_type_prefix not in target_list:
         raise ValueError(f"Proposed Dataset dataset_type '{proposed_dataset_type_prefix}'"
                          f" is not recognized in the existing ontology."
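To make the bypass concrete, a hedged sketch of the check with a stand-in valueset (the real list comes from `schema_manager.get_dataset_type_valueset_list()`):

```python
import re

def is_recognized_dataset_type(proposed, target_list):
    # Strip a trailing bracketed qualifier, e.g. "RNASeq [Salmon]" -> "RNASeq"
    prefix = re.sub(pattern=r'(\S)\s\[.*\]$', repl=r'\1', string=proposed)
    return prefix in target_list

target_list = ['RNASeq', 'ATACseq']  # hypothetical UBKG valueset contents
target_list.append('Publication')    # the temporary bypass added above
print(is_recognized_dataset_type('Publication', target_list))      # True
print(is_recognized_dataset_type('RNASeq [Salmon]', target_list))  # True
print(is_recognized_dataset_type('Unknown Assay', target_list))    # False
```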