From 09a9076eb751359f35df4cb60f9146c26a64b046 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 14 Jan 2025 03:37:32 -0500 Subject: [PATCH 1/3] modified omitted fields to not include metadata. Modified /ancestors /parents /children and /descendants to remove items from omitted fields from result --- src/schema/schema_constants.py | 2 +- src/schema/schema_neo4j_queries.py | 31 ++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py index ad0a35a7..30cc0279 100644 --- a/src/schema/schema_constants.py +++ b/src/schema/schema_constants.py @@ -22,7 +22,7 @@ class SchemaConstants(object): DOI_BASE_URL = 'https://doi.org/' - DATASETS_OMITTED_FIELDS = ['ingest_metadata', 'metadata', 'files'] + OMITTED_FIELDS = ['ingest_metadata', 'files'] # Define an enumeration to classify an entity's visibility, which can be combined with # authorization info when verify operations on a request. diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 781dc624..a91408ca 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -164,7 +164,7 @@ def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type): """ def get_children(neo4j_driver, uuid, property_key = None): results = [] - + fields_to_omit = SchemaConstants.OMITTED_FIELDS if property_key: query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT]->(:Activity)-[:ACTIVITY_OUTPUT]->(child:Entity) " # The target entity can't be a Lab @@ -193,7 +193,10 @@ def get_children(neo4j_driver, uuid, property_key = None): else: # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) - + if fields_to_omit: + for node_dict in results: + for field in fields_to_omit: + node_dict.pop(field, None) return results @@ -216,7 +219,7 @@ def get_children(neo4j_driver, uuid, property_key = None): """ def get_parents(neo4j_driver, uuid, property_key = None): results = [] - + fields_to_omit = SchemaConstants.OMITTED_FIELDS if property_key: query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) " # Filter out the Lab entities @@ -245,6 +248,10 @@ def get_parents(neo4j_driver, uuid, property_key = None): else: # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) + if fields_to_omit: + for node_dict in results: + for field in fields_to_omit: + node_dict.pop(field, None) return results @@ -380,7 +387,7 @@ def get_tuplets(neo4j_driver, uuid, property_key=None): """ def get_ancestors(neo4j_driver, uuid, property_key = None): results = [] - + fields_to_omit = SchemaConstants.OMITTED_FIELDS if property_key: query = (f"MATCH (e:Entity)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(ancestor:Entity) " # Filter out the Lab entities @@ -410,6 +417,11 @@ def get_ancestors(neo4j_driver, uuid, property_key = None): # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) + if fields_to_omit: + for node_dict in results: + for field in fields_to_omit: + node_dict.pop(field, None) + return results """ @@ -431,7 +443,7 @@ def get_ancestors(neo4j_driver, uuid, property_key = None): """ def get_descendants(neo4j_driver, uuid, property_key = None): results = [] - + fields_to_omit = SchemaConstants.OMITTED_FIELDS if property_key: query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(descendant:Entity) " # The target entity can't be a Lab @@ -460,7 +472,10 @@ def get_descendants(neo4j_driver, uuid, property_key = None): else: # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) - + if fields_to_omit: + for node_dict in results: + for field in fields_to_omit: + node_dict.pop(field, None) return results @@ -1185,7 +1200,7 @@ def get_dataset_upload(neo4j_driver, uuid): def get_collection_datasets(neo4j_driver, uuid): results = [] - fields_to_omit = SchemaConstants.DATASETS_OMITTED_FIELDS + fields_to_omit = SchemaConstants.OMITTED_FIELDS query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) " f"WHERE c.uuid = '{uuid}' " f"RETURN COLLECT(apoc.create.vNode(labels(e), apoc.map.removeKeys(properties(e), {fields_to_omit}))) AS {record_field_name}") @@ -1391,7 +1406,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list): """ def get_upload_datasets(neo4j_driver, uuid, property_key = None): results = [] - fields_to_omit = SchemaConstants.DATASETS_OMITTED_FIELDS + fields_to_omit = SchemaConstants.OMITTED_FIELDS if property_key: query = (f"MATCH (e:Dataset)-[:IN_UPLOAD]->(s:Upload) " f"WHERE s.uuid = '{uuid}' " From a852db579af6fc02604609407b89a66e4116e5ab Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 16 Jan 2025 13:43:20 -0500 Subject: [PATCH 2/3] changed /ancestors /children /parents/ descendants queries to omit fields at the query level rather than removing them later --- src/schema/schema_neo4j_queries.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index a91408ca..65718333 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -178,7 +178,7 @@ def get_children(neo4j_driver, uuid, property_key = None): f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' " # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes - f"RETURN apoc.coll.toSet(COLLECT(child)) AS {record_field_name}") + f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(child), apoc.map.removeKeys(properties(child), {fields_to_omit})))) AS {record_field_name}") logger.info("======get_children() query======") logger.info(query) @@ -233,7 +233,7 @@ def get_parents(neo4j_driver, uuid, property_key = None): f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' " # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes - f"RETURN apoc.coll.toSet(COLLECT(parent)) AS {record_field_name}") + f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(parent), apoc.map.removeKeys(properties(parent), {fields_to_omit})))) AS {record_field_name}") logger.info("======get_parents() query======") logger.info(query) @@ -401,7 +401,7 @@ def get_ancestors(neo4j_driver, uuid, property_key = None): f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' " # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes - f"RETURN apoc.coll.toSet(COLLECT(ancestor)) AS {record_field_name}") + f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(ancestor), apoc.map.removeKeys(properties(ancestor), {fields_to_omit})))) AS {record_field_name}") logger.info("======get_ancestors() query======") logger.info(query) @@ -457,7 +457,7 @@ def get_descendants(neo4j_driver, uuid, property_key = None): f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' " # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes - f"RETURN apoc.coll.toSet(COLLECT(descendant)) AS {record_field_name}") + f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(descendant), apoc.map.removeKeys(properties(descendant), {fields_to_omit})))) AS {record_field_name}") logger.info("======get_descendants() query======") logger.info(query) From 4ae38a88054cae75f8b2e557f3c50fac6cf8e71d Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 16 Jan 2025 13:47:32 -0500 Subject: [PATCH 3/3] removed superfluous key removals in ancestor, parent, children, descendants now that this is handled in the query directly --- src/schema/schema_neo4j_queries.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 65718333..6ba3ff43 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -193,10 +193,7 @@ def get_children(neo4j_driver, uuid, property_key = None): else: # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) - if fields_to_omit: - for node_dict in results: - for field in fields_to_omit: - node_dict.pop(field, None) + return results @@ -248,10 +245,6 @@ def get_parents(neo4j_driver, uuid, property_key = None): else: # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) - if fields_to_omit: - for node_dict in results: - for field in fields_to_omit: - node_dict.pop(field, None) return results @@ -417,11 +410,6 @@ def get_ancestors(neo4j_driver, uuid, property_key = None): # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) - if fields_to_omit: - for node_dict in results: - for field in fields_to_omit: - node_dict.pop(field, None) - return results """ @@ -472,10 +460,7 @@ def get_descendants(neo4j_driver, uuid, property_key = None): else: # Convert the list of nodes to a list of dicts results = nodes_to_dicts(record[record_field_name]) - if fields_to_omit: - for node_dict in results: - for field in fields_to_omit: - node_dict.pop(field, None) + return results