Skip to content

Commit f120405

Browse files
authored
Merge pull request #964 from hubmapconsortium/Derek-Furst/validate-direct-ancestors
Derek furst/validate direct ancestors
2 parents 4b77285 + 3dbe4b8 commit f120405

File tree

4 files changed

+87
-0
lines changed

4 files changed

+87
-0
lines changed

src/schema/provenance_schema.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,12 @@ ENTITIES:
463463
type: list
464464
before_property_create_validators:
465465
- validate_no_duplicates_in_list
466+
- validate_ancestor_type
466467
before_property_update_validators:
467468
- validate_no_duplicates_in_list
468469
- validate_not_invalid_creation_action
469470
- validate_id_not_in_direct_ancestor
471+
- validate_ancestor_type
470472
transient: true
471473
exposed: false
472474
indexed: false

src/schema/schema_manager.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,34 @@ def get_entity_superclass(normalized_entity_class):
226226
return normalized_superclass
227227

228228

229+
"""
Get the direct subclasses (if any) of the given entity class

Only entities whose schema definition names the given class as their immediate
`superclass` are returned; deeper descendants are not followed.

Parameters
----------
normalized_entity_class : str
    The normalized target entity class

Returns
-------
list
    The normalized entity classes that declare the given class as their superclass.
    An empty list when no entity does

Raises
------
ValueError
    If the given entity class is not one of the known entity types
"""
def get_entity_subclasses(normalized_entity_class):
    if normalized_entity_class not in get_all_entity_types():
        raise ValueError(f"Unrecognized entity class: {normalized_entity_class}")

    # The `superclass` key is optional in the schema, so skip entities that
    # don't define it before normalizing and comparing
    return [
        normalize_entity_type(name)
        for name, data in _schema["ENTITIES"].items()
        if data.get("superclass")
        and normalize_entity_type(data["superclass"]) == normalized_entity_class
    ]
255+
256+
229257
"""
230258
Determine if the Entity type with 'entity_type' is an instance of 'entity_class'.
231259
Use this function if you already have the Entity type. Use `entity_instanceof(uuid, class)`

src/schema/schema_neo4j_queries.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1825,6 +1825,35 @@ def create_activity_tx(tx, activity_data_dict):
18251825
return node
18261826

18271827

1828+
"""
Find which of the given uuids are not acceptable as direct ancestors

A uuid is reported as invalid when no node with that uuid exists, when none of the
node's labels is in the allowed list, or when the node matches any of the
disallowed property/value rules.

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
entity_uuids : list
    The uuids of the candidate direct ancestors
allowed_types : list
    The node labels an ancestor is allowed to have (at least one must match)
disallowed_property_values : list or None
    Optional list of dicts with the keys `property` and `value`. A node whose
    `property` equals `value` is rejected even if its label is allowed

Returns
-------
list
    The distinct uuids that failed the checks. An empty list when all are valid
"""
def validate_direct_ancestors(neo4j_driver, entity_uuids, allowed_types, disallowed_property_values=None):
    # Nothing to validate: UNWIND over an empty list yields no rows anyway,
    # so skip the database round trip
    if not entity_uuids:
        return []

    # Always hand Cypher a list so `any(rule IN rules ...)` never iterates over null
    disallowed_rules_list = disallowed_property_values or []

    # NOTE(review): the OPTIONAL MATCH has no label on (n), so it may not be able
    # to use a uuid index -- confirm against the database's index definitions
    query = """
    UNWIND $uuids AS uid
    OPTIONAL MATCH (n) WHERE n.uuid = uid
    WITH uid, n,
         CASE
             WHEN n IS NULL THEN false
             ELSE any(l IN labels(n) WHERE l IN $allowed_labels)
         END AS label_ok,
         $disallowed AS rules
    WITH uid, label_ok,
         any(rule IN rules WHERE
             n IS NOT NULL
             AND n[rule.property] IS NOT NULL
             AND n[rule.property] = rule.value
         ) AS has_forbidden_prop
    WHERE NOT label_ok OR has_forbidden_prop
    RETURN DISTINCT uid AS invalid_uuid
    """

    with neo4j_driver.session() as session:
        result = session.run(query,
                             uuids=entity_uuids,
                             allowed_labels=allowed_types,
                             disallowed=disallowed_rules_list)

        # Read the records while the session is still open; the result
        # can no longer be consumed once the session has been closed
        return [record["invalid_uuid"] for record in result]
1855+
1856+
18281857
"""
18291858
Build the property key-value pairs to be used in the Cypher clause for node creation/update
18301859

src/schema/schema_validators.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,34 @@ def validate_sample_category(property_key, normalized_entity_type, request, exis
674674
if new_data_dict[property_key] != sample_category:
675675
raise ValueError(f"The case of sample_category '{new_data_dict[property_key]}'"
676676
f" must be specified as '{sample_category}'.")
677+
678+
"""
Validate the direct ancestor uuids provided under `property_key` on create via POST and update via PUT

Every uuid must resolve to an existing entity whose type is Dataset, Sample, or one of
their direct subclasses, and must not be a Sample with sample_category 'organ'.

Parameters
----------
property_key : str
    The target property key
normalized_entity_type : str
    The normalized entity type (not used by this validator)
request: Flask request object
    The instance of Flask request passed in from application request
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    The json data in request body, already after the regular validations

Raises
------
ValueError
    If any of the provided uuids is not found or is not an allowed ancestor
"""
def validate_ancestor_type(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    base_ancestor_types = ["Dataset", "Sample"]

    # Start from the base types and append the subclasses of each one
    allowed_ancestor_types = list(base_ancestor_types)
    for base_type in base_ancestor_types:
        allowed_ancestor_types.extend(
            schema_manager.get_entity_subclasses(schema_manager.normalize_entity_type(base_type))
        )

    direct_ancestor_uuids = new_data_dict[property_key]
    disallowed_properties = [{"property": "sample_category", "value": "organ"}]
    invalid_uuids = schema_neo4j_queries.validate_direct_ancestors(
        schema_manager.get_neo4j_driver_instance(),
        direct_ancestor_uuids,
        allowed_ancestor_types,
        disallowed_properties,
    )

    if invalid_uuids:
        # The invalid values are echoed back from the request and may not be strings
        # (e.g. a number was submitted); stringify so join() can't raise a TypeError
        # that would mask the intended validation error
        invalid_uuids_str = ', '.join(str(invalid_uuid) for invalid_uuid in invalid_uuids)
        raise ValueError(f"Invalid or not-found direct_ancestor_uuid(s). Allowed entity_types are: {', '.join(allowed_ancestor_types)}. For samples, 'organ' is not allowed. Invalid uuids: {invalid_uuids_str}")
704+
677705

678706
"""
679707
Validate the provided value of Publication.publication_date is in the correct format against ISO 8601 Format:

0 commit comments

Comments
 (0)