diff --git a/mavecore/models/data.py b/mavecore/models/data.py index b0d1800..c8b251e 100644 --- a/mavecore/models/data.py +++ b/mavecore/models/data.py @@ -18,10 +18,12 @@ class DataSet(BaseModel): class Config: alias_generator = to_camel + allow_population_by_field_name = True @validator('keywords') def validate_keywords(cls, v): keywords.validate_keywords(v) + return v class Experiment(DataSet): @@ -45,7 +47,9 @@ def validate_scoreset_urn(cls, v): urn.validate_mavedb_urn_scoreset(v) else: [urn.validate_mavedb_urn_scoreset(s) for s in v] + return v @validator('experiment_urn') def validate_experiment_urn(cls, v): urn.validate_mavedb_urn_experiment(v) + return v diff --git a/mavecore/models/identifier.py b/mavecore/models/identifier.py index 13c748e..d36ba84 100644 --- a/mavecore/models/identifier.py +++ b/mavecore/models/identifier.py @@ -10,6 +10,7 @@ class Identifier(BaseModel): class Config: alias_generator = to_camel + allow_population_by_field_name = True class DoiIdentifier(Identifier): @@ -17,6 +18,7 @@ class DoiIdentifier(Identifier): @validator('identifier') def must_be_valid_doi(cls, v): id.validate_doi_identifier(v) + return v class PubmedIdentifier(Identifier): @@ -24,6 +26,7 @@ class PubmedIdentifier(Identifier): @validator('identifier') def must_be_valid_pubmed(cls, v): id.validate_pubmed_identifier(v) + return v '''class ExternalIdentifierId(BaseModel): @@ -62,3 +65,4 @@ class Config: @validator('identifier') def validate_identifier(cls, v): id.validate_external_identifier(v) + return v diff --git a/mavecore/models/map.py b/mavecore/models/map.py index bbad61a..343a9f7 100644 --- a/mavecore/models/map.py +++ b/mavecore/models/map.py @@ -9,3 +9,4 @@ class ReferenceMap(BaseModel): class Config: alias_generator = to_camel + allow_population_by_field_name = True diff --git a/mavecore/models/sequence.py b/mavecore/models/sequence.py index 7c2399e..b45bccb 100644 --- a/mavecore/models/sequence.py +++ b/mavecore/models/sequence.py @@ -10,11 +10,14 @@ class WildType(BaseModel): class Config: alias_generator = to_camel + allow_population_by_field_name = True @validator('sequence_type') def validate_category(cls, v): target.validate_sequence_category(v) + return v @validator('sequence') def validate_sequence(cls, v): target.validate_target_sequence(v) + return v diff --git a/mavecore/models/target.py b/mavecore/models/target.py index d246337..73e02fe 100644 --- a/mavecore/models/target.py +++ b/mavecore/models/target.py @@ -12,13 +12,15 @@ class TargetGene(BaseModel): name: str category: str - external_identifiers: List[ExternalIdentifier] - reference_maps: List[ReferenceMap] + external_identifiers: Optional[List[ExternalIdentifier]] + reference_maps: Optional[List[ReferenceMap]] wt_sequence: WildType class Config: alias_generator = to_camel + allow_population_by_field_name = True @validator('category') def validate_category(cls, v): target.validate_target_category(v) + return v diff --git a/mavecore/validation/dataset.py b/mavecore/validation/dataset.py index 716ae59..8388802 100644 --- a/mavecore/validation/dataset.py +++ b/mavecore/validation/dataset.py @@ -1,6 +1,7 @@ import json from mavecore.models.data import Experiment, ScoreSet +from mavecore.validation.exceptions import ValidationError def validate_experiment(experiment: dict): @@ -20,9 +21,9 @@ def validate_experiment(experiment: dict): If required keys are missing or any keys contain incorrect values. """ try: - return json.loads(Experiment.parse_obj(experiment).json()) + return Experiment.parse_obj(experiment).dict(by_alias=True, exclude_none=True) except ValueError as e: - print(e) + raise ValidationError(e) def validate_scoreset(scoreset: dict): @@ -43,6 +44,7 @@ def validate_scoreset(scoreset: dict): If required keys are missing or any keys contain incorrect values. """ try: - return json.loads(ScoreSet.parse_obj(scoreset).json()) + #return json.loads(ScoreSet.parse_obj(scoreset).json()) + return ScoreSet.parse_obj(scoreset).dict(by_alias=True, exclude_none=True) except ValueError as e: - print(e) + raise ValidationError(e) diff --git a/mavecore/validation/identifier.py b/mavecore/validation/identifier.py index 9e6c693..8771cfc 100644 --- a/mavecore/validation/identifier.py +++ b/mavecore/validation/identifier.py @@ -33,24 +33,28 @@ def validate_external_identifier(identifier: dict): "and `identifier`.") # check that the keys are the right name - if "dbname" not in identifier: - raise ValidationError("The identifier attribute of the external identifier should have two Keys, `dbname` " - "and `identifier`.") + if "db_name" not in identifier and "dbName" not in identifier: + raise ValidationError("The identifier attribute of the external identifier should have the key `dbName` or 'db_name`.") if "identifier" not in identifier: - raise ValidationError("The identifier attribute of the external identifier should have two Keys, `dbname` " - "and `identifier`.") + raise ValidationError("The identifier attribute of the external identifier should have the key `identifier`.") + + # assign dbName key to variable + if "dbName" in identifier: + db_name = "dbName" + else: + db_name = "db_name" # check that dbname is valid - if identifier.get("dbname") not in valid_dbnames: - raise ValidationError(f"The `dbname` key within the identifier attribute of the external identifier should " + if identifier.get(db_name) not in valid_dbnames: + raise ValidationError(f"The `db_name` key within the identifier attribute of the external identifier should " f"take one of the following values: {valid_dbnames}.") # validate identifier based on dbname: could be one of UniProt, RefSeq, or Ensembl - if identifier.get("dbname") == "UniProt": + if identifier.get(db_name) == "UniProt": validate_uniprot_identifier(identifier.get("identifier")) - elif identifier.get("dbname") == "RefSeq": + elif identifier.get(db_name) == "RefSeq": validate_refseq_identifier(identifier.get("identifier")) - elif identifier.get("dbname") == "Ensembl": + elif identifier.get(db_name) == "Ensembl": validate_ensembl_identifier(identifier.get("identifier")) diff --git a/tests/models/data.py b/tests/models/data.py index e361be0..92acaf9 100644 --- a/tests/models/data.py +++ b/tests/models/data.py @@ -7,10 +7,10 @@ class TestDataSet(TestCase): def setUp(self): self.dataset = { "title": "title", - "shortDescription": "short description", - "abstractText": "abstract", - "methodText": "methods", - "extraMetadata": {}, + "short_description": "short description", + "abstract_text": "abstract", + "method_text": "methods", + "extra_metadata": {}, "keywords": ["string"], } @@ -18,7 +18,7 @@ def test_valid_all_fields(self): DataSet.parse_obj(self.dataset) def test_valid_exclude_optional(self): - self.dataset.pop("extraMetadata") + self.dataset.pop("extra_metadata") self.dataset.pop("keywords") DataSet.parse_obj(self.dataset) @@ -34,23 +34,23 @@ def setUp(self): pubmed_identifier = {"identifier": "29785012"} self.experiment = { "title": "title", - "shortDescription": "short description", - "abstractText": "abstract", - "methodText": "methods", - "extraMetadata": {}, + "short_description": "short description", + "abstract_text": "abstract", + "method_text": "methods", + "extra_metadata": {}, "keywords": ["string"], - "doiIdentifiers": [doi_identifier], - "pubmedIdentifiers": [pubmed_identifier], + "doi_identifiers": [doi_identifier], + "pubmed_identifiers": [pubmed_identifier], } def test_valid_all_fields(self): Experiment.parse_obj(self.experiment) def test_valid_exclude_optional(self): - self.experiment.pop("extraMetadata") + self.experiment.pop("extra_metadata") self.experiment.pop("keywords") - self.experiment.pop("doiIdentifiers") - self.experiment.pop("pubmedIdentifiers") + self.experiment.pop("doi_identifiers") + self.experiment.pop("pubmed_identifiers") Experiment.parse_obj(self.experiment) diff --git a/tests/models/map.py b/tests/models/map.py index 33c1f60..1ed0ce0 100644 --- a/tests/models/map.py +++ b/tests/models/map.py @@ -5,8 +5,8 @@ class TestReferenceMap(TestCase): def setUp(self): self.reference_map = { - "genomeId": 0, - "targetId": 0, + "genome_id": 0, + "target_id": 0, } def test_valid_all_fields(self): diff --git a/tests/models/sequence.py b/tests/models/sequence.py index 6314cb1..ed06b4c 100644 --- a/tests/models/sequence.py +++ b/tests/models/sequence.py @@ -6,14 +6,14 @@ class Test(TestCase): def test_valid_all_fields(self): sequence = { - "sequenceType": "Protein", + "sequence_type": "Protein", "sequence": "ATC", } WildType.parse_obj(sequence) def test_invalid_sequence_type(self): sequence = { - "sequenceType": "RNA", + "sequence_type": "RNA", "sequence": "ATC", } with self.assertRaises(ValidationError): diff --git a/tests/validation/dataset.py b/tests/validation/dataset.py index ccea630..1a3693c 100644 --- a/tests/validation/dataset.py +++ b/tests/validation/dataset.py @@ -8,13 +8,13 @@ def setUp(self): pubmed_identifier = {"identifier": "29785012"} self.experiment = { "title": "title", - "shortDescription": "short description", - "abstractText": "abstract", - "methodText": "methods", - "extraMetadata": {}, + "short_description": "short description", + "abstract_text": "abstract", + "method_text": "methods", + "extra_metadata": {}, "keywords": ["string"], - "doiIdentifiers": [doi_identifier], - "pubmedIdentifiers": [pubmed_identifier], + "doi_identifiers": [doi_identifier], + "pubmed_identifiers": [pubmed_identifier], } def test_valid_all_fields(self): @@ -29,10 +29,10 @@ def test_valid_all_fields(self): print(e)''' def test_valid_exclude_optional(self): - self.experiment.pop("extraMetadata") + self.experiment.pop("extra_metadata") self.experiment.pop("keywords") - self.experiment.pop("doiIdentifiers") - self.experiment.pop("pubmedIdentifiers") + self.experiment.pop("doi_identifiers") + self.experiment.pop("pubmed_identifiers") validate_experiment(self.experiment) @@ -40,40 +40,40 @@ class TestValidateScoreSet(TestCase): def setUp(self): doi_identifier = {"identifier": "10.1038/s41588-018-0122-z"} pubmed_identifier = {"identifier": "29785012"} - reference_map = {"genomeId": 0, "targetId": 0} - sequence = {"sequenceType": "DNA", "sequence": "ATC"} - external_identifier_id = {"dbname": "UniProt", "identifier": "P01133"} + reference_map = {"genome_id": 0, "target_id": 0} + sequence = {"sequence_type": "DNA", "sequence": "ATC"} + external_identifier_id = {"db_name": "UniProt", "identifier": "P01133"} external_identifier = {"identifier": external_identifier_id, "offset": 0} target = {"name": "name", "category": "Protein coding", - "externalIdentifiers": [external_identifier], - "referenceMaps": [reference_map], - "wtSequence": sequence} + "external_identifiers": [external_identifier], + "reference_maps": [reference_map], + "wt_sequence": sequence} self.scoreset = { "title": "title", - "shortDescription": "short description", - "abstractText": "abstract", - "methodText": "methods", - "extraMetadata": {}, - "dataUsagePolicy": "policy", - "licenceId": 0, + "short_description": "short description", + "abstract_text": "abstract", + "method_text": "methods", + "extra_metadata": {}, + "data_usage_policy": "policy", + "licence_id": 0, "keywords": ["string"], - "experimentUrn": "tmp:0a56b8eb-8e19-4906-8cc7-d17d884330a5", - "supersededScoresetUrn": "tmp:0a56b8eb-8e19-4906-8cc7-d17d884330a5", - "metaAnalysisSourceScoresetUrns": ["tmp:0a56b8eb-8e19-4906-8cc7-d17d884330a5"], - "doiIdentifiers": [doi_identifier], - "pubmedIdentifiers": [pubmed_identifier], - "targetGene": target, + "experiment_urn": "tmp:0a56b8eb-8e19-4906-8cc7-d17d884330a5", + "superseded_scoreset_urn": "tmp:0a56b8eb-8e19-4906-8cc7-d17d884330a5", + "meta_analysis_source_scoreset_urns": ["tmp:0a56b8eb-8e19-4906-8cc7-d17d884330a5"], + "doi_identifiers": [doi_identifier], + "pubmed_identifiers": [pubmed_identifier], + "target_gene": target, } def test_valid_all_fields(self): validate_scoreset(self.scoreset) def test_valid_exclude_optional(self): - self.scoreset.pop("extraMetadata") + self.scoreset.pop("extra_metadata") self.scoreset.pop("keywords") - self.scoreset.pop("doiIdentifiers") - self.scoreset.pop("pubmedIdentifiers") - self.scoreset.pop("supersededScoresetUrn") - self.scoreset.pop("metaAnalysisSourceScoresetUrns") + self.scoreset.pop("doi_identifiers") + self.scoreset.pop("pubmed_identifiers") + self.scoreset.pop("superseded_scoreset_urn") + self.scoreset.pop("meta_analysis_source_scoreset_urns") validate_scoreset(self.scoreset)