Skip to content

Commit b3ec6ef

Browse files
alejoe91mekhlakapoorjtyoung84
authored
fix: DataDescriptionUpgrade sets data_level to raw if not correct (#568)
* fix: DataDescriptionUpgrade sets data_level to raw if field value is not correct * test: add test with missing data_level field * validator for data level * fix: updates data level validator * fix: adds check if user sets data_level explicitly --------- Co-authored-by: Mekhla Kapoor <[email protected]> Co-authored-by: jtyoung84 <[email protected]>
1 parent 157b116 commit b3ec6ef

File tree

4 files changed

+116
-3
lines changed

4 files changed

+116
-3
lines changed

src/aind_data_schema/data_description.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
import re
66
from datetime import datetime
77
from enum import Enum, EnumMeta
8-
from typing import Any, List, Optional
8+
from typing import Any, List, Optional, Union
99

10-
from pydantic import Field
10+
from pydantic import Field, ValidationError, validator
1111

1212
from aind_data_schema.base import AindCoreModel, AindModel, BaseName, BaseNameEnumMeta, PIDName, Registry
1313

@@ -271,9 +271,14 @@ class DataDescription(AindCoreModel):
271271
)
272272
data_summary: Optional[str] = Field(None, title="Data summary", description="Semantic summary of experimental goal")
273273

274+
# TODO: We need to remove all the custom class constructors on pydantic
275+
# models
274276
def __init__(self, label=None, **kwargs):
275277
"""Construct a generic DataDescription"""
276278

279+
# Ideally, we'd like to just use validators to parse information,
280+
# but we need to get rid of these init methods first since they
281+
# don't get called on here
277282
super().__init__(**kwargs)
278283

279284
if label is not None:
@@ -297,6 +302,23 @@ def parse_name(cls, name):
297302
creation_time=creation_time,
298303
)
299304

305+
@validator("data_level", pre=True, always=True)
def upgrade_data_level(cls, value: Union[str, DataLevel]):
    """Normalise the data_level input, upgrading legacy values.

    Parameters
    ----------
    value : Union[str, DataLevel]
        The raw input for the data_level field. Because the validator is
        registered with pre=True it sees the value before pydantic's own
        parsing of the field.

    Returns
    -------
    DataLevel
        DataLevel.RAW for the legacy strings 'raw level' and 'raw data';
        DataLevel(value) for any other string; the input itself when it is
        already a DataLevel.

    Raises
    ------
    ValueError
        If value is a string that is not a valid DataLevel (raised by the
        enum lookup).
    TypeError
        If value is neither a string nor a DataLevel.

    """
    if isinstance(value, str):
        # Legacy spellings used by older schema versions map to RAW.
        if value in ("raw level", "raw data"):
            return DataLevel.RAW
        # Any other string must be a current DataLevel value; the enum
        # lookup raises ValueError when it is not.
        return DataLevel(value)
    if isinstance(value, DataLevel):
        # Already the right type; nothing to parse.
        return value
    # Validators signal failure by raising ValueError/TypeError, which
    # pydantic wraps into a ValidationError for this field. pydantic's
    # ValidationError itself cannot be built from a bare message (its
    # constructor expects an error list and a model), so raising it here
    # would replace the intended message with an unrelated constructor
    # error.
    raise TypeError("Data Level needs to be string or enum")
321+
300322

301323
class DerivedDataDescription(DataDescription):
302324
"""A logical collection of data files derived via processing"""

src/aind_data_schema/schema_upgrade/data_description_upgrade.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ def upgrade_data_description(self, **kwargs) -> DataDescription:
131131
modality = [ModalityUpgrade.upgrade_modality(m) for m in old_modality]
132132
else:
133133
modality = getattr(DataDescription.__fields__.get("modality"), "default")
134+
old_data_level = self._get_or_default(self.old_data_description_model, "data_level", kwargs)
134135

135136
experiment_type = self._get_or_default(self.old_data_description_model, "experiment_type", kwargs)
136137
platform = None
@@ -155,7 +156,7 @@ def upgrade_data_description(self, **kwargs) -> DataDescription:
155156
name=self._get_or_default(self.old_data_description_model, "name", kwargs),
156157
institution=institution,
157158
funding_source=funding_source,
158-
data_level=self._get_or_default(self.old_data_description_model, "data_level", kwargs),
159+
data_level=old_data_level,
159160
group=self._get_or_default(self.old_data_description_model, "group", kwargs),
160161
investigators=self._get_or_default(self.old_data_description_model, "investigators", kwargs),
161162
project_name=self._get_or_default(self.old_data_description_model, "project_name", kwargs),
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/data_description.py",
3+
"schema_version": "0.3.0",
4+
"license": "CC-BY-4.0",
5+
"creation_time": "10:52:15",
6+
"creation_date": "2022-07-26",
7+
"name": "ecephys_624643_2022-07-26_10-52-15",
8+
"institution": "AIND",
9+
"funding_source": [],
10+
"data_level": "raw data",
11+
"group": null,
12+
"project_name": null,
13+
"project_id": null,
14+
"restrictions": null,
15+
"modality": "ecephys",
16+
"subject_id": "624643"
17+
}

tests/test_schema_upgrade.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,79 @@ def test_upgrades_0_3_0(self):
7676
self.assertEqual([], new_data_description.related_data)
7777
self.assertIsNone(new_data_description.data_summary)
7878

79+
def test_upgrades_0_3_0_wrong_field(self):
    """Tests data_description_0.3.0_wrong_field.json is mapped correctly."""
    data_description_0_3_0 = self.data_descriptions["data_description_0.3.0_wrong_field.json"]
    upgrader = DataDescriptionUpgrade(old_data_description_model=data_description_0_3_0)
    # Without extra arguments the only error expected is platform being None
    with self.assertRaises(Exception) as e:
        upgrader.upgrade_data_description()

    expected_error_message = (
        "ValidationError("
        "model='DataDescription', "
        "errors=[{"
        "'loc': ('platform',), "
        "'msg': 'none is not an allowed value', "
        "'type': 'type_error.none.not_allowed'"
        "}])"
    )
    self.assertEqual(expected_error_message, repr(e.exception))

    # Should work by setting platform explicitly and DataLevel
    new_data_description = upgrader.upgrade_data_description(platform=Platform.ECEPHYS, data_level=DataLevel.RAW)
    self.assertEqual(datetime.datetime(2022, 7, 26, 10, 52, 15), new_data_description.creation_time)
    self.assertEqual("ecephys_624643_2022-07-26_10-52-15", new_data_description.name)
    self.assertEqual(Institution.AIND, new_data_description.institution)
    self.assertEqual([], new_data_description.funding_source)
    self.assertEqual(DataLevel.RAW, new_data_description.data_level)
    self.assertIsNone(new_data_description.group)
    self.assertEqual([], new_data_description.investigators)
    self.assertIsNone(new_data_description.project_name)
    self.assertIsNone(new_data_description.restrictions)
    self.assertEqual([Modality.ECEPHYS], new_data_description.modality)
    self.assertEqual("624643", new_data_description.subject_id)
    self.assertEqual([], new_data_description.related_data)
    self.assertIsNone(new_data_description.data_summary)

    # Should also work by inputting legacy
    new_data_description2 = upgrader.upgrade_data_description(platform=Platform.ECEPHYS, data_level="raw level")
    self.assertEqual(DataLevel.RAW, new_data_description2.data_level)

    # Should fail if inputting unknown string
    with self.assertRaises(Exception) as e1:
        upgrader.upgrade_data_description(platform=Platform.ECEPHYS, data_level="asfnewnjfq")

    expected_error_message1 = (
        "ValidationError(model='DataDescription', "
        "errors=[{'loc': ('data_level',), "
        "'msg': \"'asfnewnjfq' is not a valid DataLevel\", "
        "'type': 'value_error'}])"
    )

    self.assertEqual(expected_error_message1, repr(e1.exception))

    # Should also fail if inputting wrong type. Only the error location and
    # error type are pinned: the message text depends on how the validator
    # happens to fail for non-string input, which is an implementation
    # detail rather than part of the contract.
    with self.assertRaises(Exception) as e2:
        upgrader.upgrade_data_description(platform=Platform.ECEPHYS, data_level=["raw"])

    wrong_type_errors = e2.exception.errors()
    self.assertEqual(1, len(wrong_type_errors))
    self.assertEqual(("data_level",), wrong_type_errors[0]["loc"])
    self.assertEqual("type_error", wrong_type_errors[0]["type"])

    # Should work if data_level is missing in original json doc and
    # user sets it explicitly
    data_description_dict = data_description_0_3_0.dict()
    del data_description_dict["data_level"]
    data_description_0_3_0_no_data_level = DataDescription.construct(**data_description_dict)
    upgrader3 = DataDescriptionUpgrade(old_data_description_model=data_description_0_3_0_no_data_level)
    new_data_description3 = upgrader3.upgrade_data_description(platform=Platform.ECEPHYS, data_level=DataLevel.RAW)
    self.assertEqual(DataLevel.RAW, new_data_description3.data_level)
151+
79152
def test_upgrades_0_4_0(self):
80153
"""Tests data_description_0.4.0.json is mapped correctly."""
81154
data_description_0_4_0 = self.data_descriptions["data_description_0.4.0.json"]

0 commit comments

Comments
 (0)