22
33from datetime import datetime
44from enum import Enum
5- from typing import Dict , List
5+ from typing import Dict , List , Optional
6+ from uuid import UUID , uuid4
67
7- from pydantic import Extra , Field
8+ from pydantic import Field , root_validator , validate_model
89
910from aind_data_schema .base import AindCoreModel
11+ from aind_data_schema .data_description import DataDescription
12+ from aind_data_schema .imaging .acquisition import Acquisition
13+ from aind_data_schema .imaging .instrument import Instrument
14+ from aind_data_schema .procedures import Procedures
15+ from aind_data_schema .processing import Processing
16+ from aind_data_schema .rig import Rig
17+ from aind_data_schema .session import Session
1018from aind_data_schema .subject import Subject
1119
1220
@@ -29,8 +37,10 @@ class Metadata(AindCoreModel):
2937 """The records in the Data Asset Collection needs to contain certain fields
3038 to easily query and index the data."""
3139
32- id : str = Field (
33- ...,
40+ schema_version : str = Field ("0.0.2" , description = "schema version" , title = "Version" , const = True )
41+
42+ id : UUID = Field (
43+ default_factory = uuid4 ,
3444 alias = "_id" ,
3545 title = "Data Asset ID" ,
3646 description = "The unique id of the data asset." ,
@@ -40,31 +50,90 @@ class Metadata(AindCoreModel):
4050 description = "Name of the data asset." ,
4151 title = "Data Asset Name" ,
4252 )
53+ # created and last_modified default to the current UTC time via default_factory;
54+ # each factory runs separately, so the two values are not guaranteed to match.
4355 created : datetime = Field (
44- ... ,
56+ default_factory = datetime . utcnow ,
4557 title = "Created" ,
46- description = "The data and time the data asset created." ,
58+ description = "The utc date and time the data asset created." ,
4759 )
4860 last_modified : datetime = Field (
49- ..., title = "Last Modified" , description = "The date and time that the data asset was last modified."
61+ default_factory = datetime .utcnow ,
62+ title = "Last Modified" ,
63+ description = "The utc date and time that the data asset was last modified." ,
5064 )
5165 location : str = Field (
5266 ...,
5367 title = "Location" ,
5468 description = "Current location of the data asset." ,
5569 )
56- metadata_status : MetadataStatus = Field (..., title = " Metadata Status" , description = "The status of the metadata." )
57- schema_version : str = Field ("0.0.1" , title = "Schema Version" , const = True )
70+ metadata_status : MetadataStatus = Field (
71+ default = MetadataStatus .UNKNOWN , title = " Metadata Status" , description = "The status of the metadata."
72+ )
5873 external_links : List [Dict [ExternalPlatforms , str ]] = Field (
59- ... , title = "External Links" , description = "Links to the data asset on different platforms."
74+ default = [] , title = "External Links" , description = "Links to the data asset on different platforms."
6075 )
61- subject : Subject = Field (
62- ...,
76+ # We can make the AindCoreModel fields optional for now and do more
77+ # granular validations using validators. We may have some older data
78+ # assets in S3 that don't have metadata attached. We'd still like to
79+ # index that data, but we can flag those instances as MISSING or UNKNOWN
80+ subject : Optional [Subject ] = Field (
81+ None ,
6382 title = "Subject" ,
64- description = "Description of a subject of data collection." ,
83+ description = "Subject of data collection." ,
6584 )
85+ data_description : Optional [DataDescription ] = Field (
86+ None , title = "Data Description" , description = "A logical collection of data files."
87+ )
88+ procedures : Optional [Procedures ] = Field (
89+ None , title = "Procedures" , description = "All procedures performed on a subject."
90+ )
91+ session : Optional [Session ] = Field (None , title = "Session" , description = "Description of a session." )
92+ rig : Optional [Rig ] = Field (None , title = "Rig" , description = "Rig." )
93+ processing : Optional [Processing ] = Field (None , title = "Processing" , description = "All processes run on data." )
94+ acquisition : Optional [Acquisition ] = Field (None , title = "Acquisition" , description = "Imaging acquisition session" )
95+ instrument : Optional [Instrument ] = Field (
96+ None , title = "Instrument" , description = "Instrument, which is a collection of devices"
97+ )
98+
@root_validator(pre=False)
def validate_metadata(cls, values):
    """Re-validate the core-model fields and derive ``metadata_status``.

    Every field declared on the class whose type is a subclass of
    ``AindCoreModel`` is examined. A field that holds a value is dumped
    with ``.dict()`` and checked with ``validate_model``:

    * on a validation error the value is replaced by an unvalidated
      instance built with ``construct`` and the status becomes INVALID;
    * otherwise the value is replaced by a freshly instantiated model.

    A record without a ``subject`` is always reported as MISSING, which
    takes precedence over INVALID. The computed status is written to
    ``values["metadata_status"]``, replacing whatever was there.

    Parameters
    ----------
    values : dict
        Field values handed to the root validator. Because ``pre=False``
        the core-model entries are expected to already be model
        instances rather than raw dictionaries.

    Returns
    -------
    dict
        The same ``values`` mapping, updated in place.
    """
    # Gather the fields that hold AindCoreModel subclasses. The explicit
    # try/except can go once python3.7 is no longer supported.
    core_fields = []
    for candidate in cls.__fields__.values():
        try:
            is_core_model = issubclass(candidate.type_, AindCoreModel)
        except TypeError:
            # python3.7 raises TypeError when issubclass is given a
            # typing generic; such fields are never core models.
            continue
        if is_core_model:
            core_fields.append(candidate)

    # Start optimistic and downgrade the status as problems are found.
    status = MetadataStatus.VALID
    for core_field in core_fields:
        key = core_field.name
        current = values.get(key)
        if current is None:
            continue
        schema_class = core_field.type_
        contents = current.dict()
        # validate_model's last return item is the validation error
        # (falsy when the contents are valid).
        validation_error = validate_model(schema_class, contents)[-1]
        if validation_error:
            # Keep the invalid data, but flag the whole record.
            values[key] = schema_class.construct(**contents)
            status = MetadataStatus.INVALID
        else:
            values[key] = schema_class(**contents)

    # subject is treated as required: its absence marks the record as
    # missing regardless of what was found above.
    if values.get("subject") is None:
        status = MetadataStatus.MISSING
    values["metadata_status"] = status
    return values
0 commit comments