Skip to content

Commit 1ce3cfb

Browse files
authored
feat: validate datetime objects in the schema (except QC and model) (#1491)
* feat: validate times * fix: reduce requirement for data_description.name to just the same day * chore: lint * chore: lint * tests: fix variable passing issue * fix: remove validators in procedures * chore: lint * fix: add the additional time validators for other core files * fix: revert validation on prep_date field * tests: add full coverage for metadata new time validators * chore: lint * fix: remove unnecessary try/except * chore: lint * refactor: simplify time validator by pulling function into a util * chore: lint
1 parent fa02361 commit 1ce3cfb

File tree

8 files changed

+745
-15
lines changed

8 files changed

+745
-15
lines changed

src/aind_data_schema/components/measurements.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
"""Calibration data models"""
22

3+
from typing import Annotated, List, Literal, Optional
34
from enum import Enum
4-
from typing import List, Literal, Optional
55

66
from aind_data_schema_models.units import UNITS, PowerUnit, TimeUnit, VolumeUnit, VoltageUnit
77
from pydantic import model_validator
88

99
from aind_data_schema.base import AwareDatetimeWithDefault, DataModel, Discriminated, Field, GenericModel
1010
from aind_data_schema.components.configs import DeviceConfig
1111
from aind_data_schema.components.reagent import Reagent
12+
from aind_data_schema.utils.validators import TimeValidation
1213

1314

1415
class FitType(Enum):
@@ -50,7 +51,9 @@ def validate_fit_type(cls, values):
5051
class Calibration(DeviceConfig):
5152
"""Generic calibration class"""
5253

53-
calibration_date: AwareDatetimeWithDefault = Field(..., title="Date and time of calibration")
54+
calibration_date: Annotated[AwareDatetimeWithDefault, TimeValidation.BEFORE] = Field(
55+
..., title="Date and time of calibration"
56+
)
5457
description: str = Field(..., title="Description", description="Brief description of what is being calibrated")
5558
input: List[float | str] = Field(..., description="Calibration input", title="Inputs")
5659
input_unit: UNITS = Field(..., title="Input unit")
@@ -111,7 +114,9 @@ class PowerCalibration(Calibration):
111114
class Maintenance(DeviceConfig):
112115
"""Generic maintenance class"""
113116

114-
maintenance_date: AwareDatetimeWithDefault = Field(..., title="Date and time of maintenance")
117+
maintenance_date: Annotated[AwareDatetimeWithDefault, TimeValidation.BEFORE] = Field(
118+
..., title="Date and time of maintenance"
119+
)
115120
description: str = Field(..., title="Description", description="Description on maintenance procedure")
116121
protocol_id: Optional[str] = Field(default=None, title="Protocol ID")
117122

src/aind_data_schema/components/subjects.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from datetime import date as date_type
44
from datetime import time
55
from enum import Enum
6-
from typing import List, Optional
6+
from typing import Annotated, List, Optional
77

88
from aind_data_schema_models.organizations import Organization
99
from aind_data_schema_models.pid_names import PIDName
@@ -12,6 +12,7 @@
1212
from pydantic_core.core_schema import ValidationInfo
1313

1414
from aind_data_schema.base import DataModel
15+
from aind_data_schema.utils.validators import TimeValidation
1516

1617

1718
class Sex(str, Enum):
@@ -81,7 +82,7 @@ class MouseSubject(DataModel):
8182
"""Description of a mouse subject"""
8283

8384
sex: Sex = Field(..., title="Sex")
84-
date_of_birth: date_type = Field(..., title="Date of birth")
85+
date_of_birth: Annotated[date_type, TimeValidation.BEFORE] = Field(..., title="Date of birth")
8586
strain: Strain.ONE_OF = Field(..., title="Strain")
8687
species: Species.ONE_OF = Field(..., title="Species")
8788
alleles: List[PIDName] = Field(default=[], title="Alleles", description="Allele names and persistent IDs")

src/aind_data_schema/core/acquisition.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Schema describing data acquisition metadata and configurations"""
22

33
from decimal import Decimal
4-
from typing import List, Literal, Optional
4+
from typing import Annotated, List, Literal, Optional
55

66
from aind_data_schema_models.modalities import Modality
77
from aind_data_schema_models.stimulus_modality import StimulusModality
88
from aind_data_schema_models.units import MassUnit, VolumeUnit
9+
from aind_data_schema.utils.validators import TimeValidation
910
from pydantic import Field, SkipValidation, model_validator
1011

1112
from aind_data_schema.base import AwareDatetimeWithDefault, DataCoreModel, DataModel, DiscriminatedList, GenericModel
@@ -87,8 +88,16 @@ class DataStream(DataModel):
8788
same time.
8889
"""
8990

90-
stream_start_time: AwareDatetimeWithDefault = Field(..., title="Stream start time")
91-
stream_end_time: AwareDatetimeWithDefault = Field(..., title="Stream stop time")
91+
stream_start_time: Annotated[
92+
AwareDatetimeWithDefault,
93+
Field(..., title="Stream start time"),
94+
TimeValidation.BETWEEN,
95+
]
96+
stream_end_time: Annotated[
97+
AwareDatetimeWithDefault,
98+
Field(..., title="Stream stop time"),
99+
TimeValidation.BETWEEN,
100+
]
92101
modalities: List[Modality.ONE_OF] = Field(
93102
..., title="Modalities", description="Modalities that are acquired in this stream"
94103
)
@@ -165,12 +174,12 @@ class StimulusEpoch(DataModel):
165174
same time. Not all acquisitions have StimulusEpochs.
166175
"""
167176

168-
stimulus_start_time: AwareDatetimeWithDefault = Field(
177+
stimulus_start_time: Annotated[AwareDatetimeWithDefault, TimeValidation.BETWEEN] = Field(
169178
...,
170179
title="Stimulus start time",
171180
description="When a specific stimulus begins. This might be the same as the acquisition start time.",
172181
)
173-
stimulus_end_time: AwareDatetimeWithDefault = Field(
182+
stimulus_end_time: Annotated[AwareDatetimeWithDefault, TimeValidation.BETWEEN] = Field(
174183
...,
175184
title="Stimulus end time",
176185
description="When a specific stimulus ends. This might be the same as the acquisition end time.",

src/aind_data_schema/core/metadata.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from aind_data_schema.core.quality_control import QualityControl
3131
from aind_data_schema.core.subject import Subject
3232
from aind_data_schema.utils.compatibility_check import InstrumentAcquisitionCompatibility
33+
from aind_data_schema.utils.validators import recursive_time_validation_check, validate_creation_time_after_midnight
3334

3435
CORE_FILES = [
3536
"subject",
@@ -269,6 +270,79 @@ def validate_training_protocol_references(self):
269270

270271
return self
271272

273+
@model_validator(mode="after")
def validate_time_constraints(self):
    """Check TimeValidation-annotated fields against the acquisition time window.

    Nothing is checked when no acquisition is present. Otherwise the acquisition
    itself, followed by the processing, subject, instrument and procedures core
    files (each only if present), is walked with ``recursive_time_validation_check``.

    Returns
    -------
    Metadata
        The validated instance (``self``), as required by an "after" model validator.
    """
    acquisition = self.acquisition
    if not acquisition:
        return self

    # Both bounds are read only when the acquisition exposes both attributes;
    # otherwise they stay None and are passed on as None
    acquisition_start_time = None
    acquisition_end_time = None
    if hasattr(acquisition, "acquisition_start_time") and hasattr(acquisition, "acquisition_end_time"):
        acquisition_start_time = acquisition.acquisition_start_time
        acquisition_end_time = acquisition.acquisition_end_time

    recursive_time_validation_check(
        acquisition,
        acquisition_start_time=acquisition_start_time,
        acquisition_end_time=acquisition_end_time,
    )

    # Remaining core files are checked in a fixed order, skipping absent ones
    for core_file_name in ("processing", "subject", "instrument", "procedures"):
        core_file = getattr(self, core_file_name)
        if core_file:
            recursive_time_validation_check(
                core_file,
                acquisition_start_time=acquisition_start_time,
                acquisition_end_time=acquisition_end_time,
            )

    return self
318+
319+
@model_validator(mode="after")
def validate_data_description_name_time_consistency(self):
    """Ensure the creation time embedded in data_description.name is not earlier
    than midnight of the day on which the acquisition ended.

    The check is skipped when the data description, its name, the acquisition,
    the acquisition end time, or the parsed creation time is missing.

    Raises
    ------
    ValueError
        If the parsed creation time is before midnight of the acquisition end day.
    """
    description = self.data_description
    acquisition = self.acquisition
    if not (description and acquisition):
        return self

    if not description.name:
        return self
    if not hasattr(acquisition, "acquisition_end_time") or acquisition.acquisition_end_time is None:
        return self

    # The creation time is taken from the parsed components of the asset name
    name_parts = DataDescription.parse_name(description.name, description.data_level)
    name_creation_time = name_parts.get("creation_time")
    if not name_creation_time:
        return self

    try:
        validate_creation_time_after_midnight(name_creation_time, acquisition.acquisition_end_time)
    except ValueError as e:
        # Re-raise with wording specific to data_description.name
        raise ValueError(
            f"Creation time from data_description.name ({name_creation_time}) "
            f"must be on or after midnight of the acquisition day "
            f"({self.acquisition.acquisition_end_time.date()})"
        ) from e

    return self
345+
272346

273347
def create_metadata_json(
274348
name: str,

src/aind_data_schema/core/processing.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
import warnings
55
from enum import Enum
6-
from typing import Dict, List, Literal, Optional
6+
from typing import Annotated, Dict, List, Literal, Optional
77

88
from aind_data_schema_models.process_names import ProcessName
99
from aind_data_schema_models.units import MemoryUnit, UnitlessUnit
@@ -13,6 +13,7 @@
1313
from aind_data_schema.components.identifiers import Code, Person
1414
from aind_data_schema.components.wrappers import AssetPath
1515
from aind_data_schema.utils.merge import merge_notes, merge_optional_list
16+
from aind_data_schema.utils.validators import TimeValidation
1617

1718

1819
class ProcessStage(str, Enum):
@@ -63,8 +64,8 @@ class DataProcess(DataModel):
6364
pipeline_name: Optional[str] = Field(
6465
default=None, title="Pipeline name", description="Pipeline names must exist in Processing.pipelines"
6566
)
66-
start_date_time: AwareDatetimeWithDefault = Field(..., title="Start date time")
67-
end_date_time: AwareDatetimeWithDefault = Field(..., title="End date time")
67+
start_date_time: Annotated[AwareDatetimeWithDefault, TimeValidation.AFTER] = Field(..., title="Start date time")
68+
end_date_time: Annotated[AwareDatetimeWithDefault, TimeValidation.AFTER] = Field(..., title="End date time")
6869
output_path: Optional[AssetPath] = Field(
6970
default=None, title="Output path", description="Path to processing outputs, if stored."
7071
)

src/aind_data_schema/utils/validators.py

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,28 @@
11
""" Validator utility functions """
22

33
import logging
4+
from datetime import date, datetime
45
from enum import Enum
56
from pathlib import Path
6-
from typing import Any, List, Optional
7+
from typing import Any, List, Optional, Union
78

89
from aind_data_schema.components.wrappers import AssetPath
910

1011
# Fields that should have the same length as the coordinate system axes
1112
AXIS_TYPES = ["Translation", "Rotation", "Scale"]
1213

1314

15+
class TimeValidation(Enum):
    """Marker placed in ``Annotated[...]`` metadata to say how a time field must
    relate to the acquisition start and end times."""

    #: Time should be between start and end.
    BETWEEN = "between"

    #: Time should be after the start time.
    AFTER = "after"

    #: Time should be before the end time.
    BEFORE = "before"
24+
25+
1426
class CoordinateSystemException(Exception):
1527
"""Raised when a coordinate system is missing."""
1628

@@ -44,6 +56,83 @@ def subject_specimen_id_compatibility(subject_id: str, specimen_id: str) -> bool
4456
return subject_id in specimen_id
4557

4658

59+
def _merged_class_annotations(obj):
    """Return the annotations declared on the class of obj and on all of its base classes."""
    merged = {}
    # Base classes first, so a field re-declared on a subclass overrides its parent
    for klass in reversed(type(obj).__mro__):
        merged.update(getattr(klass, "__annotations__", None) or {})
    return merged


def recursive_time_validation_check(data, acquisition_start_time=None, acquisition_end_time=None):
    """Recursively check fields for TimeValidation annotations and validate against acquisition times.

    Annotations are collected across the whole class hierarchy of each object:
    ``__annotations__`` on a class only lists the fields declared on that class,
    so looking at it alone would skip TimeValidation markers inherited from a
    base class.

    Parameters
    ----------
    data : Any
        The data structure to check recursively
    acquisition_start_time : Optional[datetime]
        The acquisition start time to validate against
    acquisition_end_time : Optional[datetime]
        The acquisition end time to validate against

    Raises
    ------
    ValueError
        If an annotated field violates its time constraint
    """
    if not data:
        return

    # Fields are validated only when both bounds are known; nested structures
    # are still traversed below either way
    if hasattr(data, "__dict__") and acquisition_start_time and acquisition_end_time:
        annotations = _merged_class_annotations(data)
        for field_name, field_value in data.__dict__.items():
            if not field_value:
                continue
            # Only Annotated[...] types carry __metadata__; anything else yields nothing
            for metadata in getattr(annotations.get(field_name), "__metadata__", ()):
                if isinstance(metadata, TimeValidation):
                    _validate_time_constraint(
                        field_value, metadata, acquisition_start_time, acquisition_end_time, field_name
                    )

    # Recursively check nested structures
    _time_validation_recurse_helper(data, acquisition_start_time, acquisition_end_time)
91+
92+
93+
def _validate_time_constraint(field_value, time_validation, start_time, end_time, field_name):
    """Raise ValueError when field_value violates time_validation relative to start_time/end_time.

    A plain ``date`` is compared as midnight of that day, using the timezone of
    start_time; ``datetime`` values are compared unchanged. The error message
    always reports the original field_value.
    """
    candidate = field_value
    if isinstance(candidate, date) and not isinstance(candidate, datetime):
        # Promote the date to midnight in the timezone of the start time
        candidate = datetime.combine(candidate, datetime.min.time()).replace(tzinfo=start_time.tzinfo)

    if time_validation is TimeValidation.BETWEEN:
        # Both bounds are inclusive
        inside_window = start_time <= candidate <= end_time
        if not inside_window:
            raise ValueError(
                f"Field '{field_name}' with value {field_value} must be between {start_time} and {end_time}"
            )
        return

    if time_validation is TimeValidation.AFTER:
        # Strictly after the start: equality is rejected
        if candidate <= start_time:
            raise ValueError(f"Field '{field_name}' with value {field_value} must be after {start_time}")
        return

    if time_validation is TimeValidation.BEFORE:
        # Strictly before the end: equality is rejected
        if candidate >= end_time:
            raise ValueError(f"Field '{field_name}' with value {field_value} must be before {end_time}")
118+
119+
120+
def _time_validation_recurse_helper(data, acquisition_start_time, acquisition_end_time):
    """Run recursive_time_validation_check on the children of data.

    Lists contribute every item. Objects with a ``__dict__`` contribute every
    attribute value except ``object_type`` and callables. Anything else has no
    children.
    """
    if isinstance(data, list):
        children = data
    elif hasattr(data, "__dict__"):
        # Evaluated lazily, one attribute at a time, while the loop below runs
        children = (
            value for name, value in data.__dict__.items() if name != "object_type" and not callable(value)
        )
    else:
        return

    for child in children:
        recursive_time_validation_check(child, acquisition_start_time, acquisition_end_time)
134+
135+
47136
def _recurse_helper(data, **kwargs):
48137
"""Helper function for recursive_axis_order_check: recurse calls for lists and objects only"""
49138
if isinstance(data, list):
@@ -156,3 +245,46 @@ def recursive_check_paths(obj: Any, directory: Optional[Path] = None):
156245
elif hasattr(obj, "__dict__"):
157246
for value in vars(obj).values():
158247
recursive_check_paths(value, directory)
248+
249+
250+
def validate_creation_time_after_midnight(
    creation_time: Optional[Union[datetime, date]], reference_time: Optional[datetime]
) -> None:
    """Validate that creation_time is on or after midnight of the reference_time's day.

    Timezone handling is symmetric: when exactly one of the two values is
    timezone-aware, the naive one is interpreted in the timezone of the aware
    one, so the comparison never mixes naive and aware datetimes.

    Parameters
    ----------
    creation_time : Optional[Union[datetime, date]]
        The creation time to validate; a plain date is treated as midnight of that day
    reference_time : Optional[datetime]
        The reference time to compare against (typically acquisition_end_time)

    Raises
    ------
    ValueError
        If creation_time is before midnight of the reference_time's day
    """
    if not creation_time or not reference_time:
        return

    # Values that are neither date nor datetime are not validated
    if not isinstance(creation_time, date):
        return

    # Convert date to datetime if needed
    if not isinstance(creation_time, datetime):
        creation_time = datetime.combine(creation_time, datetime.min.time())

    # Midnight of the reference day, in the timezone of the reference time
    midnight_of_reference_day = datetime.combine(reference_time.date(), datetime.min.time()).replace(
        tzinfo=reference_time.tzinfo
    )

    if creation_time.tzinfo is None and reference_time.tzinfo is not None:
        # Naive creation time (local time): assume the timezone of the reference
        creation_time = creation_time.replace(tzinfo=reference_time.tzinfo)
    elif creation_time.tzinfo is not None and reference_time.tzinfo is None:
        # Naive reference: interpret its midnight in the timezone of the creation
        # time; comparing naive with aware would raise TypeError
        midnight_of_reference_day = midnight_of_reference_day.replace(tzinfo=creation_time.tzinfo)

    # Validate that creation_time is on or after midnight of the reference day
    if creation_time < midnight_of_reference_day:
        raise ValueError(
            f"Creation time ({creation_time}) "
            f"must be on or after midnight of the reference day ({midnight_of_reference_day})"
        )

0 commit comments

Comments
 (0)