
Commit 464c588

Merge branch 'feat-344-slap' of https://github.com/AllenNeuralDynamics/aind-data-schema into feat-344-slap
2 parents 4cde7c3 + 7186c78

File tree: 13 files changed, +278 −57 lines

examples/data_description.json

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 {
    "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/data_description.py",
-   "schema_version": "0.10.1",
+   "schema_version": "0.10.2",
    "license": "CC-BY-4.0",
    "creation_time": "2022-02-21T16:30:01",
    "name": "ecephys_12345_2022-02-21_16-30-01",

src/aind_data_schema/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -1,7 +1,7 @@
 """ imports for AindModel subclasses
 """
 
-__version__ = "0.15.18"
+__version__ = "0.15.20"
 
 
 from .behavior.behavior_rig import BehaviorRig
@@ -12,6 +12,7 @@
 from .imaging.acquisition import Acquisition, Axis
 from .imaging.instrument import Instrument
 from .imaging.mri_session import MriSession
+from .metadata import Metadata
 from .ophys.ophys_rig import OphysRig
 from .ophys.ophys_session import OphysSession
 from .procedures import Procedures
@@ -44,4 +45,5 @@
     "MriSession",
     "Rig",
     "Session",
+    "Metadata",
 ]

src/aind_data_schema/base.py

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ def write_standard_file(self, output_directory: Optional[Path] = None, prefix=No
             filename = output_directory / filename
 
         with open(filename, "w") as f:
-            f.write(self.json(indent=3))
+            f.write(self.json(indent=3, by_alias=True))
 
 
 class _TypeEnumSubset(object):
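For context, the by_alias=True change matters because the new Metadata model below aliases its id field to "_id"; without it, write_standard_file would emit the Python field name rather than the alias. A minimal pydantic v1 sketch (hypothetical toy model, not part of this commit) illustrating the difference:

# Hypothetical toy model (not from aind-data-schema) showing by_alias behavior
from pydantic import BaseModel, Field  # pydantic v1 API, as used by this repo


class AliasedExample(BaseModel):
    """Toy model with an aliased field, mirroring Metadata's alias="_id"."""

    record_id: str = Field("abc-123", alias="_id")

    class Config:
        allow_population_by_field_name = True


example = AliasedExample(record_id="abc-123")
print(example.json(indent=3))                 # serializes under "record_id"
print(example.json(indent=3, by_alias=True))  # serializes under "_id"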

src/aind_data_schema/data_description.py

Lines changed: 47 additions & 1 deletion
@@ -31,7 +31,12 @@ class DataRegex(Enum):
         f"^(?P<input>.+?_{RegexParts.DATE.value}_{RegexParts.TIME.value})_(?P<process_name>.+?)_(?P<c_date>"
         f"{RegexParts.DATE.value})_(?P<c_time>{RegexParts.TIME.value})"
     )
+    ANALYZED = (
+        f"^(?P<project_abbreviation>.+?)_(?P<analysis_name>.+?)_(?P<c_date>"
+        f"{RegexParts.DATE.value})_(?P<c_time>{RegexParts.TIME.value})$"
+    )
     NO_UNDERSCORES = "^[^_]+$"
+    NO_SPECIAL_CHARS = '^[^<>:;"/|? \\_]+$'
 
 
 class DataLevel(Enum):
@@ -198,7 +203,7 @@ class RelatedData(AindModel):
 class DataDescription(AindCoreModel):
     """Description of a logical collection of data files"""
 
-    schema_version: str = Field("0.10.1", title="Schema Version", const=True)
+    schema_version: str = Field("0.10.2", title="Schema Version", const=True)
     license: str = Field("CC-BY-4.0", title="License", const=True)
 
     creation_time: datetime = Field(
@@ -244,6 +249,7 @@ class DataDescription(AindCoreModel):
     )
     project_name: Optional[str] = Field(
         None,
+        regex=DataRegex.NO_SPECIAL_CHARS.value,
         description="A name for a set of coordinated activities intended to achieve one or more objectives.",
         title="Project Name",
     )
@@ -459,3 +465,43 @@ def parse_name(cls, name):
             subject_id=m.group("subject_id"),
             creation_time=creation_time,
         )
+
+
+class AnalysisDescription(DataDescription):
+    """A collection of data files as analyzed from an asset"""
+
+    data_level: DataLevel = Field(
+        DataLevel.DERIVED, description="Level of processing that data has undergone", title="Data Level", const=True
+    )
+    project_name: str = Field(
+        ...,
+        regex=DataRegex.NO_SPECIAL_CHARS.value,
+        description="Name of the project the analysis belongs to",
+        title="Project name",
+    )
+    analysis_name: str = Field(
+        ..., regex=DataRegex.NO_SPECIAL_CHARS.value, description="Name of the analysis performed", title="Analysis name"
+    )
+
+    @property
+    def label(self):
+        """returns the label of the file"""
+
+        return f"{self.project_name}_{self.analysis_name}"
+
+    @classmethod
+    def parse_name(cls, name):
+        """Decompose raw Analysis name into component parts"""
+
+        m = re.match(f"{DataRegex.ANALYZED.value}", name)
+
+        if m is None:
+            raise ValueError(f"name({name}) does not match pattern")
+
+        creation_time = datetime_from_name_string(m.group("c_date"), m.group("c_time"))
+
+        return dict(
+            project_abbreviation=m.group("project_abbreviation"),
+            analysis_name=m.group("analysis_name"),
+            creation_time=creation_time,
+        )
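Taken together, the new ANALYZED regex and AnalysisDescription define a {project}_{analysis}_{date}_{time} naming scheme for analysis assets, with NO_SPECIAL_CHARS forbidding underscores, spaces, and filesystem-unsafe characters in the project and analysis names. A rough usage sketch, mirroring the unit tests added to tests/test_data_description.py below:

from aind_data_schema.data_description import AnalysisDescription

# Decompose an analysis asset name into its parts (values taken from the tests below)
toks = AnalysisDescription.parse_name("project_analysis_3033-12-21_04-22-11")
print(toks["project_abbreviation"])  # "project"
print(toks["analysis_name"])         # "analysis"
print(toks["creation_time"])         # datetime(3033, 12, 21, 4, 22, 11)

# Constructing an AnalysisDescription with project_name="project" and
# analysis_name="analysis" gives label "project_analysis"; names containing
# underscores or spaces (e.g. "pro_ject", "ana lysis") fail the
# NO_SPECIAL_CHARS regex and raise a validation error.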

src/aind_data_schema/metadata.py

Lines changed: 85 additions & 16 deletions
@@ -2,11 +2,19 @@
 
 from datetime import datetime
 from enum import Enum
-from typing import Dict, List
+from typing import Dict, List, Optional
+from uuid import UUID, uuid4
 
-from pydantic import Extra, Field
+from pydantic import Field, root_validator, validate_model
 
 from aind_data_schema.base import AindCoreModel
+from aind_data_schema.data_description import DataDescription
+from aind_data_schema.imaging.acquisition import Acquisition
+from aind_data_schema.imaging.instrument import Instrument
+from aind_data_schema.procedures import Procedures
+from aind_data_schema.processing import Processing
+from aind_data_schema.rig import Rig
+from aind_data_schema.session import Session
 from aind_data_schema.subject import Subject
 
 
@@ -29,8 +37,10 @@ class Metadata(AindCoreModel):
     """The records in the Data Asset Collection needs to contain certain fields
     to easily query and index the data."""
 
-    id: str = Field(
-        ...,
+    schema_version: str = Field("0.0.2", description="schema version", title="Version", const=True)
+
+    id: UUID = Field(
+        default_factory=uuid4,
         alias="_id",
         title="Data Asset ID",
         description="The unique id of the data asset.",
@@ -40,31 +50,90 @@ class Metadata(AindCoreModel):
         description="Name of the data asset.",
         title="Data Asset Name",
     )
+    # We'll set created and last_modified defaults using the root_validator
+    # to ensure they're synced on creation
     created: datetime = Field(
-        ...,
+        default_factory=datetime.utcnow,
         title="Created",
-        description="The data and time the data asset created.",
+        description="The utc date and time the data asset created.",
     )
     last_modified: datetime = Field(
-        ..., title="Last Modified", description="The date and time that the data asset was last modified."
+        default_factory=datetime.utcnow,
+        title="Last Modified",
+        description="The utc date and time that the data asset was last modified.",
     )
     location: str = Field(
         ...,
         title="Location",
         description="Current location of the data asset.",
     )
-    metadata_status: MetadataStatus = Field(..., title=" Metadata Status", description="The status of the metadata.")
-    schema_version: str = Field("0.0.3", title="Schema Version", const=True)
+    metadata_status: MetadataStatus = Field(
+        default=MetadataStatus.UNKNOWN, title=" Metadata Status", description="The status of the metadata."
+    )
     external_links: List[Dict[ExternalPlatforms, str]] = Field(
-        ..., title="External Links", description="Links to the data asset on different platforms."
+        default=[], title="External Links", description="Links to the data asset on different platforms."
     )
-    subject: Subject = Field(
-        ...,
+    # We can make the AindCoreModel fields optional for now and do more
+    # granular validations using validators. We may have some older data
+    # assets in S3 that don't have metadata attached. We'd still like to
+    # index that data, but we can flag those instances as MISSING or UNKNOWN
+    subject: Optional[Subject] = Field(
+        None,
         title="Subject",
-        description="Description of a subject of data collection.",
+        description="Subject of data collection.",
     )
+    data_description: Optional[DataDescription] = Field(
+        None, title="Data Description", description="A logical collection of data files."
+    )
+    procedures: Optional[Procedures] = Field(
+        None, title="Procedures", description="All procedures performed on a subject."
+    )
+    session: Optional[Session] = Field(None, title="Session", description="Description of a session.")
+    rig: Optional[Rig] = Field(None, title="Rig", description="Rig.")
+    processing: Optional[Processing] = Field(None, title="Processing", description="All processes run on data.")
+    acquisition: Optional[Acquisition] = Field(None, title="Acquisition", description="Imaging acquisition session")
+    instrument: Optional[Instrument] = Field(
+        None, title="Instrument", description="Instrument, which is a collection of devices"
+    )
+
+    @root_validator(pre=False)
+    def validate_metadata(cls, values):
+        """Validator for metadata"""
 
-    class Config:
-        """Need to allow for additional fields to append to base model"""
+        # There's a simpler way to do this if we drop support for py37
+        all_model_fields = []
+        for field_name in cls.__fields__:
+            field_to_check = cls.__fields__[field_name]
+            try:
+                if issubclass(field_to_check.type_, AindCoreModel):
+                    all_model_fields.append(field_to_check)
+            except TypeError:
+                # Type errors in python3.7 when using issubclass on type
+                # generics
+                pass
 
-        extra = Extra.allow
+        # For each model field, check that is present and check if the model
+        # is valid. If it isn't valid, still add it, but mark MetadataStatus
+        # as INVALID
+        metadata_status = MetadataStatus.VALID
+        for model_field in all_model_fields:
+            model_class = model_field.type_
+            model_name = model_field.name
+            if values.get(model_name) is not None:
+                model = values[model_name]
+                # Since pre=False, the dictionaries get converted to models
+                # upstream
+                model_contents = model.dict()
+                *_, validation_error = validate_model(model_class, model_contents)
+                if validation_error:
+                    model_instance = model_class.construct(**model_contents)
+                    metadata_status = MetadataStatus.INVALID
+                else:
+                    model_instance = model_class(**model_contents)
+                values[model_name] = model_instance
+        # For certain required fields, like subject, if they are not present,
+        # mark the metadata record as missing
+        if values.get("subject") is None:
+            metadata_status = MetadataStatus.MISSING
+        values["metadata_status"] = metadata_status
+        return values
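In practice, the new root_validator downgrades metadata_status instead of raising: a record without a subject is marked MISSING, and a sub-model that fails its own validation is still attached via construct() but flags the record INVALID. A minimal sketch of the intended behavior (field values are illustrative only, and assume just the defaults shown above):

from aind_data_schema.metadata import Metadata, MetadataStatus

# Only the required name and location are supplied; id, created, and
# last_modified come from their default_factory functions.
record = Metadata(
    name="ecephys_12345_2022-02-21_16-30-01",
    location="s3://example-bucket/ecephys_12345_2022-02-21_16-30-01",  # illustrative location
)

# No subject was provided, so the validator marks the whole record MISSING.
assert record.metadata_status == MetadataStatus.MISSING
print(record.id, record.created, record.last_modified)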

tests/resources/ephys_data_description/data_description_0.6.2.json

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@
    "ror_id": "04szwah67",
    "data_level": "raw",
    "group": "ephys",
-   "project_name": "MRI-Guided Elecrophysiology",
+   "project_name": "mri-guided-electrophysiology",
    "experiment_type": "ecephys",
    "subject_id": "661279",
    "data_summary": "This dataset was collected to evaluate the accuracy and feasibility of the AIND MRI-guided insertion pipeline. One probe targets the retinotopic center of LGN, with drifting grating for receptive field mapping to evaluate targeting. Other targets can be evaluated in histology."

tests/resources/ephys_data_description/data_description_0.6.2_wrong_field.json

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@
    "ror_id": "04szwah67",
    "data_level": "raw",
    "group": "ephys",
-   "project_name": "MRI-Guided Elecrophysiology",
+   "project_name": "mri-guided-electrophysiology",
    "experiment_type": "ecephys",
    "subject_id": "661279",
    "data_summary": "This dataset was collected to evaluate the accuracy and feasibility of the AIND MRI-guided insertion pipeline. One probe targets the retinotopic center of LGN, with drifting grating for receptive field mapping to evaluate targeting. Other targets can be evaluated in histology."

tests/test_data_description.py

Lines changed: 63 additions & 0 deletions
@@ -8,6 +8,7 @@
 from typing import List
 
 from aind_data_schema.data_description import (
+    AnalysisDescription,
     DataDescription,
     DerivedDataDescription,
     Funding,
@@ -38,6 +39,7 @@ def setUpClass(cls):
     BAD_NAME = "fizzbuzz"
     BASIC_NAME = "ecephys_1234_3033-12-21_04-22-11"
     DERIVED_NAME = "ecephys_1234_3033-12-21_04-22-11_spikesorted-ks25_2022-10-12_23-23-11"
+    ANALYSIS_NAME = "project_analysis_3033-12-21_04-22-11"
 
     def test_constructors(self):
         """test building from component parts"""
@@ -120,6 +122,59 @@ def test_constructors(self):
             investigators=["Jane Smith"],
         )
 
+        ad = AnalysisDescription(
+            analysis_name="analysis",
+            project_name="project",
+            creation_time=dt,
+            subject_id="1234",
+            modality=[Modality.SPIM],
+            platform="exaspim",
+            institution=Institution.AIND,
+            funding_source=[f],
+            investigators=["Jane Smith"],
+        )
+
+        self.assertEqual(ad.label, "project_analysis")
+
+        with self.assertRaises(ValueError):
+            AnalysisDescription(
+                analysis_name="ana lysis",
+                project_name="pro_ject",
+                subject_id="1234",
+                modality=[Modality.SPIM],
+                platform="exaspim",
+                creation_time=dt,
+                institution=Institution.AIND,
+                funding_source=[f],
+                investigators=["Jane Smith"],
+            )
+
+        with self.assertRaises(ValueError):
+            AnalysisDescription(
+                analysis_name="",
+                project_name="project",
+                subject_id="1234",
+                modality=[Modality.SPIM],
+                platform="exaspim",
+                creation_time=dt,
+                institution=Institution.AIND,
+                funding_source=[f],
+                investigators=["Jane Smith"],
+            )
+
+        with self.assertRaises(ValueError):
+            AnalysisDescription(
+                analysis_name="analysis",
+                project_name="",
+                subject_id="1234",
+                modality=[Modality.SPIM],
+                platform="exaspim",
+                creation_time=dt,
+                institution=Institution.AIND,
+                funding_source=[f],
+                investigators=["Jane Smith"],
+            )
+
     def test_round_trip(self):
         """make sure we can round trip from json"""
 
@@ -167,6 +222,14 @@ def test_parse_name(self):
         with self.assertRaises(ValueError):
             toks = DerivedDataDescription.parse_name(self.BAD_NAME)
 
+        toks = AnalysisDescription.parse_name(self.ANALYSIS_NAME)
+        assert toks["project_abbreviation"] == "project"
+        assert toks["analysis_name"] == "analysis"
+        assert toks["creation_time"] == datetime.datetime(3033, 12, 21, 4, 22, 11)
+
+        with self.assertRaises(ValueError):
+            toks = AnalysisDescription.parse_name(self.BAD_NAME)
+
     def test_abbreviation_enums(self):
         """Tests that BaseName enums can be constructed from abbreviations"""
         # Tests that Modality constructed as expected
