Skip to content

Commit 35eb87b

Browse files
authored
Merge pull request #535 from AllenNeuralDynamics/feat-500-add-AnalysisDescription
Feat 500 add Analysis Description
2 parents 162eabf + ea776fe commit 35eb87b

File tree

7 files changed

+115
-9
lines changed

7 files changed

+115
-9
lines changed

examples/data_description.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/data_description.py",
3-
"schema_version": "0.10.1",
3+
"schema_version": "0.10.2",
44
"license": "CC-BY-4.0",
55
"creation_time": "2022-02-21T16:30:01",
66
"name": "ecephys_12345_2022-02-21_16-30-01",

src/aind_data_schema/data_description.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@ class DataRegex(Enum):
3131
f"^(?P<input>.+?_{RegexParts.DATE.value}_{RegexParts.TIME.value})_(?P<process_name>.+?)_(?P<c_date>"
3232
f"{RegexParts.DATE.value})_(?P<c_time>{RegexParts.TIME.value})"
3333
)
34+
ANALYZED = (
35+
f"^(?P<project_abbreviation>.+?)_(?P<analysis_name>.+?)_(?P<c_date>"
36+
f"{RegexParts.DATE.value})_(?P<c_time>{RegexParts.TIME.value})$"
37+
)
3438
NO_UNDERSCORES = "^[^_]+$"
39+
NO_SPECIAL_CHARS = '^[^<>:;"/|? \\_]+$'
3540

3641

3742
class DataLevel(Enum):
@@ -198,7 +203,7 @@ class RelatedData(AindModel):
198203
class DataDescription(AindCoreModel):
199204
"""Description of a logical collection of data files"""
200205

201-
schema_version: str = Field("0.10.1", title="Schema Version", const=True)
206+
schema_version: str = Field("0.10.2", title="Schema Version", const=True)
202207
license: str = Field("CC-BY-4.0", title="License", const=True)
203208

204209
creation_time: datetime = Field(
@@ -244,6 +249,7 @@ class DataDescription(AindCoreModel):
244249
)
245250
project_name: Optional[str] = Field(
246251
None,
252+
regex=DataRegex.NO_SPECIAL_CHARS.value,
247253
description="A name for a set of coordinated activities intended to achieve one or more objectives.",
248254
title="Project Name",
249255
)
@@ -459,3 +465,43 @@ def parse_name(cls, name):
459465
subject_id=m.group("subject_id"),
460466
creation_time=creation_time,
461467
)
468+
469+
470+
class AnalysisDescription(DataDescription):
    """A collection of data files as analyzed from an asset"""

    # Pinned to DERIVED via const=True.
    data_level: DataLevel = Field(
        DataLevel.DERIVED,
        description="Level of processing that data has undergone",
        title="Data Level",
        const=True,
    )
    # Required here; must match DataRegex.NO_SPECIAL_CHARS.
    project_name: str = Field(
        ...,
        regex=DataRegex.NO_SPECIAL_CHARS.value,
        description="Name of the project the analysis belongs to",
        title="Project name",
    )
    # Required; validated against the same pattern as project_name.
    analysis_name: str = Field(
        ...,
        regex=DataRegex.NO_SPECIAL_CHARS.value,
        description="Name of the analysis performed",
        title="Analysis name",
    )

    @property
    def label(self):
        """Project name and analysis name joined by a single underscore."""

        return f"{self.project_name}_{self.analysis_name}"

    @classmethod
    def parse_name(cls, name):
        """Split an analysis name into project, analysis, and creation time.

        The name is matched against ``DataRegex.ANALYZED``. On success a dict
        with the keys ``project_abbreviation``, ``analysis_name`` and
        ``creation_time`` is returned, where ``creation_time`` is produced by
        ``datetime_from_name_string`` from the captured date and time parts.

        Raises:
            ValueError: if ``name`` does not match the pattern.
        """

        parsed = re.match(DataRegex.ANALYZED.value, name)
        if parsed is None:
            raise ValueError(f"name({name}) does not match pattern")

        parts = parsed.groupdict()
        return {
            "project_abbreviation": parts["project_abbreviation"],
            "analysis_name": parts["analysis_name"],
            "creation_time": datetime_from_name_string(parts["c_date"], parts["c_time"]),
        }

tests/resources/ephys_data_description/data_description_0.6.2.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"ror_id": "04szwah67",
3434
"data_level": "raw",
3535
"group": "ephys",
36-
"project_name": "MRI-Guided Elecrophysiology",
36+
"project_name": "mri-guided-electrophysiology",
3737
"experiment_type": "ecephys",
3838
"subject_id": "661279",
3939
"data_summary": "This dataset was collected to evaluate the accuracy and feasibility of the AIND MRI-guided insertion pipeline. One probe targets the retinotopic center of LGN, with drifting grating for receptive field mapping to evaluate targeting. Other targets can be evaluated in histology."

tests/resources/ephys_data_description/data_description_0.6.2_wrong_field.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"ror_id": "04szwah67",
3434
"data_level": "raw",
3535
"group": "ephys",
36-
"project_name": "MRI-Guided Elecrophysiology",
36+
"project_name": "mri-guided-electrophysiology",
3737
"experiment_type": "ecephys",
3838
"subject_id": "661279",
3939
"data_summary": "This dataset was collected to evaluate the accuracy and feasibility of the AIND MRI-guided insertion pipeline. One probe targets the retinotopic center of LGN, with drifting grating for receptive field mapping to evaluate targeting. Other targets can be evaluated in histology."

tests/test_data_description.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import List
99

1010
from aind_data_schema.data_description import (
11+
AnalysisDescription,
1112
DataDescription,
1213
DerivedDataDescription,
1314
Funding,
@@ -38,6 +39,7 @@ def setUpClass(cls):
3839
BAD_NAME = "fizzbuzz"
3940
BASIC_NAME = "ecephys_1234_3033-12-21_04-22-11"
4041
DERIVED_NAME = "ecephys_1234_3033-12-21_04-22-11_spikesorted-ks25_2022-10-12_23-23-11"
42+
ANALYSIS_NAME = "project_analysis_3033-12-21_04-22-11"
4143

4244
def test_constructors(self):
4345
"""test building from component parts"""
@@ -120,6 +122,59 @@ def test_constructors(self):
120122
investigators=["Jane Smith"],
121123
)
122124

125+
ad = AnalysisDescription(
126+
analysis_name="analysis",
127+
project_name="project",
128+
creation_time=dt,
129+
subject_id="1234",
130+
modality=[Modality.SPIM],
131+
platform="exaspim",
132+
institution=Institution.AIND,
133+
funding_source=[f],
134+
investigators=["Jane Smith"],
135+
)
136+
137+
self.assertEqual(ad.label, "project_analysis")
138+
139+
with self.assertRaises(ValueError):
140+
AnalysisDescription(
141+
analysis_name="ana lysis",
142+
project_name="pro_ject",
143+
subject_id="1234",
144+
modality=[Modality.SPIM],
145+
platform="exaspim",
146+
creation_time=dt,
147+
institution=Institution.AIND,
148+
funding_source=[f],
149+
investigators=["Jane Smith"],
150+
)
151+
152+
with self.assertRaises(ValueError):
153+
AnalysisDescription(
154+
analysis_name="",
155+
project_name="project",
156+
subject_id="1234",
157+
modality=[Modality.SPIM],
158+
platform="exaspim",
159+
creation_time=dt,
160+
institution=Institution.AIND,
161+
funding_source=[f],
162+
investigators=["Jane Smith"],
163+
)
164+
165+
with self.assertRaises(ValueError):
166+
AnalysisDescription(
167+
analysis_name="analysis",
168+
project_name="",
169+
subject_id="1234",
170+
modality=[Modality.SPIM],
171+
platform="exaspim",
172+
creation_time=dt,
173+
institution=Institution.AIND,
174+
funding_source=[f],
175+
investigators=["Jane Smith"],
176+
)
177+
123178
def test_round_trip(self):
124179
"""make sure we can round trip from json"""
125180

@@ -167,6 +222,14 @@ def test_parse_name(self):
167222
with self.assertRaises(ValueError):
168223
toks = DerivedDataDescription.parse_name(self.BAD_NAME)
169224

225+
toks = AnalysisDescription.parse_name(self.ANALYSIS_NAME)
226+
assert toks["project_abbreviation"] == "project"
227+
assert toks["analysis_name"] == "analysis"
228+
assert toks["creation_time"] == datetime.datetime(3033, 12, 21, 4, 22, 11)
229+
230+
with self.assertRaises(ValueError):
231+
toks = AnalysisDescription.parse_name(self.BAD_NAME)
232+
170233
def test_abbreviation_enums(self):
171234
"""Tests that BaseName enums can be constructed from abbreviations"""
172235
# Tests that Modality constructed as expected

tests/test_processing.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,6 @@ def test_constructors(self):
2121
processing_pipeline=PipelineProcess(processor_full_name="Processor", data_processes=[]),
2222
)
2323

24-
with self.assertRaises(pydantic.ValidationError):
25-
DataProcess(name="Other")
26-
2724
with self.assertRaises(pydantic.ValidationError):
2825
DataProcess(name="Other", notes="")
2926

tests/test_schema_upgrade.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def test_upgrades_0_6_2(self):
205205
self.assertEqual(DataLevel.RAW, new_data_description.data_level)
206206
self.assertEqual(Group.EPHYS, new_data_description.group)
207207
self.assertEqual(["John Doe", "Mary Smith"], new_data_description.investigators)
208-
self.assertEqual("MRI-Guided Elecrophysiology", new_data_description.project_name)
208+
self.assertEqual("mri-guided-electrophysiology", new_data_description.project_name)
209209
self.assertIsNone(new_data_description.restrictions)
210210
self.assertEqual([Modality.ECEPHYS], new_data_description.modality)
211211
self.assertEqual("661279", new_data_description.subject_id)
@@ -265,7 +265,7 @@ def test_upgrades_0_6_2_wrong_field(self):
265265
self.assertEqual(DataLevel.RAW, new_data_description.data_level)
266266
self.assertEqual(Group.EPHYS, new_data_description.group)
267267
self.assertEqual(["John Doe", "Mary Smith"], new_data_description.investigators)
268-
self.assertEqual("MRI-Guided Elecrophysiology", new_data_description.project_name)
268+
self.assertEqual("mri-guided-electrophysiology", new_data_description.project_name)
269269
self.assertIsNone(new_data_description.restrictions)
270270
self.assertEqual([Modality.ECEPHYS], new_data_description.modality)
271271
self.assertEqual("661279", new_data_description.subject_id)

0 commit comments

Comments
 (0)