refactor: change how tags and default_grouping work

dbirman · dbirman · commit eefa5e09af69 · 2025-11-26T20:00:51.000-08:00
diff --git a/examples/quality_control.py b/examples/quality_control.py
@@ -45,7 +45,10 @@
         value=drift_value_with_options,
         reference="ecephys-drift-map",
         status_history=[sp],
-        tags=["Drift map", "Probe A"],
+        tags={
+            "probe": "Probe A",
+            "type": "drift map",
+        }
     ),
     QCMetric(
         name="Probe B drift",
@@ -55,7 +58,10 @@
         value=drift_value_with_flags,
         reference="ecephys-drift-map",
         status_history=[sp],
-        tags=["Drift map", "Probe B"],
+        tags={
+            "probe": "Probe B",
+            "type": "drift map",
+        }
     ),
     QCMetric(
         name="Probe C drift",
@@ -65,16 +71,10 @@
         value="Low",
         reference="ecephys-drift-map",
         status_history=[s],
-        tags=["Drift map", "Probe C"],
-    ),
-    QCMetric(
-        name="Expected frame count",
-        modality=Modality.BEHAVIOR_VIDEOS,
-        stage=Stage.RAW,
-        description="Expected frame count from experiment length, always pass",
-        value=662,
-        status_history=[s],
-        tags=["Frame count checks"],
+        tags={
+            "probe": "Probe C",
+            "type": "drift map",
+        }
     ),
     QCMetric(
         name="Video 1 frame count",
@@ -83,7 +83,10 @@
         description="Pass when frame count matches expected",
         value=662,
         status_history=[s],
-        tags=["Frame count checks", "Video 1"],
+        tags={
+            "video": "Video 1",
+            "type": "Frame count checks",
+        },
     ),
     QCMetric(
         name="Video 2 num frames",
@@ -92,7 +95,10 @@
         description="Pass when frame count matches expected",
         value=662,
         status_history=[s],
-        tags=["Frame count checks", "Video 2"],
+        tags={
+            "video": "Video 2",
+            "type": "Frame count checks",
+        },
     ),
     QCMetric(
         name="ProbeA",
@@ -101,7 +107,10 @@
         description="Pass when probe is present in the recording",
         value=True,
         status_history=[s],
-        tags=["Probes present"],
+        tags={
+            "probe": "Probe A",
+            "type": "Probes present",
+        },
     ),
     QCMetric(
         name="ProbeB",
@@ -110,7 +119,10 @@
         description="Pass when probe is present in the recording",
         value=True,
         status_history=[s],
-        tags=["Probes present"],
+        tags={
+            "probe": "Probe B",
+            "type": "Probes present",
+        },
     ),
     QCMetric(
         name="ProbeC",
@@ -119,14 +131,17 @@
         description="Pass when probe is present in the recording",
         value=True,
         status_history=[s],
-        tags=["Probes present"],
+        tags={
+            "probe": "Probe C",
+            "type": "Probes present",
+        },
     ),
 ]
 
 q = QualityControl(
     metrics=metrics,
-    default_grouping=["Drift map", "Frame count checks", "Probes present"],
-    allow_tag_failures=["Video 2"],  # this will allow the Video 2 metric to fail without failing the entire QC
+    default_grouping=[["probe", "video"], ["type"]],  # tag keys: group by probe/video first, then by type
+    allow_tag_failures=["Video 2"],  # tag value: metrics tagged "Video 2" may fail without failing overall QC
 )
 
 if __name__ == "__main__":
diff --git a/src/aind_data_schema/core/quality_control.py b/src/aind_data_schema/core/quality_control.py
@@ -5,7 +5,7 @@
 from typing import Any, List, Literal, Optional, Union
 
 from aind_data_schema_models.modalities import Modality
-from pydantic import Field, SkipValidation, model_validator
+from pydantic import Field, SkipValidation, field_validator, model_validator
 
 from aind_data_schema.base import AwareDatetimeWithDefault, DataCoreModel, DataModel, DiscriminatedList
 from aind_data_schema.utils.merge import merge_notes, merge_optional_list, remove_duplicates
@@ -49,8 +49,9 @@ class QCMetric(DataModel):
     status_history: List[QCStatus] = Field(default=[], title="Metric status history", min_length=1)
     description: Optional[str] = Field(default=None, title="Metric description")
     reference: Optional[str] = Field(default=None, title="Metric reference image URL or plot type")
-    tags: List[str] = Field(
-        default=[], title="Tags", description="Tags group QCMetric objects to allow for grouping and filtering"
+    tags: dict[str, str] = Field(
+        default={}, title="Tags",
+        description="Tags group QCMetric objects. Unique keys define groups of tags, for example {'probe': 'probeA'}."
     )
     evaluated_assets: Optional[List[str]] = Field(
         default=None,
@@ -80,6 +81,28 @@ def validate_multi_asset(self):
             raise ValueError(f"Metric '{self.name}' is a single-asset metric and should not have evaluated_assets")
         return self
 
+    @model_validator(mode="before")
+    @classmethod
+    def fix_tag_lists(cls, self):
+        """Convert tags from list to dict if necessary
+
+        This function is for backwards compatibility with v2.2.X where tags were stored as lists of strings.
+        Input that is not a dict, or has no list under "tags", is returned untouched.
+        Remove this function in aind-data-schema v3.X
+        """
+        # mode="before" input is raw: it may not be a dict and "tags" is optional, so do not index it directly
+        tags = self.get("tags") if isinstance(self, dict) else None
+        if not isinstance(tags, list):
+            return self
+        if len(tags) == 1:
+            converted = {"tag": tags[0]}
+            if "name" in self:  # a missing "name" is left for field validation to report
+                converted["name"] = self["name"]
+        else:
+            # Unfortunately there is no reasonable way to handle multiple tags, these assets should be re-generated
+            converted = {f"tag_{i+1}": tag for i, tag in enumerate(tags)}
+        return {**self, "tags": converted}  # shallow copy so the caller's dict is not mutated
+
 
 class CurationHistory(DataModel):
     """Schema to track curator name and timestamp for curation events"""
@@ -110,15 +133,15 @@ class QualityControl(DataCoreModel):
     )
     notes: Optional[str] = Field(default=None, title="Notes")
 
-    default_grouping: List[str] = Field(
+    default_grouping: List[list[str]] = Field(
         ...,
         title="Default grouping",
-        description="Default tag grouping for this QualityControl object, used in visualizations",
+        description="Tag *keys* that should be used to group metrics hierarchically for visualization",
     )
-    allow_tag_failures: List[str | tuple] = Field(
+    allow_tag_failures: List[str] = Field(
         default=[],
         title="Allow tag failures",
-        description="List of tags that are allowed to fail without failing the overall QC",
+        description="List of tag *values* that are allowed to fail without failing the overall QC",
     )
     status: Optional[dict] = Field(
         default=None,
@@ -257,6 +280,17 @@ def __add__(self, other: "QualityControl") -> "QualityControl":
             allow_tag_failures=combined_allow_tag_failures,
         )
 
+    @field_validator("default_grouping", mode="before")
+    @classmethod
+    def fix_default_grouping_list(cls, value: Any) -> Any:
+        """Wrap bare string entries of default_grouping in lists, e.g. ["a", ["b"]] becomes [["a"], ["b"]]
+        Backwards compatibility for v2.2.X, where default_grouping was a list of strings. Remove in v3.X
+        """
+        # Only list/tuple input is rewritten; anything else (e.g. a bare str) is left for field validation to reject
+        if isinstance(value, (list, tuple)):
+            value = [[group] if isinstance(group, str) else group for group in value]
+        return value
+
 
 def _get_status_by_date(metric: QCMetric | CurationMetric, date: datetime) -> Status:
     """Get the status of a metric at a specific date by looking through status_history.