materialsproject · SilenNaihin · Dec 18, 2025 · Dec 18, 2025 · Dec 18, 2025
diff --git a/mpcontribs-lux/mpcontribs/lux/projects/alab/pipelines/README.md b/mpcontribs-lux/mpcontribs/lux/projects/alab/pipelines/README.md
@@ -0,0 +1,13 @@
+# A-Lab Pipelines
+
+Pipeline code for the A-Lab project is maintained in a separate repository:
+
+**Repository:** [alab-pipeline](https://github.com/SilenNaihin/alab-pipeline.git)
+
+## Getting Started
+
+To clone the pipeline repository:
+
+```bash
+git clone https://github.com/SilenNaihin/alab-pipeline.git
+```
diff --git a/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/__init__.py
@@ -0,0 +1,36 @@
+"""
+A-Lab Pydantic Schemas
+
+This package contains Pydantic schemas for all A-Lab parquet tables.
+These schemas are the source of truth for data validation.
+
+Each schema corresponds to one parquet file.
+Integrates team's validation patterns (constraints, Literal types) from results_schema.py.
+"""
+
+from .base import ExcludeFromUpload
+from .experiments import Experiment
+from .experiment_elements import ExperimentElement
+from .powder_doses import PowderDose, PowderItem
+from .temperature_logs import TemperatureLog, TemperatureLogEntry
+from .workflow_tasks import WorkflowTask
+from .xrd_data_points import XRDDataPoint
+from .xrd_refinements import XRDRefinement
+from .xrd_phases import XRDPhase
+
+__all__ = [
+    # Base
+    "ExcludeFromUpload",
+    # Parquet table schemas (one per .parquet file)
+    "Experiment",
+    "ExperimentElement",
+    "PowderDose",
+    "PowderItem",
+    "TemperatureLogEntry",
+    "TemperatureLog",
+    "WorkflowTask",
+    "XRDDataPoint",
+    "XRDRefinement",
+    "XRDPhase",
+]
+
diff --git a/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/base.py b/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/base.py
@@ -0,0 +1,69 @@
+"""
+Base utilities for A-Lab Pydantic schemas.
+
+Provides common types, validators, and field utilities.
+"""
+
+from typing import Any, Dict
+from pydantic import Field
+
+
+def ExcludeFromUpload(
+    default: Any = None,
+    description: str = "",
+    **kwargs
+) -> Any:
+    """
+    Field that should NOT be uploaded to MPContribs.
+
+    Use this for sensitive data that must remain private until publication.
+    Examples: weight_collected, mass measurements that are embargoed.
+
+    Usage:
+        weight_collected: float | None = ExcludeFromUpload(
+            description="Weight of powder collected (embargoed)"
+        )
+    """
+    return Field(
+        default=default,
+        description=description,
+        json_schema_extra={"exclude_from_upload": True},
+        **kwargs
+    )
+
+
+def get_uploadable_fields(model_class) -> list[str]:
+    """
+    Get list of fields that should be uploaded to MPContribs.
+
+    Args:
+        model_class: Pydantic model class
+
+    Returns:
+        List of field names that are NOT marked with exclude_from_upload
+    """
+    uploadable = []
+    for field_name, field_info in model_class.model_fields.items():
+        extra = field_info.json_schema_extra or {}
+        if not extra.get("exclude_from_upload", False):
+            uploadable.append(field_name)
+    return uploadable
+
+
+def get_excluded_fields(model_class) -> list[str]:
+    """
+    Get list of fields that should NOT be uploaded to MPContribs.
+
+    Args:
+        model_class: Pydantic model class
+
+    Returns:
+        List of field names that ARE marked with exclude_from_upload
+    """
+    excluded = []
+    for field_name, field_info in model_class.model_fields.items():
+        extra = field_info.json_schema_extra or {}
+        if extra.get("exclude_from_upload", False):
+            excluded.append(field_name)
+    return excluded
+
diff --git a/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/experiment_elements.py b/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/experiment_elements.py
@@ -0,0 +1,30 @@
+"""
+Experiment Elements Schema
+
+Elements present in each experiment (1:N relationship).
+Maps to: experiment_elements.parquet
+"""
+
+from pydantic import BaseModel, Field
+
+
+class ExperimentElement(BaseModel, extra="forbid"):
+    """
+    Element present in an experiment.
+
+    Each experiment can have multiple elements (1:N relationship).
+    """
+
+    experiment_id: str = Field(
+        description="Reference to parent experiment"
+    )
+
+    element_symbol: str = Field(
+        description="Element symbol (e.g., Na, Mg, O)"
+    )
+
+    target_atomic_percent: float | None = Field(
+        default=None,
+        description="Target atomic percentage of this element"
+    )
+
diff --git a/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/experiments.py b/mpcontribs-lux/mpcontribs/lux/projects/alab/schemas/experiments.py
@@ -0,0 +1,227 @@
+"""
+Experiment Schema (Consolidated)
+
+This is the main experiment table with ALL 1:1 data merged.
+Contains ~45 columns from: experiments + heating + recovery + xrd + finalization + dosing.
+
+Maps to: experiments.parquet
+"""
+
+from datetime import datetime
+from typing import Literal
+from pydantic import BaseModel, Field
+
+# Import ExcludeFromUpload utility
+try:
+    from .base import ExcludeFromUpload
+except ImportError:
+    # When imported dynamically, use absolute import
+    import sys
+    from pathlib import Path
+    sys.path.insert(0, str(Path(__file__).parent))
+    from base import ExcludeFromUpload
+
+
+class Experiment(BaseModel, extra="forbid"):
+    """
+    Main experiment schema with all 1:1 data consolidated.
+
+    This is the primary table - one row per experiment.
+    All related 1:1 data (heating, recovery, xrd, finalization, dosing) is merged here.
+    """
+
+    # === Core experiment fields ===
+    experiment_id: str = Field(
+        description="Unique experiment identifier (MongoDB _id)"
+    )
+
+    name: str = Field(
+        description="Experiment name (e.g., NSC_249, MINES_12)"
+    )
+
+    experiment_type: str = Field(
+        description="Root experiment type (NSC, Na, PG, MINES, TRI)"
+    )
+
+    experiment_subgroup: str | None = Field(
+        default=None,
+        description="Experiment subgroup (e.g., NSC_249, Na_123)"
+    )
+
+    target_formula: str = Field(
+        description="Target chemical formula"
+    )
+
+    last_updated: datetime = Field(
+        description="Last modification timestamp"
+    )
+
+    status: Literal["completed", "error", "active", "unknown"] = Field(
+        description="Workflow status"
+    )
+
+    notes: str | None = Field(
+        default=None,
+        description="Optional notes about the experiment"
+    )
+
+    # === Heating fields (prefix: heating_) ===
+    heating_method: Literal["standard", "atmosphere", "manual", "none"] | None = Field(
+        default=None,
+        description="Heating method used"
+    )
+
+    heating_temperature: float | None = Field(
+        default=None,
+        description="Target heating temperature in °C"
+    )
+
+    heating_time: float | None = Field(
+        default=None,
+        description="Heating duration in minutes"
+    )
+
+    heating_cooling_rate: float | None = Field(
+        default=None,
+        description="Cooling rate in °C/min"
+    )
+
+    heating_atmosphere: str | None = Field(
+        default=None,
+        description="Atmosphere used during heating (e.g., N2, Ar, Air)"
+    )
+
+    heating_flow_rate_ml_min: float | None = Field(
+        default=None,
+        description="Gas flow rate during heating in mL/min"
+    )
+
+    heating_low_temp_calcination: bool | None = Field(
+        default=None,
+        description="Whether low temperature calcination was used"
+    )
+
+    # === Recovery fields (prefix: recovery_) ===
+    recovery_total_dosed_mass_mg: float | None = Field(
+        default=None,
+        description="Total mass of all powders dosed in mg"
+    )
+
+    # EXCLUDED FROM UPLOAD per team request
+    recovery_weight_collected_mg: float | None = ExcludeFromUpload(
+        description="Weight of powder collected after heating in mg (EMBARGOED)"
+    )
+
+    recovery_yield_percent: float | None = Field(
+        default=None,
+        description="Recovery yield (collected / dosed * 100)"
+    )
+
+    recovery_initial_crucible_weight_mg: float | None = Field(
+        default=None,
+        description="Initial crucible weight before experiment in mg"
+    )
+
+    recovery_failure_classification: str | None = Field(
+        default=None,
+        description="Classification of any failure during recovery"
+    )
+
+    # === XRD measurement fields (prefix: xrd_) ===
+    xrd_sampleid_in_aeris: str | None = Field(
+        default=None,
+        description="Sample ID in Aeris XRD system"
+    )
+
+    xrd_holder_index: int | None = Field(
+        default=None,
+        description="XRD sample holder position index"
+    )
+
+    # EXCLUDED FROM UPLOAD per team request
+    xrd_total_mass_dispensed_mg: float | None = ExcludeFromUpload(
+        description="Mass dispensed for XRD measurement in mg (EMBARGOED)"
+    )
+
+    xrd_met_target_mass: bool | None = Field(
+        default=None,
+        description="Whether target mass was achieved for XRD"
+    )
+
+    # === Finalization fields (prefix: finalization_) ===
+    finalization_decoded_sample_id: str | None = Field(
+        default=None,
+        description="Decoded sample ID from barcode"
+    )
+
+    finalization_successful_labeling: bool | None = Field(
+        default=None,
+        description="Whether sample was successfully labeled"
+    )
+
+    finalization_storage_location: str | None = Field(
+        default=None,
+        description="Final storage location of sample"
+    )
+
+    # === Dosing fields (prefix: dosing_) ===
+    dosing_crucible_position: int | None = Field(
+        default=None,
+        description="Crucible position in rack",
+        ge=1,
+        le=4
+    )
+
+    dosing_crucible_sub_rack: Literal["SubRackA", "SubRackB", "SubRackC", "SubRackD"] | None = Field(
+        default=None,
+        description="Sub-rack identifier"
+    )
+
+    dosing_mixing_pot_position: int | None = Field(
+        default=None,
+        description="Mixing pot position",
+        ge=1,
+        le=16
+    )
+
+    dosing_ethanol_dispense_volume: int | None = Field(
+        default=None,
+        description="Volume of ethanol dispensed in µL",
+        ge=0
+    )
+
+    dosing_target_transfer_volume: int | None = Field(
+        default=None,
+        description="Target transfer volume in µL",
+        ge=0
+    )
+
+    dosing_actual_transfer_mass: float | None = Field(
+        default=None,
+        description="Actual mass transferred in g",
+        ge=0
+    )
+
+    dosing_dac_duration: int | None = Field(
+        default=None,
+        description="DAC duration in seconds",
+        ge=0
+    )
+
+    dosing_dac_speed: int | None = Field(
+        default=None,
+        description="DAC rotation speed in rpm",
+        ge=0
+    )
+
+    dosing_actual_heat_duration: int | None = Field(
+        default=None,
+        description="Actual heating duration during dosing in seconds",
+        ge=0
+    )
+
+    dosing_end_reason: str | None = Field(
+        default=None,
+        description="Reason for ending dosing session"
+    )
+