
Commit 3c2d805

Merge branch 'docs-2.0-migration' into empty_insert
2 parents 736860e + 9207d83 commit 3c2d805

12 files changed (+721, -1183 lines)


README.md

Lines changed: 14 additions & 9 deletions
@@ -1,16 +1,21 @@
 # DataJoint for Python
 
-DataJoint is an open-source Python framework for building scientific data pipelines.
-It implements the **Relational Workflow Model**—a paradigm that extends relational
-databases with native support for computational workflows.
+DataJoint is a framework for scientific data pipelines that introduces the **Relational Workflow Model**—a paradigm where your database schema is an executable specification of your workflow.
 
-**Key Features:**
+Traditional databases store data but don't understand how it was computed. DataJoint extends relational databases with native workflow semantics:
 
-- **Declarative schema design** — Define tables and relationships in Python
-- **Automatic dependency tracking** — Foreign keys encode workflow dependencies
-- **Built-in computation** — Imported and Computed tables run automatically
-- **Data integrity** — Referential integrity and transaction support
-- **Reproducibility** — Immutable data with full provenance
+- **Tables represent workflow steps** — Each table is a step in your pipeline where entities are created
+- **Foreign keys encode dependencies** — Parent tables must be populated before child tables
+- **Computations are declarative** — Define *what* to compute; DataJoint determines *when* and tracks *what's done*
+- **Results are immutable** — Computed results preserve full provenance and reproducibility
+
+### Object-Augmented Schemas
+
+Scientific data includes both structured metadata and large data objects (time series, images, movies, neural recordings, gene sequences). DataJoint solves this with **Object-Augmented Schemas (OAS)**—a unified architecture where relational tables and object storage are managed as one system with identical guarantees for integrity, transactions, and lifecycle.
+
+### DataJoint 2.0
+
+**DataJoint 2.0** solidifies these core concepts with a modernized API, improved type system, and enhanced object storage integration. Existing users can refer to the [Migration Guide](https://docs.datajoint.com/migration/) for upgrading from earlier versions.
 
 **Documentation:** https://docs.datajoint.com
 
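The new README text describes the Relational Workflow Model in prose. As a concrete illustration, the following is a minimal sketch of a two-step pipeline in the classic DataJoint Python API (`dj.Schema`, `dj.Manual`, `dj.Computed`, `populate()`); the schema name, table names, and computation are hypothetical, and the modernized 2.0 API documented by this commit may spell these differently.

```python
import datajoint as dj

# Hypothetical schema name; requires a configured database connection.
schema = dj.Schema("tutorial_pipeline")


@schema
class Session(dj.Manual):
    """A manually entered workflow step: one row per recording session."""
    definition = """
    session_id : int
    ---
    session_note : varchar(255)
    """


@schema
class SpikeRate(dj.Computed):
    """A computed step; the foreign key (-> Session) encodes the dependency."""
    definition = """
    -> Session
    ---
    mean_rate : float
    """

    def make(self, key):
        # Placeholder computation; a real make() would load the session's data.
        self.insert1(dict(key, mean_rate=42.0))


# populate() runs make() only for Session rows not yet present in SpikeRate.
SpikeRate.populate()
```

Each call to `populate()` advances the workflow only where its prerequisites exist, which is the sense in which the schema acts as an executable specification.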

src/datajoint/codecs.py

Lines changed: 74 additions & 0 deletions
@@ -502,6 +502,80 @@ def lookup_codec(codec_spec: str) -> tuple[Codec, str | None]:
     raise DataJointError(f"Codec <{type_name}> is not registered. " "Define a Codec subclass with name='{type_name}'.")
 
 
+# =============================================================================
+# Decode Helper
+# =============================================================================
+
+
+def decode_attribute(attr, data, squeeze: bool = False):
+    """
+    Decode raw database value using attribute's codec or native type handling.
+
+    This is the central decode function used by all fetch methods. It handles:
+    - Codec chains (e.g., <blob@store> → <hash> → bytes)
+    - Native type conversions (JSON, UUID)
+    - External storage downloads (via config["download_path"])
+
+    Args:
+        attr: Attribute from the table's heading.
+        data: Raw value fetched from the database.
+        squeeze: If True, remove singleton dimensions from numpy arrays.
+
+    Returns:
+        Decoded Python value.
+    """
+    import json
+    import uuid as uuid_module
+
+    import numpy as np
+
+    if data is None:
+        return None
+
+    if attr.codec:
+        # Get store if present for external storage
+        store = getattr(attr, "store", None)
+        if store is not None:
+            dtype_spec = f"<{attr.codec.name}@{store}>"
+        else:
+            dtype_spec = f"<{attr.codec.name}>"
+
+        final_dtype, type_chain, _ = resolve_dtype(dtype_spec)
+
+        # Process the final storage type (what's in the database)
+        if final_dtype.lower() == "json":
+            data = json.loads(data)
+        elif final_dtype.lower() in ("longblob", "blob", "mediumblob", "tinyblob"):
+            pass  # Blob data is already bytes
+        elif final_dtype.lower() == "binary(16)":
+            data = uuid_module.UUID(bytes=data)
+
+        # Apply decoders in reverse order: innermost first, then outermost
+        for codec in reversed(type_chain):
+            data = codec.decode(data, key=None)
+
+        # Squeeze arrays if requested
+        if squeeze and isinstance(data, np.ndarray):
+            data = data.squeeze()
+
+        return data
+
+    # No codec - handle native types
+    if attr.json:
+        return json.loads(data)
+
+    if attr.uuid:
+        return uuid_module.UUID(bytes=data)
+
+    if attr.is_blob:
+        return data  # Raw bytes
+
+    # Native types - pass through unchanged
+    return data
+
+
 # =============================================================================
 # Auto-register built-in codecs
 # =============================================================================
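A possible call site for the new helper, shown only as a usage sketch: `decode_row` below is a hypothetical function, not part of this commit, and it assumes a heading object exposes an `attributes` mapping from names to attribute objects carrying `.codec`, `.json`, `.uuid`, and `.is_blob` (as `decode_attribute` expects). The import path is inferred from this file's location at `src/datajoint/codecs.py`.

```python
from datajoint.codecs import decode_attribute  # path inferred from this commit


def decode_row(heading, raw_row, squeeze=False):
    """Decode every raw database value in a fetched row.

    Assumes `heading.attributes` maps attribute names to attribute objects
    of the kind decode_attribute expects; raw_row maps names to raw values.
    """
    return {
        name: decode_attribute(attr, raw_row[name], squeeze=squeeze)
        for name, attr in heading.attributes.items()
    }
```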
