diff --git a/.gitattributes b/.gitattributes
index 235b1a2..a76e4dc 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1 @@
-postprocessing/** linguist-vendored
\ No newline at end of file
+postprocessing/** linguist-vendored
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index cda0ced..7de49f3 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -26,4 +26,4 @@
- Subsystem:
-* **Other information** (e.g. detailed explanation, stacktraces, related issues, suggestions how to fix, links for us to have context, eg. stackoverflow, gitter, etc)
\ No newline at end of file
+* **Other information** (e.g. detailed explanation, stacktraces, related issues, suggestions on how to fix, links for us to have context, e.g. Stack Overflow, Gitter, etc.)
diff --git a/.github/workflows/check_copyright.yml b/.github/workflows/check_copyright.yml
index 9ed4c48..fdc46fc 100644
--- a/.github/workflows/check_copyright.yml
+++ b/.github/workflows/check_copyright.yml
@@ -10,6 +10,6 @@ jobs:
- name: Check license & copyright headers
uses: viperproject/check-license-header@v2
with:
- path:
+ path:
config: .github/workflows/check_copyright_config.json
- # strict: true
\ No newline at end of file
+ # strict: true
diff --git a/.github/workflows/check_mkdocs_build.yml b/.github/workflows/check_mkdocs_build.yml
index 116f6d4..2f9c08b 100644
--- a/.github/workflows/check_mkdocs_build.yml
+++ b/.github/workflows/check_mkdocs_build.yml
@@ -21,4 +21,4 @@ jobs:
uses: astral-sh/setup-uv@v4
# - run: cp -r examples/ docs/examples/
- run: uv pip install .[docs] --system
- - run: mkdocs build
\ No newline at end of file
+ - run: mkdocs build
diff --git a/.github/workflows/copyright.txt b/.github/workflows/copyright.txt
index 67eb334..084ae79 100644
--- a/.github/workflows/copyright.txt
+++ b/.github/workflows/copyright.txt
@@ -10,4 +10,4 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
-# limitations under the License.
\ No newline at end of file
+# limitations under the License.
diff --git a/.gitignore b/.gitignore
index c6e81a6..fb5db85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -174,3 +174,5 @@ cython_debug/
*.h5
*.code-workspace
.pre-commit-config.yaml
+_scripts
+.vscode
diff --git a/README.md b/README.md
index 9dc494c..ab3b8b2 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ The design goals are to have:
- Minimizes the *a priori* knowledge needed of the internal hierarchical structure, reducing friction for users to load data.
- Transparently returns both raw and processed data, where the level of post-processing can be selected by the user.
-To install,
+To install,
```bash
pip install git+https://github.com/OpenQuantumDesign/oqd-dataschema.git
```
diff --git a/docs/api.md b/docs/api.md
deleted file mode 100644
index 1621f49..0000000
--- a/docs/api.md
+++ /dev/null
@@ -1,33 +0,0 @@
-## Datastore
-
-
-::: oqd_dataschema.datastore
- options:
- heading_level: 3
- members: [
- "Datastore",
- ]
-
-
-## Base HDF5 Objects
-
-
-::: oqd_dataschema.base
- options:
- heading_level: 3
- members: [
- "Group",
- "Dataset",
- ]
-
-## Specified Groups
-
-
-::: oqd_dataschema.groups
- options:
- heading_level: 3
- members: [
- "SinaraRawDataGroup",
- "MeasurementOutcomesDataGroup",
- "ExpectationValueDataGroup",
- ]
\ No newline at end of file
diff --git a/docs/api/base.md b/docs/api/base.md
new file mode 100644
index 0000000..33fba53
--- /dev/null
+++ b/docs/api/base.md
@@ -0,0 +1,21 @@
+## Attribute Types
+
+
+::: oqd_dataschema.base
+ options:
+ heading_level: 3
+ members: [
+ "AttrKey",
+ "Attrs",
+ ]
+
+## Data Types
+
+
+::: oqd_dataschema.base
+ options:
+ heading_level: 3
+ members: [
+ "DTypes",
+ "DTypeNames",
+ ]
diff --git a/docs/api/datastore.md b/docs/api/datastore.md
new file mode 100644
index 0000000..c797a9c
--- /dev/null
+++ b/docs/api/datastore.md
@@ -0,0 +1,7 @@
+
+::: oqd_dataschema.datastore
+ options:
+ heading_level: 3
+ members: [
+ "Datastore",
+ ]
diff --git a/docs/api/group.md b/docs/api/group.md
new file mode 100644
index 0000000..1b8ff22
--- /dev/null
+++ b/docs/api/group.md
@@ -0,0 +1,8 @@
+
+::: oqd_dataschema.group
+ options:
+ heading_level: 3
+ members: [
+ "GroupBase",
+ "GroupRegistry",
+ ]
diff --git a/docs/api/groupfield.md b/docs/api/groupfield.md
new file mode 100644
index 0000000..af1319f
--- /dev/null
+++ b/docs/api/groupfield.md
@@ -0,0 +1,53 @@
+
+::: oqd_dataschema.base
+ options:
+ filters: []
+ heading_level: 3
+ members: [
+ "GroupField",
+ ]
+
+## Dataset
+
+
+::: oqd_dataschema.dataset
+ options:
+ heading_level: 3
+ members: [
+ "Dataset",
+ "CastDataset",
+ ]
+
+## Table
+
+
+::: oqd_dataschema.table
+ options:
+ heading_level: 3
+ members: [
+ "Table",
+ "CastTable",
+ ]
+
+## Folder
+
+
+::: oqd_dataschema.folder
+ options:
+ heading_level: 3
+ members: [
+ "Folder",
+ "CastFolder",
+ ]
+
+## Constrained Group Fields
+
+
+::: oqd_dataschema.constrained
+ options:
+ heading_level: 3
+ members: [
+ "condataset",
+ "contable",
+ "confolder",
+ ]
diff --git a/docs/api/utils.md b/docs/api/utils.md
new file mode 100644
index 0000000..cff7d91
--- /dev/null
+++ b/docs/api/utils.md
@@ -0,0 +1,10 @@
+## Structured Array Utilities
+
+
+::: oqd_dataschema.utils
+ options:
+ heading_level: 3
+ members: [
+ "dict_to_structured",
+ "unstructured_to_structured",
+ ]
diff --git a/docs/explanation.md b/docs/explanation.md
index e69de29..7af1180 100644
--- a/docs/explanation.md
+++ b/docs/explanation.md
@@ -0,0 +1,22 @@
+## Datastore
+
+A [Datastore][oqd_dataschema.datastore.Datastore] represents an HDF5 file with a particular hierarchical structure.
+
+### Hierarchy
+
+```
+/
+├── group1/
+│ └── dataset1
+├── group2/
+│ ├── dataset2
+│ ├── table1
+│ └── folder1
+└── group3/
+ ├── table2
+ └── dataset_dict1/
+ ├── dataset5
+ └── dataset6
+```
+
+The top level of a [Datastore][oqd_dataschema.datastore.Datastore] contains multiple [Groups](api/group.md).
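+
+A minimal sketch of building a datastore with part of this hierarchy (here `Group1` is a stand-in user-defined group with a single dataset field, as in the tutorials):
+
+```python
+import numpy as np
+
+from oqd_dataschema import Dataset, Datastore, GroupBase
+
+
+class Group1(GroupBase):
+    dataset1: Dataset
+
+
+datastore = Datastore(groups={"group1": Group1(dataset1=Dataset(data=np.arange(4)))})
+```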
diff --git a/docs/index.md b/docs/index.md
index 1355f50..9d7bf57 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,4 +1,4 @@
-#
+#
diff --git a/docs/stylesheets/admonition_template.css b/docs/stylesheets/admonition_template.css
index f32ebeb..94fee5c 100644
--- a/docs/stylesheets/admonition_template.css
+++ b/docs/stylesheets/admonition_template.css
@@ -14,4 +14,4 @@
background-color: #FFFFFF;
-webkit-mask-image: var(--md-admonition-icon--template);
mask-image: var(--md-admonition-icon--template);
- }
\ No newline at end of file
+ }
diff --git a/docs/stylesheets/admonitions.css b/docs/stylesheets/admonitions.css
index ff86542..eb1babd 100644
--- a/docs/stylesheets/admonitions.css
+++ b/docs/stylesheets/admonitions.css
@@ -130,6 +130,3 @@
-webkit-mask-image: var(--md-admonition-icon--acknowledgement);
mask-image: var(--md-admonition-icon--acknowledgement);
}
-
-
-
diff --git a/docs/stylesheets/brand.css b/docs/stylesheets/brand.css
index d2fdeeb..34d6112 100644
--- a/docs/stylesheets/brand.css
+++ b/docs/stylesheets/brand.css
@@ -36,12 +36,12 @@ h1, h2, h3, h4, h5, h6,
/* Apply Raleway to all navigation and sidebar elements */
-.md-nav,
-.md-nav__title,
-.md-nav__link,
-.md-header,
-.md-tabs,
-.md-sidebar,
+.md-nav,
+.md-nav__title,
+.md-nav__link,
+.md-header,
+.md-tabs,
+.md-sidebar,
.md-sidebar__inner,
.md-nav__item,
.md-footer,
@@ -79,7 +79,7 @@ h1, h2, h3, h4, h5, h6,
/* Light mode nav/ToC font color */
-[data-md-color-scheme="default"] .md-nav,
+[data-md-color-scheme="default"] .md-nav,
[data-md-color-scheme="default"] .md-nav__link,
[data-md-color-scheme="default"] .md-header,
[data-md-color-scheme="default"] .md-tabs {
@@ -88,7 +88,7 @@ h1, h2, h3, h4, h5, h6,
}
/* Dark mode nav/ToC font color */
-[data-md-color-scheme="slate"] .md-nav,
+[data-md-color-scheme="slate"] .md-nav,
[data-md-color-scheme="slate"] .md-nav__link,
[data-md-color-scheme="slate"] .md-header,
[data-md-color-scheme="slate"] .md-tabs {
@@ -113,4 +113,4 @@ h1, h2, h3, h4, h5, h6,
.md-header .md-tabs__link:hover {
color: #ffffff !important;
text-decoration: underline;
-}
\ No newline at end of file
+}
diff --git a/docs/tutorial.md b/docs/tutorial.md
index daa4bfa..c382434 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -1,108 +1,73 @@
-
# Tutorial
-```python
-import pathlib
-
-import numpy as np
-from rich.pretty import pprint
-
-from oqd_dataschema.base import Dataset
-from oqd_dataschema.datastore import Datastore
-from oqd_dataschema.groups import (
- ExpectationValueDataGroup,
- MeasurementOutcomesDataGroup,
- SinaraRawDataGroup,
-)
-```
+## Group Definition
```python
-raw = SinaraRawDataGroup(
- camera_images=Dataset(shape=(3, 2, 2), dtype="float32"),
- attrs={"date": "2025-03-26", "version": 0.1},
-)
-pprint(raw)
-```
-
+import datetime
+import pathlib
+
+import numpy as np
+from pydantic import Field
+
+from oqd_dataschema import Attrs, Dataset, GroupBase
-
-```python
-raw.camera_images.data = np.random.uniform(size=(3, 2, 2)).astype("float32")
-pprint(raw)
+class CustomGroup(GroupBase):
+ attrs: Attrs = Field(
+ default_factory=lambda: dict(
+ timestamp=str(datetime.datetime.now(datetime.timezone.utc))
+ )
+ )
+ t: Dataset
+ x: Dataset
```
-
+Defined groups are automatically registered into the [`GroupRegistry`][oqd_dataschema.group.GroupRegistry].
```python
-raw.camera_images.data = np.random.uniform(size=(3, 2, 2)).astype("float32")
-```
-
-
+from oqd_dataschema import GroupRegistry
-```python
-data = Datastore(groups={"raw": raw})
-pprint(data)
+GroupRegistry.groups
```
-
-
+## Initialize Group
```python
-def process_raw(raw: SinaraRawDataGroup) -> MeasurementOutcomesDataGroup:
- processed = MeasurementOutcomesDataGroup(
- outcomes=Dataset(
- data=np.round(raw.camera_images.data.mean(axis=(1, 2))),
- )
- )
- return processed
+t = np.linspace(0, 1, 101).astype(np.float32)
+x = np.sin(t).astype(np.complex64)
+group = CustomGroup(
+ t=Dataset(dtype="float32", shape=(101,)), x=Dataset(dtype="complex64", shape=(101,))
+)
-processed = process_raw(data.groups["raw"])
-pprint(processed)
+group.t.data = t
+group.x.data = x
```
-
-
+## Initialize Datastore
```python
-data.groups.update(processed=processed)
-pprint(data)
-```
-
+from oqd_dataschema import Datastore
+datastore = Datastore(groups={"g1": group})
+```
+## Data Pipeline
```python
-def process_outcomes(
- measurements: MeasurementOutcomesDataGroup,
-) -> ExpectationValueDataGroup:
- expval = ExpectationValueDataGroup(
- expectation_value=Dataset(
- shape=(),
- dtype="float32",
- data=measurements.outcomes.data.mean(),
- attrs={"date": "20", "input": 10},
- )
- )
- return expval
+def process(datastore: Datastore) -> None:
+    _g = datastore["g1"]
+ g2 = CustomGroup(t=Dataset(data=_g.t.data), x=Dataset(data=_g.x.data + 1j))
+ g2.attrs["_gen_by_pipe"] = "process"
-expval = process_outcomes(processed)
-data.groups.update(expval=process_outcomes(data.groups["processed"]))
+ datastore.add(g2=g2)
-pprint(expval)
-```
+datastore.pipe(process)
+```
+## Save Datastore
```python
-filepath = pathlib.Path("test.h5")
-data.model_dump_hdf5(filepath)
+datastore.model_dump_hdf5(pathlib.Path("datastore.h5"), mode="w")
```
-
+## Load Datastore
```python
-data_reload = Datastore.model_validate_hdf5(filepath)
-pprint(data_reload)
-```
\ No newline at end of file
+reloaded_datastore = Datastore.model_validate_hdf5(pathlib.Path("datastore.h5"))
+```
diff --git a/docs/tutorials/advanced.md b/docs/tutorials/advanced.md
new file mode 100644
index 0000000..8d9cf31
--- /dev/null
+++ b/docs/tutorials/advanced.md
@@ -0,0 +1,82 @@
+# Tutorial
+
+## Group Definition
+
+```python
+import datetime
+import pathlib
+
+import numpy as np
+from pydantic import Field
+
+from oqd_dataschema import Attrs, Dataset, Folder, GroupBase, Table
+
+class CustomGroup(GroupBase):
+ attrs: Attrs = Field(
+ default_factory=lambda: dict(
+ timestamp=str(datetime.datetime.now(datetime.timezone.utc))
+ )
+ )
+ dset: Dataset
+ tbl: Table
+ fld: Folder
+```
+
+Defined groups are automatically registered into the [`GroupRegistry`][oqd_dataschema.group.GroupRegistry].
+
+```python
+from oqd_dataschema import GroupRegistry
+
+GroupRegistry.groups
+```
+
+## Initialize Group
+
+```python
+from oqd_dataschema import Dataset, Table, Folder, unstructured_to_structured
+
+dset = Dataset(data=np.linspace(0, 1, 101).astype(np.float32))
+tbl = Table(
+ columns=[("t", "float32"), ("x", "complex128")],
+ data=unstructured_to_structured(
+ np.stack([np.linspace(0, 1, 101), np.sin(np.linspace(0, 1, 101))], -1),
+ dtype=np.dtype([("t", np.float32), ("x", np.complex128)]),
+ ),
+)
+fld = Folder(
+ document_schema={"t": "float32", "signal": {"x": "complex128", "y": "complex128"}},
+ data=unstructured_to_structured(
+ np.stack(
+ [
+ np.linspace(0, 1, 101),
+ np.sin(np.linspace(0, 1, 101)),
+ np.cos(np.linspace(0, 1, 101)),
+ ],
+ -1,
+ ),
+ dtype=np.dtype(
+ [
+ ("t", np.float32),
+ ("signal", np.dtype([("x", np.complex128), ("y", np.complex128)])),
+ ]
+ ),
+ ),
+)
+
+
+group = CustomGroup(dset=dset, tbl=tbl, fld=fld)
+```
+
+## Initialize Datastore
+
+```python
+from oqd_dataschema import Datastore
+
+datastore = Datastore(groups={"g1": group})
+```
+
+## Save Datastore
+
+```python
+datastore.model_dump_hdf5(pathlib.Path("datastore.h5"), mode="w")
+```
+
+## Load Datastore
+
+```python
+reloaded_datastore = Datastore.model_validate_hdf5(pathlib.Path("datastore.h5"))
+```
diff --git a/docs/tutorials/basic.md b/docs/tutorials/basic.md
new file mode 100644
index 0000000..c382434
--- /dev/null
+++ b/docs/tutorials/basic.md
@@ -0,0 +1,73 @@
+# Tutorial
+
+## Group Definition
+
+```python
+import datetime
+import pathlib
+
+import numpy as np
+from pydantic import Field
+
+from oqd_dataschema import Attrs, Dataset, GroupBase
+
+class CustomGroup(GroupBase):
+ attrs: Attrs = Field(
+ default_factory=lambda: dict(
+ timestamp=str(datetime.datetime.now(datetime.timezone.utc))
+ )
+ )
+ t: Dataset
+ x: Dataset
+```
+
+Defined groups are automatically registered into the [`GroupRegistry`][oqd_dataschema.group.GroupRegistry].
+
+```python
+from oqd_dataschema import GroupRegistry
+
+GroupRegistry.groups
+```
+
+## Initialize Group
+
+```python
+t = np.linspace(0, 1, 101).astype(np.float32)
+x = np.sin(t).astype(np.complex64)
+
+group = CustomGroup(
+ t=Dataset(dtype="float32", shape=(101,)), x=Dataset(dtype="complex64", shape=(101,))
+)
+
+group.t.data = t
+group.x.data = x
+```
+
+## Initialize Datastore
+
+```python
+from oqd_dataschema import Datastore
+
+datastore = Datastore(groups={"g1": group})
+```
+
+## Data Pipeline
+
+```python
+def process(datastore: Datastore) -> None:
+    _g = datastore["g1"]
+
+ g2 = CustomGroup(t=Dataset(data=_g.t.data), x=Dataset(data=_g.x.data + 1j))
+ g2.attrs["_gen_by_pipe"] = "process"
+
+ datastore.add(g2=g2)
+
+
+datastore.pipe(process)
+```
+
+## Save Datastore
+
+```python
+datastore.model_dump_hdf5(pathlib.Path("datastore.h5"), mode="w")
+```
+
+## Load Datastore
+
+```python
+reloaded_datastore = Datastore.model_validate_hdf5(pathlib.Path("datastore.h5"))
+```
diff --git a/examples/custom_group.ipynb b/examples/custom_group.ipynb
index 7109567..c632665 100644
--- a/examples/custom_group.ipynb
+++ b/examples/custom_group.ipynb
@@ -11,7 +11,7 @@
"import numpy as np\n",
"from rich.pretty import pprint\n",
"\n",
- "from oqd_dataschema.base import Dataset, GroupBase, GroupRegistry\n",
+    "from oqd_dataschema import Dataset, GroupBase, GroupRegistry, condataset\n",
"from oqd_dataschema.datastore import Datastore\n",
"from oqd_dataschema.groups import (\n",
" SinaraRawDataGroup,\n",
@@ -29,7 +29,7 @@
" Here we define a custom Group, which is automatically added at runtime to the GroupRegistry.\n",
" \"\"\"\n",
"\n",
- " array: Dataset"
+ " array: condataset(shape_constraint=(None, 10)) # type: ignore"
]
},
{
@@ -119,7 +119,8 @@
"│ │ │ ),\n",
"│ │ │ class_='YourCustomGroup'\n",
"│ │ )\n",
- "│ }\n",
+ "│ },\n",
+ "│ attrs={}\n",
")\n",
"\n"
],
@@ -164,7 +165,8 @@
"\u001b[2;32m│ │ │ \u001b[0m\u001b[1m)\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'YourCustomGroup'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[1m)\u001b[0m\n"
]
},
@@ -230,7 +232,8 @@
"│ │ │ ),\n",
"│ │ │ class_='YourCustomGroup'\n",
"│ │ )\n",
- "│ }\n",
+ "│ },\n",
+ "│ attrs={}\n",
")\n",
"\n"
],
@@ -275,7 +278,146 @@
"\u001b[2;32m│ │ │ \u001b[0m\u001b[1m)\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'YourCustomGroup'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ "\u001b[1m)\u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "parse = Datastore.model_validate_hdf5(filepath)\n",
+ "pprint(parse)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import Dict\n",
+ "\n",
+    "from oqd_dataschema import CastDataset\n",
+ "\n",
+ "\n",
+ "class A(GroupBase):\n",
+ " data: Dict[str, CastDataset]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+        "Datastore(\n",
+ "│ groups={\n",
+ "│ │ 'A': A(\n",
+ "│ │ │ attrs={},\n",
+ "│ │ │ data={\n",
+ "│ │ │ │ 'x': Dataset(\n",
+ "│ │ │ │ │ dtype='float64',\n",
+ "│ │ │ │ │ shape=(10,),\n",
+ "│ │ │ │ │ data=array([0.90326782, 0.17363226, 0.13827196, 0.8917397 , 0.68175954,\n",
+ "│ 0.47647195, 0.88443397, 0.75703312, 0.74991232, 0.68161151]),\n",
+ "│ │ │ │ │ attrs={'type': 'mytype'}\n",
+ "│ │ │ │ )\n",
+ "│ │ │ },\n",
+ "│ │ │ class_='A'\n",
+ "│ │ )\n",
+ "│ },\n",
+ "│ attrs={}\n",
+ ")\n",
+ "\n"
+ ],
+ "text/plain": [
+ "\u001b[1;35mDatastore\u001b[0m\u001b[1m(\u001b[0m\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[33mgroups\u001b[0m=\u001b[1m{\u001b[0m\n",
+ "\u001b[2;32m│ │ \u001b[0m\u001b[32m'A'\u001b[0m: \u001b[1;35mA\u001b[0m\u001b[1m(\u001b[0m\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1m{\u001b[0m\n",
+ "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'x'\u001b[0m: \u001b[1;35mDataset\u001b[0m\u001b[1m(\u001b[0m\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdtype\u001b[0m=\u001b[32m'float64'\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mshape\u001b[0m=\u001b[1m(\u001b[0m\u001b[1;36m10\u001b[0m,\u001b[1m)\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1;35marray\u001b[0m\u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1;36m0.90326782\u001b[0m, \u001b[1;36m0.17363226\u001b[0m, \u001b[1;36m0.13827196\u001b[0m, \u001b[1;36m0.8917397\u001b[0m , \u001b[1;36m0.68175954\u001b[0m,\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[1;36m0.47647195\u001b[0m, \u001b[1;36m0.88443397\u001b[0m, \u001b[1;36m0.75703312\u001b[0m, \u001b[1;36m0.74991232\u001b[0m, \u001b[1;36m0.68161151\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'mytype'\u001b[0m\u001b[1m}\u001b[0m\n",
+ "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m)\u001b[0m\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'A'\u001b[0m\n",
+ "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ "\u001b[1m)\u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "filepath = pathlib.Path(\"test.h5\")\n",
+ "\n",
+ "datastore = Datastore(\n",
+ " groups={\n",
+ " \"A\": A(data={\"x\": Dataset(data=np.random.rand(10), attrs={\"type\": \"mytype\"})})\n",
+ " }\n",
+ ")\n",
+ "pprint(datastore)\n",
+ "datastore.model_dump_hdf5(filepath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Datastore(\n",
+ "│ groups={\n",
+ "│ │ 'A': A(\n",
+ "│ │ │ attrs={},\n",
+ "│ │ │ data={\n",
+ "│ │ │ │ 'x': Dataset(\n",
+ "│ │ │ │ │ dtype='float64',\n",
+ "│ │ │ │ │ shape=(10,),\n",
+ "│ │ │ │ │ data=array([0.90326782, 0.17363226, 0.13827196, 0.8917397 , 0.68175954,\n",
+ "│ 0.47647195, 0.88443397, 0.75703312, 0.74991232, 0.68161151]),\n",
+ "│ │ │ │ │ attrs={'type': 'mytype'}\n",
+ "│ │ │ │ )\n",
+ "│ │ │ },\n",
+ "│ │ │ class_='A'\n",
+ "│ │ )\n",
+ "│ },\n",
+ "│ attrs={}\n",
+ ")\n",
+ "\n"
+ ],
+ "text/plain": [
+ "\u001b[1;35mDatastore\u001b[0m\u001b[1m(\u001b[0m\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[33mgroups\u001b[0m=\u001b[1m{\u001b[0m\n",
+ "\u001b[2;32m│ │ \u001b[0m\u001b[32m'A'\u001b[0m: \u001b[1;35mA\u001b[0m\u001b[1m(\u001b[0m\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1m{\u001b[0m\n",
+ "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'x'\u001b[0m: \u001b[1;35mDataset\u001b[0m\u001b[1m(\u001b[0m\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdtype\u001b[0m=\u001b[32m'float64'\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mshape\u001b[0m=\u001b[1m(\u001b[0m\u001b[1;36m10\u001b[0m,\u001b[1m)\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1;35marray\u001b[0m\u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1;36m0.90326782\u001b[0m, \u001b[1;36m0.17363226\u001b[0m, \u001b[1;36m0.13827196\u001b[0m, \u001b[1;36m0.8917397\u001b[0m , \u001b[1;36m0.68175954\u001b[0m,\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[1;36m0.47647195\u001b[0m, \u001b[1;36m0.88443397\u001b[0m, \u001b[1;36m0.75703312\u001b[0m, \u001b[1;36m0.74991232\u001b[0m, \u001b[1;36m0.68161151\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'mytype'\u001b[0m\u001b[1m}\u001b[0m\n",
+ "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m)\u001b[0m\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'A'\u001b[0m\n",
+ "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n",
+ "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[1m)\u001b[0m\n"
]
},
diff --git a/mkdocs.yaml b/mkdocs.yaml
index fca9c52..732c693 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -18,9 +18,16 @@ extra:
nav:
- Get Started: index.md
- - Tutorial: tutorial.md
- # - Explanation: explanation.md
- - API Reference: api.md
+ - Tutorials:
+ - Basics: tutorials/basic.md
+ - Datasets/Tables/Folders: tutorials/advanced.md
+ - Explanation: explanation.md
+ - API Reference:
+ - Base: api/base.md
+ - Group Field: api/groupfield.md
+ - Group: api/group.md
+ - Datastore: api/datastore.md
+ - Utilities: api/utils.md
theme:
name: material
@@ -59,6 +66,7 @@ theme:
- toc.follow
plugins:
+ - search
- mkdocstrings:
handlers:
python:
@@ -79,7 +87,7 @@ plugins:
separate_signature: false
group_by_category: true
members_order: "source"
- import:
+ inventories:
- https://docs.python.org/3/objects.inv
- https://docs.pydantic.dev/latest/objects.inv
- https://pandas.pydata.org/docs/objects.inv
diff --git a/pyproject.toml b/pyproject.toml
index 47a7ccc..6e17436 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,9 +27,9 @@ classifiers = [
]
dependencies = [
- "bidict>=0.23.1",
- "h5py>=3.13.0",
- "pydantic>=2.10.6",
+ "h5py>=3.14.0",
+ "pandas>=2.3.3",
+ "pydantic>=2.10.6",
]
[project.optional-dependencies]
@@ -52,12 +52,7 @@ select = ["E4", "E7", "E9", "F", "I"]
fixable = ["ALL"]
[dependency-groups]
-dev = [
- "jupyter>=1.1.1",
- "pre-commit>=4.1.0",
- "rich>=14.1.0",
- "ruff>=0.13.1",
-]
+dev = ["jupyter>=1.1.1", "pre-commit>=4.1.0", "rich>=14.1.0", "ruff>=0.13.1"]
[project.urls]
diff --git a/src/oqd_dataschema/__init__.py b/src/oqd_dataschema/__init__.py
index 38c732a..ef09b9f 100644
--- a/src/oqd_dataschema/__init__.py
+++ b/src/oqd_dataschema/__init__.py
@@ -12,24 +12,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from .base import Dataset, GroupBase, GroupRegistry
+from .base import Attrs, DTypes
+from .constrained import condataset, confolder, contable
+from .dataset import CastDataset, Dataset
from .datastore import Datastore
-from .groups import (
- ExpectationValueDataGroup,
- MeasurementOutcomesDataGroup,
- OQDTestbenchDataGroup,
- SinaraRawDataGroup,
-)
+from .folder import CastFolder, Folder
+from .group import GroupBase, GroupRegistry
+from .table import CastTable, Table
+from .utils import dict_to_structured, unstructured_to_structured
########################################################################################
__all__ = [
- "Dataset",
+ "Attrs",
+ "DTypes",
"Datastore",
"GroupBase",
"GroupRegistry",
- "ExpectationValueDataGroup",
- "MeasurementOutcomesDataGroup",
- "OQDTestbenchDataGroup",
- "SinaraRawDataGroup",
+ "Dataset",
+ "CastDataset",
+ "condataset",
+ "Table",
+ "CastTable",
+ "contable",
+ "Folder",
+ "CastFolder",
+ "confolder",
+ "dict_to_structured",
+ "unstructured_to_structured",
]
diff --git a/src/oqd_dataschema/base.py b/src/oqd_dataschema/base.py
index 1139849..a25d639 100644
--- a/src/oqd_dataschema/base.py
+++ b/src/oqd_dataschema/base.py
@@ -13,171 +13,131 @@
# limitations under the License.
# %%
-import warnings
-from typing import Annotated, Any, Literal, Optional, Union
+from __future__ import annotations
+
+import typing
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Annotated, Literal, Union
import numpy as np
-from bidict import bidict
from pydantic import (
BaseModel,
- ConfigDict,
- Discriminator,
+ BeforeValidator,
Field,
- TypeAdapter,
- model_validator,
)
########################################################################################
-__all__ = ["GroupBase", "Dataset", "GroupRegistry"]
+__all__ = ["Attrs", "DTypes", "DTypeNames", "GroupField"]
########################################################################################
-# %%
-mapping = bidict(
- {
- "int32": np.dtype("int32"),
- "int64": np.dtype("int64"),
- "float32": np.dtype("float32"),
- "float64": np.dtype("float64"),
- "complex64": np.dtype("complex64"),
- "complex128": np.dtype("complex128"),
- # 'string': np.type
- }
-)
+class DTypes(Enum):
+ """
+ Enum for data types supported by oqd-dataschema.
+
+ |Type |Variant|
+ |-------|-------|
+ |Boolean|`BOOL` |
+    |Integer|`INT16`, `INT32`, `INT64` (signed)<br>`UINT16`, `UINT32`, `UINT64` (unsigned)|
+    |Float  |`FLOAT16`, `FLOAT32`, `FLOAT64`|
+ |Complex|`COMPLEX64`, `COMPLEX128`|
+ |Bytes |`BYTES`|
+ |String |`STR`, `STRING`|
+ """
+ BOOL = np.dtypes.BoolDType
+ INT16 = np.dtypes.Int16DType
+ INT32 = np.dtypes.Int32DType
+ INT64 = np.dtypes.Int64DType
+ UINT16 = np.dtypes.UInt16DType
+ UINT32 = np.dtypes.UInt32DType
+ UINT64 = np.dtypes.UInt64DType
+ FLOAT16 = np.dtypes.Float16DType
+ FLOAT32 = np.dtypes.Float32DType
+ FLOAT64 = np.dtypes.Float64DType
+ COMPLEX64 = np.dtypes.Complex64DType
+ COMPLEX128 = np.dtypes.Complex128DType
+ STR = np.dtypes.StrDType
+ BYTES = np.dtypes.BytesDType
+ STRING = np.dtypes.StringDType
-class GroupBase(BaseModel, extra="forbid"):
- """
- Schema representation for a group object within an HDF5 file.
+ @classmethod
+ def get(cls, name: str) -> DTypes:
+ """
+ Get the [`DTypes`][oqd_dataschema.base.DTypes] enum variant by lowercase name.
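+
+        Example:
+            ```
+            DTypes.get("float32")  # returns DTypes.FLOAT32
+            ```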
+ """
+ return cls[name.upper()]
- Each grouping of data should be defined as a subclass of `Group`, and specify the datasets that it will contain.
- This base object only has attributes, `attrs`, which are associated to the HDF5 group.
+ @classmethod
+ def names(cls):
+ """
+ Get the lowercase names of all variants of [`DTypes`][oqd_dataschema.base.DTypes] enum.
+ """
+ return tuple((dtype.name.lower() for dtype in cls))
- Attributes:
- attrs: A dictionary of attributes to append to the dataset.
- Example:
- ```
- group = Group(attrs={'version': 2, 'date': '2025-01-01'})
- ```
- """
+DTypeNames = Literal[DTypes.names()]
+"""
+Literal list of lowercase names for [`DTypes`][oqd_dataschema.base.DTypes] variants.
+"""
- attrs: Optional[dict[str, Union[int, float, str, complex]]] = {}
- def __init_subclass__(cls, **kwargs):
- super().__init_subclass__(**kwargs)
- cls.__annotations__["class_"] = Literal[cls.__name__]
- setattr(cls, "class_", cls.__name__)
+########################################################################################
- # Auto-register new group types
- GroupRegistry.register(cls)
+invalid_attrs = ["_datastore_signature", "_group_schema"]
-class Dataset(BaseModel, extra="forbid"):
+def _valid_attr_key(value: str) -> str:
+ """
+ Validates attribute keys (prevents overwriting of protected attrs).
"""
- Schema representation for a dataset object to be saved within an HDF5 file.
+ if value in invalid_attrs:
+        raise KeyError(f"Attribute key '{value}' is protected and cannot be set.")
- Attributes:
- dtype: The datatype of the dataset, such as `int32`, `float32`, `int64`, `float64`, etc.
- Types are inferred from the `data` attribute if provided.
- shape: The shape of the dataset.
- data: The numpy ndarray of the data, from which `dtype` and `shape` are inferred.
+ return value
- attrs: A dictionary of attributes to append to the dataset.
- Example:
- ```
- dataset = Dataset(data=np.array([1, 2, 3, 4]))
+AttrKey = Annotated[str, BeforeValidator(_valid_attr_key)]
+"""
+Annotated type that represents a valid key for attributes (prevents overwriting of protected attrs).
+"""
- dataset = Dataset(dtype='int64', shape=[4,])
- dataset.data = np.array([1, 2, 3, 4])
- ```
- """
+Attrs = dict[AttrKey, Union[int, float, str, complex]]
+"""
+Type that represents attributes of an object.
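+
+Example:
+    ```
+    attrs: Attrs = {"version": 2, "date": "2025-01-01"}
+    ```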
+"""
+
+########################################################################################
- dtype: Optional[Literal[tuple(mapping.keys())]] = None
- shape: Optional[tuple[int, ...]] = None
- data: Optional[Any] = Field(default=None, exclude=True)
- attrs: Optional[dict[str, Union[int, float, str, complex]]] = {}
+class GroupField(BaseModel, ABC):
+ """
+ Abstract class for a valid data field of Group.
+
+ Attributes:
+ attrs: A dictionary of attributes to append to the object.
+ """
- model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)
+ attrs: Attrs = Field(default_factory=lambda: {})
- @model_validator(mode="before")
@classmethod
- def validate_and_update(cls, values: dict):
- data = values.get("data")
- dtype = values.get("dtype")
- shape = values.get("shape")
-
- if data is None and (dtype is not None and shape is not None):
- return values
-
- elif data is not None and (dtype is None and shape is None):
- if not isinstance(data, np.ndarray):
- raise TypeError("`data` must be a numpy.ndarray.")
-
- if data.dtype not in mapping.values():
- raise TypeError(
- f"`data` must be a numpy array of dtype in {tuple(mapping.keys())}."
- )
-
- values["dtype"] = mapping.inverse[data.dtype]
- values["shape"] = data.shape
-
- return values
-
- @model_validator(mode="after")
- def validate_data_matches_shape_dtype(self):
- """Ensure that `data` matches `dtype` and `shape`."""
- if self.data is not None:
- expected_dtype = mapping[self.dtype]
- if self.data.dtype != expected_dtype:
- raise ValueError(
- f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`."
- )
- if self.data.shape != self.shape:
- raise ValueError(
- f"Expected shape {self.shape}, but got {self.data.shape}."
- )
- return self
-
-
-class MetaGroupRegistry(type):
- def __new__(cls, clsname, superclasses, attributedict):
- attributedict["groups"] = dict()
- return super().__new__(cls, clsname, superclasses, attributedict)
-
- def register(cls, group):
- if not issubclass(group, GroupBase):
- raise TypeError("You may only register subclasses of GroupBase.")
-
- if group.__name__ in cls.groups.keys():
- warnings.warn(
- f"Overwriting previously registered `{group.__name__}` group of the same name.",
- UserWarning,
- stacklevel=2,
- )
-
- cls.groups[group.__name__] = group
-
- def clear(cls):
- """Clear all registered types (useful for testing)"""
- cls.groups.clear()
-
- @property
- def union(cls):
- """Get the current Union of all registered types"""
- return Annotated[
- Union[tuple(cls.groups.values())], Discriminator(discriminator="class_")
- ]
-
- @property
- def adapter(cls):
- """Get TypeAdapter for current registered types"""
- return TypeAdapter(cls.union)
-
-
-class GroupRegistry(metaclass=MetaGroupRegistry):
- pass
+ def _is_supported_type(cls, type_):
+ return type_ == cls or (
+ typing.get_origin(type_) is Annotated and type_.__origin__ is cls
+ )
+
+ @abstractmethod
+ def _handle_data_dump(self, data: np.ndarray) -> np.ndarray:
+ """Hook into [Datastore.model_dump_hdf5][oqd_dataschema.datastore.Datastore.model_dump_hdf5] for compatibility mapping to HDF5."""
+ pass
+
+ @abstractmethod
+ def _handle_data_load(self, data: np.ndarray) -> np.ndarray:
+ """Hook into [Datastore.model_validate_hdf5][oqd_dataschema.datastore.Datastore.model_validate_hdf5] for reversing compatibility mapping, i.e. mapping data back to original type."""
+ pass
+
+
+# %%
diff --git a/src/oqd_dataschema/constrained.py b/src/oqd_dataschema/constrained.py
new file mode 100644
index 0000000..671060c
--- /dev/null
+++ b/src/oqd_dataschema/constrained.py
@@ -0,0 +1,244 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from typing import Annotated, Sequence, TypeAlias
+
+from pydantic import AfterValidator
+
+from oqd_dataschema.dataset import CastDataset
+from oqd_dataschema.folder import Folder
+from oqd_dataschema.table import CastTable
+from oqd_dataschema.utils import _flex_shape_equal, _validator_from_condition
+
+########################################################################################
+
+__all__ = ["contable", "condataset", "confolder"]
+
+########################################################################################
+
+
+@_validator_from_condition
+def _constrain_dim(model, *, min_dim=None, max_dim=None):
+ """Constrains the dimension of a Dataset or Table."""
+
+ if min_dim is not None and max_dim is not None and min_dim > max_dim:
+ raise ValueError("Impossible to satisfy dimension constraints on dataset.")
+
+ min_dim = 0 if min_dim is None else min_dim
+
+ # fast escape
+ if min_dim == 0 and max_dim is None:
+ return
+
+ dims = len(model.shape)
+ if dims < min_dim or (max_dim is not None and dims > max_dim):
+ raise ValueError(
+            f"Expected {min_dim} <= dimension of shape{f' <= {max_dim}' if max_dim is not None else ''}, but got shape = {model.shape}."
+ )
+
+
+@_validator_from_condition
+def _constrain_shape(model, *, shape_constraint=None):
+ """Constrains the shape of a Dataset or Table."""
+
+ # fast escape
+ if shape_constraint is None:
+ return
+
+ if not _flex_shape_equal(shape_constraint, model.shape):
+ raise ValueError(
+ f"Expected shape to be {shape_constraint}, but got {model.shape}."
+ )
+
+
+########################################################################################
+
+
+@_validator_from_condition
+def _constrain_dtype_dataset(dataset, *, dtype_constraint=None):
+ """Constrains the dtype of a Dataset."""
+
+ # fast escape
+ if dtype_constraint is None:
+ return
+
+ # convert dtype constraint to set
+ if (not isinstance(dtype_constraint, str)) and isinstance(
+ dtype_constraint, Sequence
+ ):
+ dtype_constraint = set(dtype_constraint)
+ elif isinstance(dtype_constraint, str):
+ dtype_constraint = {dtype_constraint}
+
+ # apply dtype constraint
+ if dataset.dtype not in dtype_constraint:
+ raise ValueError(
+            f"Expected dtype to be one of {dtype_constraint}, but got {dataset.dtype}."
+ )
+
+
+def condataset(
+ *,
+ shape_constraint=None,
+ dtype_constraint=None,
+ min_dim=None,
+ max_dim=None,
+) -> TypeAlias:
+    """Implements dtype, dimension and shape constraints on the Dataset.
+
+ Arguments:
+        shape_constraint (Tuple[Union[None, int],...]): Required shape; `None` entries match any size along that axis.
+        dtype_constraint (Tuple[DTypeNames,...]): Allowed dtype name(s).
+        min_dim (int): Minimum number of dimensions.
+        max_dim (int): Maximum number of dimensions.
+
+ Example:
+ ```
+    class CustomGroup(GroupBase):
+        x: condataset(dtype_constraint=("int16", "int32", "int64"))
+        y: condataset(shape_constraint=(100,))
+        z: condataset(min_dim=1, max_dim=1)
+
+    group = CustomGroup(x=..., y=..., z=...)  # succeeds if the values obey the constraints
+
+    group = CustomGroup(x=..., y=..., z=...)  # fails if the values violate the constraints
+ ```
+
+ """
+ return Annotated[
+ CastDataset,
+ AfterValidator(_constrain_dtype_dataset(dtype_constraint=dtype_constraint)),
+ AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)),
+ AfterValidator(_constrain_shape(shape_constraint=shape_constraint)),
+ ]
+
+
+########################################################################################
+
+
+@_validator_from_condition
+def _constrain_dtype_table(table, *, dtype_constraint={}):
+ """Constrains the dtype of a Table."""
+
+    for k, v in dtype_constraint.items():
+        if (not isinstance(v, str)) and isinstance(v, Sequence):
+            _v = set(v)
+        else:
+            _v = {v}
+
+        if _v and dict(table.columns)[k] not in _v:
+            raise ValueError(
+                f"Expected dtype to be one of {_v}, but got {dict(table.columns)[k]}."
+ )
+
+
+@_validator_from_condition
+def _constrain_required_field(table, *, required_fields=None, strict_fields=False):
+ """Constrains the fields of a Table."""
+
+ if strict_fields and required_fields is None:
+ raise ValueError("Constraints force an empty Table.")
+
+ # fast escape
+ if required_fields is None:
+ return
+
+ # convert required fields to set
+ if (not isinstance(required_fields, str)) and isinstance(required_fields, Sequence):
+ required_fields = set(required_fields)
+ elif isinstance(required_fields, str):
+ required_fields = {required_fields}
+
+ diff = required_fields.difference(set([c[0] for c in table.columns]))
+ reverse_diff = set([c[0] for c in table.columns]).difference(required_fields)
+
+ if len(diff) > 0:
+ raise ValueError(f"Missing required fields {diff}.")
+
+ if strict_fields and len(reverse_diff):
+ raise ValueError(
+            f"Extra fields in the table are forbidden by constraints: {reverse_diff}."
+ )
+
+
+def contable(
+ *,
+ required_fields=None,
+ strict_fields=False,
+ dtype_constraint={},
+ shape_constraint=None,
+ min_dim=None,
+ max_dim=None,
+) -> TypeAlias:
+    """Implements field, dtype, dimension and shape constraints on the Table.
+
+ Example:
+ ```
+    class CustomGroup(GroupBase):
+        x: contable(dtype_constraint={"c1": ("int16", "int32", "int64")})
+        y: contable(shape_constraint=(100,))
+        z: contable(min_dim=1, max_dim=1)
+        u: contable(required_fields=("c1", "c2"))
+        v: contable(required_fields=("c1", "c2"), strict_fields=True)
+
+
+    group = CustomGroup(x=..., y=..., z=..., u=..., v=...)  # succeeds if the values obey the constraints
+
+    group = CustomGroup(x=..., y=..., z=..., u=..., v=...)  # fails if the values violate the constraints
+ ```
+
+ """
+ return Annotated[
+ CastTable,
+ AfterValidator(
+ _constrain_required_field(
+ required_fields=required_fields, strict_fields=strict_fields
+ )
+ ),
+ AfterValidator(_constrain_dtype_table(dtype_constraint=dtype_constraint)),
+ AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)),
+ AfterValidator(_constrain_shape(shape_constraint=shape_constraint)),
+ ]
+
+
+########################################################################################
+
+
+def confolder(
+ *,
+ shape_constraint=None,
+ min_dim=None,
+ max_dim=None,
+) -> TypeAlias:
+    """Implements dimension and shape constraints on the Folder.
+
+ Example:
+ ```
+    class CustomGroup(GroupBase):
+        x: confolder(shape_constraint=(100,))
+        y: confolder(min_dim=1, max_dim=1)
+
+
+    group = CustomGroup(x=..., y=...)  # succeeds if the values obey the constraints
+
+    group = CustomGroup(x=..., y=...)  # fails if the values violate the constraints
+ ```
+
+ """
+ return Annotated[
+ Folder,
+ AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)),
+ AfterValidator(_constrain_shape(shape_constraint=shape_constraint)),
+ ]
diff --git a/src/oqd_dataschema/dataset.py b/src/oqd_dataschema/dataset.py
new file mode 100644
index 0000000..ee9d7f9
--- /dev/null
+++ b/src/oqd_dataschema/dataset.py
@@ -0,0 +1,147 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# %%
+
+from __future__ import annotations
+
+from typing import Annotated, Any, Optional, Tuple, Union
+
+import numpy as np
+from pydantic import (
+ BeforeValidator,
+ ConfigDict,
+ Field,
+ field_validator,
+ model_validator,
+)
+
+from oqd_dataschema.base import Attrs, DTypeNames, DTypes, GroupField
+
+from .utils import _flex_shape_equal
+
+########################################################################################
+
+__all__ = [
+ "Dataset",
+ "CastDataset",
+]
+
+########################################################################################
+
+
+class Dataset(GroupField, extra="forbid"):
+ """
+ Schema representation for a dataset object to be saved within an HDF5 file.
+
+ Attributes:
+ dtype: The datatype of the dataset, such as `int32`, `float32`, `int64`, `float64`, etc.
+ Types are inferred from the `data` attribute if provided.
+ shape: The shape of the dataset.
+ data: The numpy ndarray of the data, from which `dtype` and `shape` are inferred.
+
+ attrs: A dictionary of attributes to append to the dataset.
+
+ Example:
+ ```
+ dataset = Dataset(data=np.array([1, 2, 3, 4]))
+
+ dataset = Dataset(dtype='int64', shape=[4,])
+ dataset.data = np.array([1, 2, 3, 4])
+ ```
+ """
+
+ dtype: Optional[DTypeNames] = None # type: ignore
+ shape: Optional[Tuple[Union[int, None], ...]] = None
+ data: Optional[Any] = Field(default=None, exclude=True)
+
+ attrs: Attrs = Field(default_factory=lambda: {})
+
+ model_config = ConfigDict(
+ use_enum_values=False, arbitrary_types_allowed=True, validate_assignment=True
+ )
+
+ @field_validator("data", mode="before")
+ @classmethod
+ def _validate_and_update(cls, value):
+ # check if data exist
+ if value is None:
+ return value
+
+ # check if data is a numpy array
+ if not isinstance(value, np.ndarray):
+ raise TypeError("`data` must be a numpy.ndarray.")
+
+ return value
+
+ @model_validator(mode="after")
+ def _validate_data_matches_shape_dtype(self):
+ """Ensure that `data` matches `dtype` and `shape`."""
+
+ # check if data exist
+ if self.data is None:
+ return self
+
+ # check if dtype matches data
+ if (
+ self.dtype is not None
+ and type(self.data.dtype) is not DTypes.get(self.dtype).value
+ ):
+ raise ValueError(
+ f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`."
+ )
+
+        # check if shape matches data
+ if self.shape is not None and not _flex_shape_equal(
+ self.data.shape, self.shape
+ ):
+ raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")
+
+ # reassign dtype if it is None
+ if self.dtype != DTypes(type(self.data.dtype)).name.lower():
+ self.dtype = DTypes(type(self.data.dtype)).name.lower()
+
+        # reassign shape to concrete value if it is None or a flexible shape
+ if self.shape != self.data.shape:
+ self.shape = self.data.shape
+
+ return self
+
+ @classmethod
+ def cast(cls, data: np.ndarray) -> Dataset:
+ """Casts data from numpy array to Dataset."""
+ if isinstance(data, np.ndarray):
+ return cls(data=data)
+ return data
+
+ def __getitem__(self, idx):
+ return self.data[idx]
+
+ def _handle_data_dump(self, data):
+ np_dtype = (
+ np.dtypes.BytesDType if type(data.dtype) is np.dtypes.StrDType else None
+ )
+
+ if np_dtype is None:
+ return data
+
+ return data.astype(np_dtype)
+
+ def _handle_data_load(self, data):
+ np_dtype = DTypes.get(self.dtype).value
+ return data.astype(np_dtype)
+
+
+CastDataset = Annotated[Dataset, BeforeValidator(Dataset.cast)]
+"""Annotated type that automatically executes Dataset.cast"""
diff --git a/src/oqd_dataschema/datastore.py b/src/oqd_dataschema/datastore.py
index cf52c0c..8fb5066 100644
--- a/src/oqd_dataschema/datastore.py
+++ b/src/oqd_dataschema/datastore.py
@@ -14,15 +14,21 @@
# %%
+from __future__ import annotations
+
+import json
import pathlib
-from typing import Any, Dict, Literal, Optional
+from typing import Any, Callable, Dict, Literal
import h5py
-import numpy as np
-from pydantic import BaseModel, model_validator
-from pydantic.types import TypeVar
+from pydantic import (
+ BaseModel,
+ Field,
+ field_validator,
+)
-from oqd_dataschema.base import Dataset, GroupBase, GroupRegistry
+from oqd_dataschema.base import Attrs, GroupField
+from oqd_dataschema.group import GroupBase, GroupRegistry
########################################################################################
@@ -34,50 +40,93 @@
# %%
class Datastore(BaseModel, extra="forbid"):
"""
- Saves the model and its associated data to an HDF5 file.
- This method serializes the model's data and attributes into an HDF5 file
- at the specified filepath.
+ Class representing a datastore with restricted HDF5 format.
Attributes:
- filepath (pathlib.Path): The path to the HDF5 file where the model data will be saved.
+        groups (Dict[str, GroupBase]): Groups of data.
+        attrs (Attrs): Attributes of the datastore.
"""
- groups: Dict[str, Any]
+ groups: Dict[str, Any] = Field(default_factory=lambda: {})
+
+ attrs: Attrs = Field(default_factory=lambda: {})
+
+ @classmethod
+ def _validate_group(cls, key, group):
+ """Helper function for validating group to be of type Group registered in the GroupRegistry."""
+ if isinstance(group, GroupBase):
+ return group
+
+ if isinstance(group, dict):
+ return GroupRegistry.adapter.validate_python(group)
+
+ raise ValueError(f"Key `{key}` contains invalid group data.")
- @model_validator(mode="before")
+ @field_validator("groups", mode="before")
@classmethod
def validate_groups(cls, data):
- if isinstance(data, dict) and "groups" in data:
- # Get the current adapter from registry
- try:
- validated_groups = {}
-
- for key, group_data in data["groups"].items():
- if isinstance(group_data, GroupBase):
- # Already a Group instance
- validated_groups[key] = group_data
- elif isinstance(group_data, dict):
- # Parse dict using discriminated union
- validated_groups[key] = GroupRegistry.adapter.validate_python(
- group_data
- )
- else:
- raise ValueError(
- f"Invalid group data for key '{key}': {type(group_data)}"
- )
-
- data["groups"] = validated_groups
-
- except ValueError as e:
- if "No group types registered" in str(e):
- raise ValueError(
- "No group types available. Register group types before creating Datastore."
- )
- raise
-
- return data
-
- def model_dump_hdf5(self, filepath: pathlib.Path, mode: Literal["w", "a"] = "a"):
+ """Validates groups to be of type Group registered in the GroupRegistry."""
+ if GroupRegistry.groups == {}:
+ raise ValueError(
+ "No group types available. Register group types before creating Datastore."
+ )
+
+ validated_groups = {k: cls._validate_group(k, v) for k, v in data.items()}
+ return validated_groups
+
+ def _dump_group(self, h5datastore, gkey, group):
+ """Helper function for dumping Group."""
+ # remove existing group
+ if gkey in h5datastore.keys():
+ del h5datastore[gkey]
+
+ # create group
+ h5_group = h5datastore.create_group(gkey)
+
+ # dump group schema
+ h5_group.attrs["_group_schema"] = json.dumps(
+ group.model_json_schema(), indent=2
+ )
+
+ # dump group attributes
+ for akey, attr in group.attrs.items():
+ h5_group.attrs[akey] = attr
+
+ # dump group data
+ for dkey, dataset in group.__dict__.items():
+ if dkey in ["attrs", "class_"]:
+ continue
+
+            # if the group field contains a dictionary of group fields (e.g. Datasets)
+ if isinstance(dataset, dict):
+ h5_subgroup = h5_group.create_group(dkey)
+ for ddkey, ddataset in dataset.items():
+ self._dump_dataset(h5_subgroup, ddkey, ddataset)
+ continue
+
+ self._dump_dataset(h5_group, dkey, dataset)
+
+ def _dump_dataset(self, h5group, dkey, dataset):
+ """Helper function for dumping Dataset."""
+
+ if dataset is not None and not isinstance(dataset, GroupField):
+            raise ValueError("Group data field is not a valid group field (Dataset, Table, or Folder).")
+
+ # handle optional dataset
+ if dataset is None:
+ h5_dataset = h5group.create_dataset(dkey, data=h5py.Empty("f"))
+ return
+
+ # dtype str converted to bytes when dumped (h5 compatibility)
+ h5_dataset = h5group.create_dataset(
+ dkey, data=dataset._handle_data_dump(dataset.data)
+ )
+
+ # dump dataset attributes
+ for akey, attr in dataset.attrs.items():
+ h5_dataset.attrs[akey] = attr
+
+ def model_dump_hdf5(self, filepath: pathlib.Path, mode: Literal["w", "a"] = "w"):
"""
Saves the model and its associated data to an HDF5 file.
This method serializes the model's data and attributes into an HDF5 file
@@ -89,28 +138,33 @@ def model_dump_hdf5(self, filepath: pathlib.Path, mode: Literal["w", "a"] = "a")
filepath.parent.mkdir(exist_ok=True, parents=True)
with h5py.File(filepath, mode) as f:
- # store the model JSON schema
- f.attrs["model"] = self.model_dump_json()
+ # dump the datastore signature
+ f.attrs["_datastore_signature"] = self.model_dump_json(indent=2)
+ for akey, attr in self.attrs.items():
+ f.attrs[akey] = attr
- # store each group
+ # dump each group
for gkey, group in self.groups.items():
- if gkey in f.keys():
- del f[gkey]
- h5_group = f.create_group(gkey)
- for akey, attr in group.attrs.items():
- h5_group.attrs[akey] = attr
-
- for dkey, dataset in group.__dict__.items():
- if not isinstance(dataset, Dataset):
- continue
- h5_dataset = h5_group.create_dataset(dkey, data=dataset.data)
- for akey, attr in dataset.attrs.items():
- h5_dataset.attrs[akey] = attr
+ if gkey in ["attrs", "class_"]:
+ continue
+
+ self._dump_group(f, gkey, group)
@classmethod
- def model_validate_hdf5(
- cls, filepath: pathlib.Path, types: Optional[TypeVar] = None
- ):
+ def _load_data(cls, group, h5group, dkey, ikey=None):
+ field = group.__dict__[ikey] if ikey else group.__dict__
+ h5field = h5group[ikey] if ikey else h5group
+
+ if isinstance(field[dkey], GroupField):
+ field[dkey].data = field[dkey]._handle_data_load(h5field[dkey][()])
+ return
+
+ raise ValueError(
+            "Attempted to load a Group data field that is not a valid group field (Dataset, Table, or Folder)."
+ )
+
+ @classmethod
+ def model_validate_hdf5(cls, filepath: pathlib.Path):
"""
Loads the model from an HDF5 file at the specified filepath.
@@ -118,12 +172,61 @@ def model_validate_hdf5(
filepath (pathlib.Path): The path to the HDF5 file where the model data will be read and validated from.
"""
with h5py.File(filepath, "r") as f:
- self = cls.model_validate_json(f.attrs["model"])
+ # Load datastore signature
+ self = cls.model_validate_json(f.attrs["_datastore_signature"])
- # loop through all groups in the model schema and load HDF5 store
- for gkey, group in self.groups.items():
- for dkey, val in group.__dict__.items():
+ # loop through all groups in the model schema and load the data
+ for gkey, group in self:
+ for dkey in group.__class__.model_fields:
+ # ignore attrs and class_ fields
if dkey in ("attrs", "class_"):
continue
- group.__dict__[dkey].data = np.array(f[gkey][dkey][()])
+
+ if group.__dict__[dkey] is None:
+ continue
+
+ # load data for dict of Dataset or dict of Table
+ if isinstance(group.__dict__[dkey], dict):
+ for ddkey in group.__dict__[dkey]:
+ cls._load_data(group, f[gkey], dkey=ddkey, ikey=dkey)
+ continue
+
+ # load Dataset or Table data
+ cls._load_data(group, f[gkey], dkey=dkey)
+
return self
+
+ def __getitem__(self, key):
+ """Overloads indexing to retrieve elements in groups."""
+ return self.groups.__getitem__(key)
+
+ def __iter__(self):
+ """Overloads iter to iterate over elements in groups."""
+        """Overloads iter to iterate over (key, group) pairs in groups."""
+
+ def update(self, **groups):
+ """Updates groups in the datastore, overwriting past values."""
+ for k, v in groups.items():
+ self.groups[k] = v
+
+ def add(self, **groups):
+        """Adds new groups to the datastore."""
+
+ existing_keys = set(groups.keys()).intersection(set(self.groups.keys()))
+ if existing_keys:
+ raise ValueError(
+ f"Keys {existing_keys} already exist in the datastore, use `update` instead if intending to overwrite past data."
+ )
+
+ self.update(**groups)
+
+    def pipe(self, func: Callable[[Datastore], None]) -> Datastore:
+        """Applies an in-place processing function to the datastore and returns the datastore for chaining."""
+        _result = func(self)
+
+ if _result is not None:
+ raise ValueError("`func` must return None.")
+
+ return self
+
+
+# %%
diff --git a/src/oqd_dataschema/folder.py b/src/oqd_dataschema/folder.py
new file mode 100644
index 0000000..4e8c41e
--- /dev/null
+++ b/src/oqd_dataschema/folder.py
@@ -0,0 +1,275 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from types import MappingProxyType
+from typing import Annotated, Any, Dict, Optional, Tuple, Union
+
+import numpy as np
+from pydantic import (
+ BeforeValidator,
+ ConfigDict,
+ Field,
+ field_validator,
+ model_validator,
+)
+from typing_extensions import TypeAliasType
+
+from oqd_dataschema.base import Attrs, DTypeNames, DTypes, GroupField
+from oqd_dataschema.utils import _flex_shape_equal
+
+########################################################################################
+
+__all__ = ["Folder", "CastFolder"]
+
+########################################################################################
+
+DocumentSchema = TypeAliasType(
+ "DocumentSchema",
+ Dict[str, Union["DocumentSchema", Optional[DTypeNames]]], # type: ignore
+)
+
+
+class Folder(GroupField, extra="forbid"):
+ """
+    Schema representation for a folder object to be saved within an HDF5 file.
+
+ Attributes:
+ document_schema: The schema for a document (structured type with keys and their datatype). Types are inferred from the `data` attribute if not provided.
+ shape: The shape of the folder.
+        data: The numpy ndarray or recarray (of structured dtype) of the data, from which `document_schema` and `shape` can be inferred.
+
+ attrs: A dictionary of attributes to append to the folder.
+
+ Example:
+ ```python
+ schema = dict(
+ index="int32",
+ t="float64",
+ channels=dict(ch1="complex128", ch2="complex128"),
+ label="str",
+ )
+ dt = np.dtype(
+ [
+ ("index", np.int32),
+ ("t", np.float64),
+ ("channels", np.dtype([("ch1", np.complex128), ("ch2", np.complex128)])),
+            ("label", np.dtype("<U10")),
+        ]
+    )
+    ```
+    """
+
+    document_schema: DocumentSchema
+    shape: Optional[Tuple[Union[int, None], ...]] = None
+    data: Optional[Any] = Field(default=None, exclude=True)
+
+    def numpy_dtype(
+        self, *, str_size: Optional[int] = None, bytes_size: Optional[int] = None
+    ) -> np.dtype:
+ return self._numpy_dtype(
+ self.document_schema, str_size=str_size, bytes_size=bytes_size
+ )
+
+ @staticmethod
+ def _dump_dtype_str_to_bytes(dtype):
+ np_dtype = []
+
+ for k, (v, _) in dtype.fields.items():
+ if isinstance(v.fields, MappingProxyType):
+ dt = Folder._dump_dtype_str_to_bytes(v)
+ elif type(v) is np.dtypes.StrDType:
+ dt = np.empty(0, dtype=v).astype(np.dtypes.BytesDType).dtype
+ else:
+ dt = v
+
+ np_dtype.append((k, dt))
+
+ return np.dtype(np_dtype)
+
+ def _handle_data_dump(self, data):
+ np_dtype = self._dump_dtype_str_to_bytes(data.dtype)
+
+ return data.astype(np_dtype)
+
+ @staticmethod
+ def _load_dtype_bytes_to_str(document_schema, dtype):
+ np_dtype = []
+
+ for k, (v, _) in dtype.fields.items():
+ if isinstance(v.fields, MappingProxyType):
+ dt = Folder._load_dtype_bytes_to_str(document_schema[k], v)
+ elif document_schema[k] == "str":
+ dt = np.empty(0, dtype=v).astype(np.dtypes.StrDType).dtype
+ else:
+ dt = v
+
+ np_dtype.append((k, dt))
+
+ return np.dtype(np_dtype)
+
+ def _handle_data_load(self, data):
+ np_dtype = self._load_dtype_bytes_to_str(self.document_schema, data.dtype)
+
+ return data.astype(np_dtype)
+
+ @classmethod
+ def cast(cls, data: np.ndarray) -> Folder:
+ """Casts data from numpy structured array to Folder."""
+ if isinstance(data, np.ndarray):
+ if not isinstance(data.dtype.fields, MappingProxyType):
+ raise TypeError("dtype of data must be a structured dtype.")
+
+ document_schema = cls._get_document_schema_from_dtype(data.dtype)
+
+ return cls(document_schema=document_schema, data=data)
+ return data
+
+
+CastFolder = Annotated[Folder, BeforeValidator(Folder.cast)]
+"""Annotated type that automatically executes Folder.cast"""
diff --git a/src/oqd_dataschema/group.py b/src/oqd_dataschema/group.py
new file mode 100644
index 0000000..2d2829d
--- /dev/null
+++ b/src/oqd_dataschema/group.py
@@ -0,0 +1,192 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+import warnings
+from functools import reduce
+from types import NoneType
+from typing import Annotated, ClassVar, Literal, Union
+
+from pydantic import (
+ BaseModel,
+ Discriminator,
+ Field,
+ TypeAdapter,
+)
+
+from oqd_dataschema.base import Attrs, GroupField
+
+########################################################################################
+
+__all__ = [
+ "GroupBase",
+ "GroupRegistry",
+]
+
+
+########################################################################################
+
+
+class GroupBase(BaseModel, extra="forbid"):
+ """
+ Schema representation for a group object within an HDF5 file.
+
+    Each grouping of data should be defined as a subclass of `GroupBase` that specifies the datasets it will contain.
+    This base class only defines the `attrs` attribute, which holds the attributes associated with the HDF5 group.
+
+ Attributes:
+ attrs: A dictionary of attributes to append to the group.
+
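+    Example:
+        A minimal sketch of defining a custom group (the group and field
+        names below are illustrative):
+
+    ```python
+    class SinaraRawDataGroup(GroupBase):
+        camera_images: Dataset
+    ```
+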
+ """
+
+    attrs: Attrs = Field(default_factory=dict)
+
+ @staticmethod
+ def _is_basic_groupfield_type(v):
+ return reduce(
+ lambda x, y: x or y,
+ (gf._is_supported_type(v) for gf in GroupField.__subclasses__()),
+ )
+
+ @classmethod
+ def _is_groupfield_type(cls, v):
+ is_datafield = cls._is_basic_groupfield_type(v)
+
+ is_annotated_datafield = typing.get_origin(
+ v
+ ) is Annotated and cls._is_basic_groupfield_type(v.__origin__)
+
+ is_optional_datafield = typing.get_origin(v) is Union and (
+ (v.__args__[0] == NoneType and cls._is_basic_groupfield_type(v.__args__[1]))
+ or (
+ v.__args__[1] == NoneType
+ and cls._is_basic_groupfield_type(v.__args__[0])
+ )
+ )
+
+ is_dict_datafield = (
+ typing.get_origin(v) is dict
+ and v.__args__[0] is str
+ and cls._is_basic_groupfield_type(v.__args__[1])
+ )
+
+ return (
+ is_datafield
+ or is_annotated_datafield
+ or is_optional_datafield
+ or is_dict_datafield
+ )
+
+ @classmethod
+ def _is_classvar(cls, v):
+ return v is ClassVar or typing.get_origin(v) is ClassVar
+
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+
+ for k, v in cls.__annotations__.items():
+ if k == "class_":
+ raise AttributeError("`class_` attribute should not be set manually.")
+
+ if k == "attrs" and v is not Attrs:
+ raise AttributeError(
+ "`attrs` attribute must have type annotation of Attrs."
+ )
+
+ if k == "attrs" or cls._is_classvar(v):
+ continue
+
+ if not cls._is_groupfield_type(v):
+ raise TypeError(
+                    "All fields of a `GroupBase` subclass must be of type `Dataset`, `Table`, or `Folder`."
+ )
+
+ cls.__annotations__["class_"] = Literal[cls.__name__]
+ setattr(cls, "class_", cls.__name__)
+
+ # Auto-register new group types
+ GroupRegistry.register(cls)
+
+
+########################################################################################
+
+
+class MetaGroupRegistry(type):
+ """
+ Metaclass for the GroupRegistry
+ """
+
+ def __new__(cls, clsname, superclasses, attributedict):
+ attributedict["groups"] = dict()
+ return super().__new__(cls, clsname, superclasses, attributedict)
+
+ def register(cls, group):
+ """Registers a group into the GroupRegistry."""
+ if not issubclass(group, GroupBase):
+ raise TypeError("You may only register subclasses of GroupBase.")
+
+ if group.__name__ in cls.groups.keys():
+ warnings.warn(
+                f"Overwriting previously registered group `{group.__name__}`.",
+ UserWarning,
+ stacklevel=2,
+ )
+
+ cls.groups[group.__name__] = group
+
+ def clear(cls):
+ """Clear all registered types (useful for testing)"""
+ cls.groups.clear()
+
+ @property
+ def union(cls):
+ """Get the current Union of all registered types"""
+
+ if len(cls.groups) > 1:
+ return Annotated[
+ Union[tuple(cls.groups.values())], Discriminator(discriminator="class_")
+ ]
+ else:
+ return next(iter(cls.groups.values()))
+
+ @property
+ def adapter(cls):
+ """Get TypeAdapter for current registered types"""
+ return TypeAdapter(cls.union)
+
+
+class GroupRegistry(metaclass=MetaGroupRegistry):
+ """
+    Registry of all `GroupBase` subclasses; subclasses are registered automatically when they are defined.
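+
+    Example:
+        A sketch of typical registry interactions (assuming a `MyGroup`
+        subclass of `GroupBase` has been defined):
+
+    ```python
+    "MyGroup" in GroupRegistry.groups  # True, registered on definition
+    adapter = GroupRegistry.adapter  # pydantic TypeAdapter over the group union
+    ```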
+ """
+
+ pass
diff --git a/src/oqd_dataschema/groups.py b/src/oqd_dataschema/groups.py
deleted file mode 100644
index 88ecd2f..0000000
--- a/src/oqd_dataschema/groups.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2024-2025 Open Quantum Design
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from oqd_dataschema.base import Dataset, GroupBase
-
-########################################################################################
-
-__all__ = [
- "SinaraRawDataGroup",
- "MeasurementOutcomesDataGroup",
- "ExpectationValueDataGroup",
- "OQDTestbenchDataGroup",
-]
-
-########################################################################################
-
-
-class SinaraRawDataGroup(GroupBase):
- """
- Example `Group` for raw data from the Sinara real-time control system.
- This is a placeholder for demonstration and development.
- """
-
- camera_images: Dataset
-
-
-class MeasurementOutcomesDataGroup(GroupBase):
- """
- Example `Group` for processed data classifying the readout of the state.
- This is a placeholder for demonstration and development.
- """
-
- outcomes: Dataset
-
-
-class ExpectationValueDataGroup(GroupBase):
- """
- Example `Group` for processed data calculating the expectation values.
- This is a placeholder for demonstration and development.
- """
-
- expectation_value: Dataset
-
-
-class OQDTestbenchDataGroup(GroupBase):
- """ """
-
- time: Dataset
- voltages: Dataset
diff --git a/src/oqd_dataschema/table.py b/src/oqd_dataschema/table.py
new file mode 100644
index 0000000..da72a57
--- /dev/null
+++ b/src/oqd_dataschema/table.py
@@ -0,0 +1,262 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from types import MappingProxyType
+from typing import Annotated, Any, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+from pydantic import (
+ BeforeValidator,
+ ConfigDict,
+ Field,
+ field_validator,
+ model_validator,
+)
+
+from oqd_dataschema.base import Attrs, DTypeNames, DTypes, GroupField
+from oqd_dataschema.utils import (
+ _flex_shape_equal,
+ _is_list_unique,
+)
+
+########################################################################################
+
+__all__ = [
+ "Table",
+ "CastTable",
+]
+
+########################################################################################
+
+
+Column = Tuple[str, Optional[DTypeNames]]
+
+
+class Table(GroupField, extra="forbid"):
+ """
+ Schema representation for a table object to be saved within an HDF5 file.
+
+ Attributes:
+ columns: The columns in the table accompanied by their datatype. Types are inferred from the `data` attribute if not provided.
+ shape: The shape of the table (excludes the column index).
+ data: The numpy ndarray or recarray (of structured dtype) of the data, from which `dtype` and `shape` can be inferred.
+
+ attrs: A dictionary of attributes to append to the table.
+
+ Example:
+ ```python
+ dt = np.dtype(
+ [
+ ("index", np.int32),
+ ("t", np.float64),
+ ("z", np.complex128),
+            ("label", np.dtype("<U16")),
+        ]
+    )
+    ```
+    """
+
+    # ... field declarations and validators elided ...
+
+    def to_pandas(self) -> pd.DataFrame:
+ """Converts flat table to pandas DataFrame."""
+ if len(self.shape) > 1:
+ raise ValueError(
+                "Conversion to pandas DataFrame is only supported for 1D Tables."
+ )
+ return pd.DataFrame(
+ data=self.data, columns=[c[0] for c in self.columns]
+ ).astype({k: v for k, v in self.columns})
+
+ @staticmethod
+ def _pd_to_np(df):
+ np_dtype = []
+ for k, v in df.dtypes.items():
+ if type(v) is not np.dtypes.ObjectDType:
+ field_np_dtype = (k, v)
+ np_dtype.append(field_np_dtype)
+ continue
+
+ # Check if column of object dtype is actually str dtype
+ if (np.vectorize(lambda x: isinstance(x, str))(df[k].to_numpy())).all():
+ dt = df[k].to_numpy().astype(np.dtypes.StrDType).dtype
+ field_np_dtype = (k, dt)
+
+ np_dtype.append(field_np_dtype)
+ continue
+
+ raise ValueError(f"Unsupported datatype for column {k}")
+
+ return np.rec.fromarrays(
+ df.to_numpy().transpose(),
+ names=[dt[0] for dt in np_dtype],
+ formats=[dt[1] for dt in np_dtype],
+ ).astype(np.dtype(np_dtype))
+
+ @field_validator("data", mode="before")
+ @classmethod
+ def _validate_and_update(cls, value):
+        # check if data exists
+ if value is None:
+ return value
+
+ # check if data is a numpy array
+ if not isinstance(value, (np.ndarray, pd.DataFrame)):
+ raise TypeError("`data` must be a numpy.ndarray or pandas.DataFrame.")
+
+ if isinstance(value, pd.DataFrame):
+ value = cls._pd_to_np(value)
+
+ if not isinstance(value.dtype.fields, MappingProxyType):
+ raise TypeError("dtype of data must be a structured dtype.")
+
+ if isinstance(value, np.ndarray):
+ value = value.view(np.recarray)
+
+ return value
+
+ @model_validator(mode="after")
+ def _validate_data_matches_shape_dtype(self):
+ """Ensure that `data` matches `dtype` and `shape`."""
+
+        # check if data exists
+ if self.data is None:
+ return self
+
+ if set(self.data.dtype.fields.keys()) != set([c[0] for c in self.columns]):
+            raise ValueError("Fields of data do not match the expected fields for Table.")
+
+ # check if dtype matches data
+ for k, v in self.data.dtype.fields.items():
+ if (
+ dict(self.columns)[k] is not None
+ and type(v[0]) is not DTypes.get(dict(self.columns)[k]).value
+ ):
+ raise ValueError(
+ f"Expected data dtype `{dict(self.columns)[k]}`, but got `{v[0].name}`."
+ )
+
+        # check if shape matches data
+ if self.shape is not None and not _flex_shape_equal(
+ self.data.shape, self.shape
+ ):
+ raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")
+
+        # reassign column dtypes to concrete values if left unspecified
+ for n, (k, v) in enumerate(self.columns):
+ if v != DTypes(type(self.data.dtype.fields[k][0])).name.lower():
+ self.columns[n] = (
+ k,
+ DTypes(type(self.data.dtype.fields[k][0])).name.lower(),
+ )
+
+        # reassign shape to a concrete value if it is None or a flexible shape
+ if self.shape != self.data.shape:
+ self.shape = self.data.shape
+
+ return self
+
+ def numpy_dtype(self, *, str_size=64, bytes_size=64):
+ np_dtype = []
+
+ for k, v in self.columns:
+ if v is None:
+ raise ValueError(
+ "Method numpy_dtype can only be called on concrete types."
+ )
+ if v == "str":
+ dt = np.dtypes.StrDType(str_size)
+ elif v == "bytes":
+ dt = np.dtypes.BytesDType(bytes_size)
+ else:
+ dt = DTypes.get(v).value()
+
+ np_dtype.append((k, dt))
+
+ return np.dtype(np_dtype)
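+
+    # Illustrative: with the default sizes,
+    #   Table(columns=[("t", "float64"), ("label", "str")]).numpy_dtype()
+    # evaluates to np.dtype([("t", "<f8"), ("label", "<U64")]).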
+
+ @classmethod
+ def cast(cls, data: np.ndarray | pd.DataFrame) -> Table:
+ """Casts data from pandas DataFrame or numpy structured array to Table."""
+ if isinstance(data, pd.DataFrame):
+ data = cls._pd_to_np(data)
+
+ if isinstance(data, np.ndarray):
+ if not isinstance(data.dtype.fields, MappingProxyType):
+ raise TypeError("dtype of data must be a structured dtype.")
+
+ columns = [
+ (k, DTypes(type(v)).name.lower())
+ for k, (v, _) in data.dtype.fields.items()
+ ]
+
+ return cls(columns=columns, data=data)
+ return data
+
+ def _handle_data_dump(self, data):
+ np_dtype = np.dtype(
+ [
+ (k, np.empty(0, dtype=v).astype(np.dtypes.BytesDType).dtype)
+ if type(v) is np.dtypes.StrDType
+ else (k, v)
+ for k, (v, _) in data.dtype.fields.items()
+ ]
+ )
+
+ return data.astype(np_dtype)
+
+ def _handle_data_load(self, data):
+ np_dtype = np.dtype(
+ [
+ (
+ k,
+ np.empty(0, dtype=v).astype(np.dtypes.StrDType).dtype,
+ )
+ if dict(self.columns)[k] == "str"
+ else (k, v)
+ for k, (v, _) in np.array(data).dtype.fields.items()
+ ]
+ )
+ return data.astype(np_dtype)
+
+
+CastTable = Annotated[Table, BeforeValidator(Table.cast)]
+"""Annotated type that automatically executes Table.cast"""
diff --git a/src/oqd_dataschema/utils.py b/src/oqd_dataschema/utils.py
new file mode 100644
index 0000000..b1153d6
--- /dev/null
+++ b/src/oqd_dataschema/utils.py
@@ -0,0 +1,192 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import reduce
+from types import MappingProxyType
+
+import numpy as np
+from numpy.lib import recfunctions as rfn
+
+########################################################################################
+
+__all__ = [
+ "unstructured_to_structured",
+ "dict_to_structured",
+]
+
+
+########################################################################################
+
+
+def _unstructured_to_structured_helper(data, dtype):
+ for n, (k, (v, _)) in enumerate(dtype.fields.items()):
+ if isinstance(v.fields, MappingProxyType):
+ x = _unstructured_to_structured_helper(data, v)
+
+ else:
+ x = data.pop(0).astype(type(v))
+
+ if n == 0:
+ new_data = x.astype(np.dtype([(k, x.dtype)]))
+ else:
+ if new_data.shape != x.shape:
+ raise ValueError(
+ f"Incompatible shape, expected {new_data.shape} but got {x.shape}."
+ )
+
+ new_data = rfn.append_fields(
+ new_data.flatten(), k, x.flatten(), usemask=False
+ ).reshape(x.shape)
+
+ return new_data.view(np.recarray)
+
+
+def unstructured_to_structured(data, dtype):
+ data = list(np.moveaxis(data, -1, 0))
+
+ leaves = len(rfn.flatten_descr(dtype))
+ if len(data) != leaves:
+ raise ValueError(
+            f"Incompatible shape, last dimension of data ({len(data)}) must match number of leaves in structured dtype ({leaves})."
+ )
+
+ new_data = _unstructured_to_structured_helper(data, dtype)
+
+ return new_data
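+
+# Illustrative usage: the last axis of `data` is distributed across the leaves
+# of the structured dtype (two leaves below, so the last axis must have size 2).
+#
+#   dt = np.dtype([("x", np.float64), ("y", np.float64)])
+#   rec = unstructured_to_structured(np.random.rand(10, 2), dt)
+#   rec.x.shape  # (10,)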
+
+
+########################################################################################
+
+
+def _dtype_from_dict(data):
+ np_dtype = []
+
+ for k, v in data.items():
+ if isinstance(v, dict):
+ dt = _dtype_from_dict(v)
+ else:
+ dt = v.dtype
+
+ np_dtype.append((k, dt))
+
+ return np.dtype(np_dtype)
+
+
+def _dict_to_structured_helper(data, dtype):
+ for n, (k, (v, _)) in enumerate(dtype.fields.items()):
+ if isinstance(v.fields, MappingProxyType):
+ x = _dict_to_structured_helper(data[k], v)
+ else:
+ x = data[k]
+
+ if n == 0:
+ new_data = x.astype(np.dtype([(k, x.dtype)]))
+ else:
+ if new_data.shape != x.shape:
+ raise ValueError(
+ f"Incompatible shape, expected {new_data.shape} but got {x.shape}."
+ )
+
+ new_data = rfn.append_fields(
+ new_data.flatten(), k, x.flatten(), usemask=False
+ ).reshape(x.shape)
+
+ return new_data.view(np.recarray)
+
+
+def dict_to_structured(data):
+ data_dtype = _dtype_from_dict(data)
+ new_data = _dict_to_structured_helper(data, dtype=data_dtype)
+ return new_data
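+
+# Illustrative usage: nested dicts of equally shaped arrays become nested
+# structured dtypes on the resulting recarray.
+#
+#   data = {"t": np.arange(3.0), "ch": {"a": np.zeros(3), "b": np.ones(3)}}
+#   rec = dict_to_structured(data)
+#   rec.ch.a  # nested field access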
+
+
+########################################################################################
+
+
+def _flex_shape_equal(shape1, shape2):
+    """Helper function for comparing concrete and flexible shapes (a `None` axis matches any extent)."""
+ return len(shape1) == len(shape2) and reduce(
+ lambda x, y: x and y,
+ map(
+ lambda x: x[0] is None or x[1] is None or x[0] == x[1],
+ zip(shape1, shape2),
+ ),
+ )
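+
+# For example, a flexible axis (None) matches any concrete extent:
+#   _flex_shape_equal((None, 3), (10, 3))  # True
+#   _flex_shape_equal((None,), (10, 3))  # False, ranks differ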
+
+
+########################################################################################
+
+
+def _validator_from_condition(f):
+ """Helper decorator for turning a condition into a validation."""
+
+ def _wrapped_validator(*args, **kwargs):
+ def _wrapped_condition(model):
+ f(model, *args, **kwargs)
+ return model
+
+ return _wrapped_condition
+
+ return _wrapped_validator
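+
+# Illustrative usage (names hypothetical): lift a condition into a
+# model-validator-style callable that returns the model on success.
+#
+#   @_validator_from_condition
+#   def _check_positive(model, field):
+#       if getattr(model, field) <= 0:
+#           raise ValueError(f"{field} must be positive")
+#
+#   validator = _check_positive("x")  # validator(model) -> model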
+
+
+########################################################################################
+
+
+def _is_list_unique(data):
+ seen = set()
+ duplicates = set()
+ for element in data:
+ if element in duplicates:
+ continue
+
+ if element in seen:
+ duplicates.add(element)
+ continue
+
+ seen.add(element)
+
+ return (duplicates == set(), duplicates)
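+
+# For example:
+#   _is_list_unique(["a", "b"])  # (True, set())
+#   _is_list_unique(["a", "b", "a"])  # (False, {"a"})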
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
new file mode 100644
index 0000000..5ccd65d
--- /dev/null
+++ b/tests/test_dataset.py
@@ -0,0 +1,253 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# %%
+
+import numpy as np
+import pytest
+from pydantic import TypeAdapter
+
+from oqd_dataschema import CastDataset, Dataset, condataset
+from oqd_dataschema.base import DTypes
+
+########################################################################################
+
+
+class TestDatasetDtype:
+ @pytest.mark.parametrize(
+ ("dtype", "np_dtype"),
+ [
+ ("bool", np.dtypes.BoolDType),
+ ("int16", np.dtypes.Int16DType),
+ ("int32", np.dtypes.Int32DType),
+ ("int64", np.dtypes.Int64DType),
+ ("uint16", np.dtypes.UInt16DType),
+ ("uint32", np.dtypes.UInt32DType),
+ ("uint64", np.dtypes.UInt64DType),
+ ("float16", np.dtypes.Float16DType),
+ ("float32", np.dtypes.Float32DType),
+ ("float64", np.dtypes.Float64DType),
+ ("complex64", np.dtypes.Complex64DType),
+ ("complex128", np.dtypes.Complex128DType),
+ ("str", np.dtypes.StrDType),
+ ("bytes", np.dtypes.BytesDType),
+ ("string", np.dtypes.StringDType),
+ ],
+ )
+ def test_dtypes(self, dtype, np_dtype):
+ ds = Dataset(dtype=dtype, shape=(100,))
+
+ data = np.random.rand(100).astype(np_dtype)
+ ds.data = data
+
+ @pytest.mark.xfail(raises=ValueError)
+ @pytest.mark.parametrize("dtype", list(DTypes.names()))
+ def test_unmatched_dtype_data(self, dtype):
+ ds = Dataset(dtype=dtype, shape=(100,))
+
+ data = np.random.rand(100).astype("O")
+ ds.data = data
+
+ @pytest.mark.parametrize("dtype", list(DTypes.names()))
+ def test_flexible_dtype(self, dtype):
+ ds = Dataset(dtype=None, shape=(100,))
+
+ data = np.random.rand(100).astype(DTypes.get(dtype).value)
+ ds.data = data
+
+ assert ds.dtype == DTypes(type(ds.data.dtype)).name.lower()
+
+ def test_dtype_mutation(self):
+ ds = Dataset(dtype="float32", shape=(100,))
+
+ ds.dtype = "float64"
+
+ data = np.random.rand(100)
+ ds.data = data
+
+
+class TestDatasetShape:
+ @pytest.mark.xfail(raises=ValueError)
+ @pytest.mark.parametrize(
+ ("shape", "data_shape"),
+ [
+ ((0,), (100,)),
+ ((1,), (100,)),
+ ((99,), (100,)),
+ ((1, 1), (100,)),
+ ((100, None), (100,)),
+ ((None, None), (100,)),
+ ((None, 100), (100,)),
+ ],
+ )
+ def test_unmatched_shape_data(self, shape, data_shape):
+ ds = Dataset(dtype="float64", shape=shape)
+
+ data = np.random.rand(*data_shape)
+ ds.data = data
+
+ @pytest.mark.parametrize(
+ ("shape", "data_shape"),
+ [
+ ((None,), (0,)),
+ ((None,), (1,)),
+ ((None,), (100,)),
+ ((None, 0), (0, 0)),
+ ((None, 1), (1, 1)),
+ ((None, None), (1, 1)),
+ ((None, None), (10, 100)),
+ ((None, None, 1), (1, 1, 1)),
+ ],
+ )
+ def test_flexible_shape(self, shape, data_shape):
+ ds = Dataset(dtype="float64", shape=shape)
+
+ data = np.random.rand(*data_shape)
+ ds.data = data
+
+ assert ds.shape == ds.data.shape
+
+ def test_shape_mutation(self):
+ ds = Dataset(dtype="float64", shape=(1,))
+
+ ds.shape = (100,)
+
+ data = np.random.rand(100)
+ ds.data = data
+
+
+class TestCastDataset:
+ @pytest.fixture
+ def adapter(self):
+ return TypeAdapter(CastDataset)
+
+ @pytest.mark.parametrize(
+ ("data", "dtype", "shape"),
+ [
+ (np.random.rand(100), "float64", (100,)),
+ (np.random.rand(10).astype("str"), "str", (10,)),
+ (np.random.rand(1, 10, 100).astype("bytes"), "bytes", (1, 10, 100)),
+ ],
+ )
+ def test_cast(self, adapter, data, shape, dtype):
+ ds = adapter.validate_python(data)
+
+ assert ds.shape == shape and ds.dtype == dtype
+
+
+class TestConstrainedDataset:
+ @pytest.mark.parametrize(
+ ("cds", "data"),
+ [
+ (condataset(dtype_constraint="float64"), np.random.rand(10)),
+ (condataset(dtype_constraint="str"), np.random.rand(10).astype(str)),
+ (
+ condataset(dtype_constraint=("float16", "float32", "float64")),
+ np.random.rand(10),
+ ),
+ (
+ condataset(dtype_constraint=("float16", "float32", "float64")),
+ np.random.rand(10).astype("float16"),
+ ),
+ (
+ condataset(dtype_constraint=("float16", "float32", "float64")),
+ np.random.rand(10).astype("float32"),
+ ),
+ ],
+ )
+ def test_constrained_dataset_dtype(self, cds, data):
+ adapter = TypeAdapter(cds)
+
+ adapter.validate_python(data)
+
+ @pytest.mark.xfail(raises=ValueError)
+ @pytest.mark.parametrize(
+ ("cds", "data"),
+ [
+ (condataset(dtype_constraint="float64"), np.random.rand(10).astype(str)),
+ (condataset(dtype_constraint="str"), np.random.rand(10)),
+ (
+ condataset(dtype_constraint=("float16", "float32", "float64")),
+ np.random.rand(10).astype(str),
+ ),
+ ],
+ )
+ def test_violate_dtype_constraint(self, cds, data):
+ adapter = TypeAdapter(cds)
+
+ adapter.validate_python(data)
+
+ @pytest.mark.parametrize(
+ ("cds", "data"),
+ [
+ (condataset(min_dim=1, max_dim=1), np.random.rand(10)),
+ (condataset(min_dim=0, max_dim=1), np.random.rand(10)),
+ (condataset(max_dim=2), np.random.rand(10)),
+ (condataset(max_dim=3), np.random.rand(10, 10, 10)),
+ (condataset(min_dim=2), np.random.rand(10, 10)),
+ (condataset(min_dim=2), np.random.rand(10, 10, 10, 10, 10)),
+ (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10, 10, 10)),
+ (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10, 10)),
+ (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10)),
+ ],
+ )
+ def test_constrained_dataset_dimension(self, cds, data):
+ adapter = TypeAdapter(cds)
+
+ adapter.validate_python(data)
+
+ @pytest.mark.xfail(raises=ValueError)
+ @pytest.mark.parametrize(
+ ("cds", "data"),
+ [
+ (condataset(min_dim=1, max_dim=1), np.random.rand(10, 10)),
+ (condataset(min_dim=2, max_dim=3), np.random.rand(10)),
+ (condataset(min_dim=2, max_dim=3), np.random.rand(10, 10, 10, 10)),
+ ],
+ )
+ def test_violate_dimension_constraint(self, cds, data):
+ adapter = TypeAdapter(cds)
+
+ adapter.validate_python(data)
+
+ @pytest.mark.parametrize(
+ ("cds", "data"),
+ [
+ (condataset(shape_constraint=(None,)), np.random.rand(10)),
+ (condataset(shape_constraint=(10,)), np.random.rand(10)),
+ (condataset(shape_constraint=(None, None)), np.random.rand(1, 2)),
+ (condataset(shape_constraint=(1, None)), np.random.rand(1, 2)),
+ (condataset(shape_constraint=(1, 2)), np.random.rand(1, 2)),
+ (condataset(shape_constraint=(1, None, 3)), np.random.rand(1, 10, 3)),
+ ],
+ )
+ def test_constrained_dataset_shape(self, cds, data):
+ adapter = TypeAdapter(cds)
+
+ adapter.validate_python(data)
+
+ @pytest.mark.xfail(raises=ValueError)
+ @pytest.mark.parametrize(
+ ("cds", "data"),
+ [
+ (condataset(shape_constraint=(1,)), np.random.rand(10)),
+ (condataset(shape_constraint=(None,)), np.random.rand(10, 10)),
+ (condataset(shape_constraint=(None, 1)), np.random.rand(10, 10)),
+ (condataset(shape_constraint=(None, 1)), np.random.rand(1, 10)),
+ ],
+ )
+ def test_violate_shape_constraint(self, cds, data):
+ adapter = TypeAdapter(cds)
+
+ adapter.validate_python(data)
diff --git a/tests/test_datastore.py b/tests/test_datastore.py
index 6970b07..8499609 100644
--- a/tests/test_datastore.py
+++ b/tests/test_datastore.py
@@ -13,41 +13,96 @@
# limitations under the License.
# %%
-import pathlib
+import uuid
+from typing import Dict, Optional
import numpy as np
import pytest
-from oqd_dataschema.base import Dataset, mapping
-from oqd_dataschema.datastore import Datastore
-from oqd_dataschema.groups import (
- SinaraRawDataGroup,
-)
-
+from oqd_dataschema import Dataset, Datastore, GroupBase
# %%
-@pytest.mark.parametrize(
- "dtype",
- [
- "int32",
- "int64",
- "float32",
- "float64",
- "complex64",
- "complex128",
- ],
+
+_Group = type(
+ f"_Group_{uuid.uuid4()}".replace("-", ""),
+ (GroupBase,),
+ {
+ "__annotations__": {
+ "x": Dataset,
+ "y": Dict[str, Dataset],
+ "z": Optional[Dataset],
+ },
+ "y": {},
+ "z": None,
+ },
)
-def test_serialize_deserialize(dtype):
- data = np.ones([10, 10]).astype(dtype)
- dataset = SinaraRawDataGroup(camera_images=Dataset(data=data))
- data = Datastore(groups={"test": dataset})
- filepath = pathlib.Path("test.h5")
- data.model_dump_hdf5(filepath)
- data_reload = Datastore.model_validate_hdf5(filepath)
+class TestDatastore:
+ @pytest.mark.parametrize(
+ ("dtype", "np_dtype"),
+ [
+ ("bool", np.dtypes.BoolDType),
+ ("int16", np.dtypes.Int16DType),
+ ("int32", np.dtypes.Int32DType),
+ ("int64", np.dtypes.Int64DType),
+ ("uint16", np.dtypes.UInt16DType),
+ ("uint32", np.dtypes.UInt32DType),
+ ("uint64", np.dtypes.UInt64DType),
+ ("float16", np.dtypes.Float16DType),
+ ("float32", np.dtypes.Float32DType),
+ ("float64", np.dtypes.Float64DType),
+ ("complex64", np.dtypes.Complex64DType),
+ ("complex128", np.dtypes.Complex128DType),
+ ("str", np.dtypes.StrDType),
+ ("bytes", np.dtypes.BytesDType),
+ ("string", np.dtypes.StringDType),
+ ],
+ )
+ def test_serialize_deserialize_dtypes(self, dtype, np_dtype, tmp_path):
+ f = tmp_path / f"tmp{uuid.uuid4()}.h5"
- assert data_reload.groups["test"].camera_images.data.dtype == mapping[dtype]
+ datastore = Datastore(
+ groups={"g1": _Group(x=Dataset(data=np.random.rand(1).astype(np_dtype)))}
+ )
+ datastore.model_dump_hdf5(f)
-# %%
+ Datastore.model_validate_hdf5(f)
+
+ @pytest.mark.parametrize(
+ ("x", "y", "z"),
+ [
+ (
+ Dataset(data=np.random.rand(10)),
+ {},
+ None,
+ ),
+ (
+ Dataset(data=np.random.rand(10)),
+ {"f1": Dataset(data=np.random.rand(10))},
+ None,
+ ),
+ (
+ Dataset(data=np.random.rand(10)),
+ {"f1": Dataset(data=np.random.rand(10))},
+ Dataset(data=np.random.rand(10)),
+ ),
+ (
+ Dataset(data=np.random.rand(10)),
+ {
+ "f1": Dataset(data=np.random.rand(10)),
+ "f2": Dataset(data=np.random.rand(10)),
+ },
+ Dataset(data=np.random.rand(10)),
+ ),
+ ],
+ )
+ def test_serialize_deserialize_dataset_types(self, x, y, z, tmp_path):
+ f = tmp_path / f"tmp{uuid.uuid4()}.h5"
+
+ datastore = Datastore(groups={"g1": _Group(x=x, y=y, z=z)})
+
+ datastore.model_dump_hdf5(f)
+
+ Datastore.model_validate_hdf5(f)
diff --git a/tests/test_group.py b/tests/test_group.py
new file mode 100644
index 0000000..ca87fe6
--- /dev/null
+++ b/tests/test_group.py
@@ -0,0 +1,170 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# %%
+
+import uuid
+from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple
+
+import numpy as np
+import pytest
+
+from oqd_dataschema import CastDataset, Dataset, GroupBase, condataset
+
+########################################################################################
+
+
+class TestGroupDefinition:
+ @pytest.mark.parametrize(
+ "field_type",
+ [
+ Dataset,
+ CastDataset,
+ Dict[str, Dataset],
+ Dict[str, CastDataset],
+ condataset(dtype_constraint="float32"),
+ condataset(dtype_constraint=("float16", "float32", "float64")),
+ condataset(min_dim=1),
+ condataset(max_dim=1),
+ condataset(min_dim=1, max_dim=2),
+ condataset(shape_constraint=(1,)),
+ condataset(shape_constraint=(None,)),
+ condataset(shape_constraint=(None, 1)),
+ condataset(shape_constraint=(None, None)),
+ Optional[Dataset],
+ ],
+ )
+ def test_data_field_definition(self, field_type):
+ type(
+ f"_Group_{uuid.uuid4()}".replace("-", ""),
+ (GroupBase,),
+ {"__annotations__": {"x": field_type}},
+ )
+
+ @pytest.mark.xfail(raises=TypeError)
+ @pytest.mark.parametrize(
+ "field_type",
+ [
+ Any,
+ int,
+ List[int],
+ Tuple[int],
+ List[Dataset],
+ Tuple[Dataset],
+ Dict[int, Dataset],
+ ],
+ )
+ def test_invalid_data_field_definition(self, field_type):
+ type(
+ f"_Group_{uuid.uuid4()}".replace("-", ""),
+ (GroupBase,),
+ {"__annotations__": {"x": field_type}},
+ )
+
+ @pytest.mark.xfail(raises=AttributeError)
+ def test_overwriting_attrs(self):
+ type(
+ f"_Group_{uuid.uuid4()}".replace("-", ""),
+ (GroupBase,),
+ {"__annotations__": {"attrs": Dict[str, Any]}},
+ )
+
+ @pytest.mark.xfail(raises=AttributeError)
+ def test_overwriting_class_(self):
+ groupname = f"_Group_{uuid.uuid4()}".replace("-", "")
+ type(
+ groupname,
+ (GroupBase,),
+ {"__annotations__": {"class_": Literal[groupname]}},
+ )
+
+ @pytest.mark.parametrize(
+ ("field_type", "data"),
+ [
+ (Dataset, Dataset(data=np.random.rand(100))),
+ (CastDataset, Dataset(data=np.random.rand(100))),
+ (
+ Dict[str, Dataset],
+ {
+ "1": Dataset(data=np.random.rand(100)),
+ "2": Dataset(data=np.random.rand(100)),
+ },
+ ),
+ (
+ Dict[str, CastDataset],
+ {
+ "1": Dataset(data=np.random.rand(100)),
+ "2": Dataset(data=np.random.rand(100)),
+ },
+ ),
+ (condataset(dtype_constraint="float64"), Dataset(data=np.random.rand(100))),
+ (
+ condataset(dtype_constraint=("float16", "float32", "float64")),
+ Dataset(data=np.random.rand(100)),
+ ),
+ (Optional[Dataset], Dataset(data=np.random.rand(100))),
+ (Optional[Dataset], None),
+ ],
+ )
+ def test_group_instantiation(self, field_type, data):
+ _Group = type(
+ f"_Group_{uuid.uuid4()}".replace("-", ""),
+ (GroupBase,),
+ {"__annotations__": {"x": field_type}},
+ )
+
+ _Group(x=data)
+
+ @pytest.mark.parametrize(
+ ("classvar_type"),
+ [
+ ClassVar,
+ ClassVar[int],
+ ],
+ )
+ def test_class_variable(self, classvar_type):
+ type(
+ f"_Group_{uuid.uuid4()}".replace("-", ""),
+ (GroupBase,),
+ {"__annotations__": {"x": classvar_type}},
+ )
+
+ @pytest.mark.parametrize(
+ ("dataset"),
+ [
+ Dataset(),
+ Dataset(data=np.random.rand(10)),
+ Dataset(dtype="float64", shape=(10,)),
+ Dataset(dtype="float64", shape=(10,), data=np.random.rand(10)),
+ ],
+ )
+ def test_default_dataset(self, dataset):
+ _Group = type(
+ f"_Group_{uuid.uuid4()}".replace("-", ""),
+ (GroupBase,),
+ {"__annotations__": {"x": Dataset}, "x": dataset},
+ )
+
+ g = _Group()
+
+ assert (
+ (
+ (g.x.data == dataset.data).all()
+ and g.x.dtype == dataset.dtype
+ and g.x.shape == dataset.shape
+ and g.x.attrs == dataset.attrs
+ )
+ if isinstance(dataset.data, np.ndarray)
+ else g.x == dataset
+ )
diff --git a/tests/test_groupregistry.py b/tests/test_groupregistry.py
new file mode 100644
index 0000000..989ff4b
--- /dev/null
+++ b/tests/test_groupregistry.py
@@ -0,0 +1,77 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# %%
+
+import pytest
+
+from oqd_dataschema import (
+ CastDataset,
+ Dataset,
+ GroupBase,
+ GroupRegistry,
+ condataset,
+)
+
+
+class TestGroupRegistry:
+ def test_clear(self):
+ GroupRegistry.clear()
+
+        assert GroupRegistry.groups == dict()
+
+ def test_add_group(self):
+ GroupRegistry.clear()
+
+ groups = set()
+ for k in "ABCDE":
+ groups.add(
+ type(f"_Group{k}", (GroupBase,), {"__annotations__": {"x": Dataset}})
+ )
+
+ assert set(GroupRegistry.groups.values()) == groups
+
+ def test_overwrite_group(self):
+ GroupRegistry.clear()
+
+ _GroupA = type("_GroupA", (GroupBase,), {"__annotations__": {"x": Dataset}})
+
+ assert set(GroupRegistry.groups.values()) == {_GroupA}
+
+ with pytest.warns(UserWarning):
+ _mGroupA = type(
+ "_GroupA", (GroupBase,), {"__annotations__": {"x": CastDataset}}
+ )
+
+ assert set(GroupRegistry.groups.values()) == {_mGroupA}
+
+ @pytest.fixture
+ def group_generator(self):
+ def _groupgen():
+ groups = []
+ for k, dtype in zip(
+ "ABCDE",
+ ["str", "float64", "bytes", "bool", ("int16", "int32", "int64")],
+ ):
+ groups.append(
+ type(
+ f"_Group{k}",
+ (GroupBase,),
+ {"__annotations__": {"x": condataset(dtype_constraint=dtype)}},
+ )
+ )
+ return groups
+
+ return _groupgen
diff --git a/tests/test_table.py b/tests/test_table.py
new file mode 100644
index 0000000..ca3d3b0
--- /dev/null
+++ b/tests/test_table.py
@@ -0,0 +1,196 @@
+# Copyright 2024-2025 Open Quantum Design
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# %%
+
+import numpy as np
+import pytest
+
+from oqd_dataschema import Table
+from oqd_dataschema.base import DTypes
+
+########################################################################################
+
+
+class TestTable:
+ def test_empty_table(self):
+ Table(columns=[], shape=(100,))
+
+ @pytest.mark.xfail(raises=ValueError)
+ @pytest.mark.parametrize(
+ ("column"),
+ [
+ [("c1", "bool"), ("c1", "int16")],
+ [
+ ("c1", "str"),
+ ("c2", "int16"),
+ ("c3", "float64"),
+ ("c1", "complex128"),
+ ],
+ ],
+ )
+ def test_duplicate_column(self, column):
+ Table(columns=column, shape=(100,))
+
+
+class TestTableDType:
+ @pytest.mark.parametrize(
+ ("dtype", "np_dtype"),
+ [
+ ("bool", np.dtypes.BoolDType()),
+ ("int16", np.dtypes.Int16DType()),
+ ("int32", np.dtypes.Int32DType()),
+ ("int64", np.dtypes.Int64DType()),
+ ("uint16", np.dtypes.UInt16DType()),
+ ("uint32", np.dtypes.UInt32DType()),
+ ("uint64", np.dtypes.UInt64DType()),
+ ("float16", np.dtypes.Float16DType()),
+ ("float32", np.dtypes.Float32DType()),
+ ("float64", np.dtypes.Float64DType()),
+ ("complex64", np.dtypes.Complex64DType()),
+ ("complex128", np.dtypes.Complex128DType()),
+ ("str", np.dtypes.StrDType(16)),
+ ("bytes", np.dtypes.BytesDType(16)),
+ ],
+ )
+ def test_dtypes(self, dtype, np_dtype):
+ tbl = Table(columns=[("c", dtype)], shape=(100,))
+
+ data = np.rec.fromarrays(
+ np.random.rand(1, 100),
+ dtype=np.dtype(
+ [
+ ("c", np_dtype),
+ ]
+ ),
+ )
+ tbl.data = data
+
+ @pytest.mark.parametrize(
+ ("column", "np_dtype"),
+ [
+ (
+ [("c1", "bool"), ("c2", "int16")],
+ np.dtype(
+ [("c1", np.dtypes.BoolDType()), ("c2", np.dtypes.Int16DType())]
+ ),
+ ),
+ (
+ [
+ ("c1", "str"),
+ ("c2", "int16"),
+ ("c3", "float64"),
+ ("c4", "complex128"),
+ ],
+ np.dtype(
+ [
+ ("c1", np.dtypes.StrDType(16)),
+ ("c2", np.dtypes.Int16DType()),
+ ("c3", np.dtypes.Float64DType()),
+ ("c4", np.dtypes.Complex128DType()),
+ ]
+ ),
+ ),
+ ],
+ )
+ def test_multi_column_dtypes(self, column, np_dtype):
+ tbl = Table(columns=column, shape=(100,))
+
+ data = np.rec.fromarrays(np.random.rand(len(column), 100), dtype=np_dtype)
+ tbl.data = data
+
+ @pytest.mark.xfail(raises=ValueError)
+ @pytest.mark.parametrize(
+ "dtype",
+ [
+ "bool",
+ "int16",
+ "int32",
+ "int64",
+ "uint16",
+ "uint32",
+ "uint64",
+ "float16",
+ "float32",
+ "float64",
+ "complex64",
+ "complex128",
+ "str",
+ "bytes",
+ ],
+ )
+ def test_unmatched_dtype_data(self, dtype):
+ tbl = Table(columns=[("c", dtype)], shape=(100,))
+
+ data = np.rec.fromarrays(
+ np.random.rand(1, 100),
+ dtype=np.dtype(
+ [
+ ("c", "O"),
+ ]
+ ),
+ )
+ tbl.data = data
+
+ @pytest.mark.parametrize(
+ "np_dtype",
+ [
+ np.dtypes.BoolDType(),
+ np.dtypes.Int16DType(),
+ np.dtypes.Int32DType(),
+ np.dtypes.Int64DType(),
+ np.dtypes.UInt16DType(),
+ np.dtypes.UInt32DType(),
+ np.dtypes.UInt64DType(),
+ np.dtypes.Float16DType(),
+ np.dtypes.Float32DType(),
+ np.dtypes.Float64DType(),
+ np.dtypes.Complex64DType(),
+ np.dtypes.Complex128DType(),
+ np.dtypes.StrDType(16),
+ np.dtypes.BytesDType(16),
+ ],
+ )
+ def test_flexible_dtype(self, np_dtype):
+ tbl = Table(columns=[("c", None)], shape=(100,))
+
+ data = np.rec.fromarrays(
+ np.random.rand(1, 100),
+ dtype=np.dtype(
+ [
+ ("c", np_dtype),
+ ]
+ ),
+ )
+ tbl.data = data
+
+ assert (
+ dict(tbl.columns)["c"]
+ == DTypes(type(tbl.data.dtype.fields["c"][0])).name.lower()
+ )
+
+ def test_dtype_mutation(self):
+ tbl = Table(columns=[("c", "float32")], shape=(100,))
+
+ tbl.columns[0] = ("c", "float64")
+
+ data = np.rec.fromarrays(
+ np.random.rand(1, 100),
+ dtype=np.dtype(
+ [
+ ("c", "float64"),
+ ]
+ ),
+ )
+ tbl.data = data
diff --git a/tests/test_typeadapt.py b/tests/test_typeadapt.py
deleted file mode 100644
index 609f09c..0000000
--- a/tests/test_typeadapt.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2024-2025 Open Quantum Design
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# %%
-import pathlib
-
-import numpy as np
-
-from oqd_dataschema.base import Dataset, GroupBase
-from oqd_dataschema.datastore import Datastore
-from oqd_dataschema.groups import (
- SinaraRawDataGroup,
-)
-
-
-# %%
-def test_adapt():
- class TestNewGroup(GroupBase):
- """ """
-
- array: Dataset
-
- filepath = pathlib.Path("test.h5")
-
- data = np.ones([10, 10]).astype("int64")
- group1 = TestNewGroup(array=Dataset(data=data))
-
- data = np.ones([10, 10]).astype("int32")
- group2 = SinaraRawDataGroup(camera_images=Dataset(data=data))
-
- datastore = Datastore(
- groups={
- "group1": group1,
- "group2": group2,
- }
- )
- datastore.model_dump_hdf5(filepath, mode="w")
-
- Datastore.model_validate_hdf5(filepath)
diff --git a/uv.lock b/uv.lock
index 22accdc..e015315 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,7 +2,8 @@ version = 1
requires-python = ">=3.10"
resolution-markers = [
"python_full_version >= '3.14'",
- "python_full_version >= '3.11' and python_full_version < '3.14'",
+ "python_full_version >= '3.12' and python_full_version < '3.14'",
+ "python_full_version == '3.11.*'",
"python_full_version < '3.11'",
]
@@ -166,15 +167,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/04/eb/f4151e0c7377a6e08a38108609ba5cede57986802757848688aeedd1b9e8/beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a", size = 105113 },
]
-[[package]]
-name = "bidict"
-version = "0.23.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764 },
-]
-
[[package]]
name = "bleach"
version = "6.2.0"
@@ -525,33 +517,33 @@ wheels = [
[[package]]
name = "h5py"
-version = "3.13.0"
+version = "3.14.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/03/2e/a22d6a8bfa6f8be33e7febd985680fba531562795f0a9077ed1eb047bfb0/h5py-3.13.0.tar.gz", hash = "sha256:1870e46518720023da85d0895a1960ff2ce398c5671eac3b1a41ec696b7105c3", size = 414876 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/02/8a/bc76588ff1a254e939ce48f30655a8f79fac614ca8bd1eda1a79fa276671/h5py-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5540daee2b236d9569c950b417f13fd112d51d78b4c43012de05774908dff3f5", size = 3413286 },
- { url = "https://files.pythonhosted.org/packages/19/bd/9f249ecc6c517b2796330b0aab7d2351a108fdbd00d4bb847c0877b5533e/h5py-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:10894c55d46df502d82a7a4ed38f9c3fdbcb93efb42e25d275193e093071fade", size = 2915673 },
- { url = "https://files.pythonhosted.org/packages/72/71/0dd079208d7d3c3988cebc0776c2de58b4d51d8eeb6eab871330133dfee6/h5py-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb267ce4b83f9c42560e9ff4d30f60f7ae492eacf9c7ede849edf8c1b860e16b", size = 4283822 },
- { url = "https://files.pythonhosted.org/packages/d8/fa/0b6a59a1043c53d5d287effa02303bd248905ee82b25143c7caad8b340ad/h5py-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2cf6a231a07c14acd504a945a6e9ec115e0007f675bde5e0de30a4dc8d86a31", size = 4548100 },
- { url = "https://files.pythonhosted.org/packages/12/42/ad555a7ff7836c943fe97009405566dc77bcd2a17816227c10bd067a3ee1/h5py-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:851ae3a8563d87a5a0dc49c2e2529c75b8842582ccaefbf84297d2cfceeacd61", size = 2950547 },
- { url = "https://files.pythonhosted.org/packages/86/2b/50b15fdefb577d073b49699e6ea6a0a77a3a1016c2b67e2149fc50124a10/h5py-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8a8e38ef4ceb969f832cc230c0cf808c613cc47e31e768fd7b1106c55afa1cb8", size = 3422922 },
- { url = "https://files.pythonhosted.org/packages/94/59/36d87a559cab9c59b59088d52e86008d27a9602ce3afc9d3b51823014bf3/h5py-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f35640e81b03c02a88b8bf99fb6a9d3023cc52f7c627694db2f379e0028f2868", size = 2921619 },
- { url = "https://files.pythonhosted.org/packages/37/ef/6f80b19682c0b0835bbee7b253bec9c16af9004f2fd6427b1dd858100273/h5py-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:337af114616f3656da0c83b68fcf53ecd9ce9989a700b0883a6e7c483c3235d4", size = 4259366 },
- { url = "https://files.pythonhosted.org/packages/03/71/c99f662d4832c8835453cf3476f95daa28372023bda4aa1fca9e97c24f09/h5py-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:782ff0ac39f455f21fd1c8ebc007328f65f43d56718a89327eec76677ebf238a", size = 4509058 },
- { url = "https://files.pythonhosted.org/packages/56/89/e3ff23e07131ff73a72a349be9639e4de84e163af89c1c218b939459a98a/h5py-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:22ffe2a25770a2d67213a1b94f58006c14dce06933a42d2aaa0318c5868d1508", size = 2966428 },
- { url = "https://files.pythonhosted.org/packages/d8/20/438f6366ba4ded80eadb38f8927f5e2cd6d2e087179552f20ae3dbcd5d5b/h5py-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:477c58307b6b9a2509c59c57811afb9f598aedede24a67da808262dfa0ee37b4", size = 3384442 },
- { url = "https://files.pythonhosted.org/packages/10/13/cc1cb7231399617d9951233eb12fddd396ff5d4f7f057ee5d2b1ca0ee7e7/h5py-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57c4c74f627c616f02b7aec608a8c706fe08cb5b0ba7c08555a4eb1dde20805a", size = 2917567 },
- { url = "https://files.pythonhosted.org/packages/9e/d9/aed99e1c858dc698489f916eeb7c07513bc864885d28ab3689d572ba0ea0/h5py-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:357e6dc20b101a805ccfd0024731fbaf6e8718c18c09baf3b5e4e9d198d13fca", size = 4669544 },
- { url = "https://files.pythonhosted.org/packages/a7/da/3c137006ff5f0433f0fb076b1ebe4a7bf7b5ee1e8811b5486af98b500dd5/h5py-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6f13f9b5ce549448c01e4dfe08ea8d1772e6078799af2c1c8d09e941230a90d", size = 4932139 },
- { url = "https://files.pythonhosted.org/packages/25/61/d897952629cae131c19d4c41b2521e7dd6382f2d7177c87615c2e6dced1a/h5py-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:21daf38171753899b5905f3d82c99b0b1ec2cbbe282a037cad431feb620e62ec", size = 2954179 },
- { url = "https://files.pythonhosted.org/packages/60/43/f276f27921919a9144074320ce4ca40882fc67b3cfee81c3f5c7df083e97/h5py-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e520ec76de00943dd017c8ea3f354fa1d2f542eac994811943a8faedf2a7d5cb", size = 3358040 },
- { url = "https://files.pythonhosted.org/packages/1b/86/ad4a4cf781b08d4572be8bbdd8f108bb97b266a14835c640dc43dafc0729/h5py-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e79d8368cd9295045956bfb436656bea3f915beaa11d342e9f79f129f5178763", size = 2892766 },
- { url = "https://files.pythonhosted.org/packages/69/84/4c6367d6b58deaf0fa84999ec819e7578eee96cea6cbd613640d0625ed5e/h5py-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56dd172d862e850823c4af02dc4ddbc308f042b85472ffdaca67f1598dff4a57", size = 4664255 },
- { url = "https://files.pythonhosted.org/packages/fd/41/bc2df86b72965775f6d621e0ee269a5f3ac23e8f870abf519de9c7d93b4d/h5py-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be949b46b7388074c5acae017fbbe3e5ba303fd9daaa52157fdfef30bbdacadd", size = 4927580 },
- { url = "https://files.pythonhosted.org/packages/97/34/165b87ea55184770a0c1fcdb7e017199974ad2e271451fd045cfe35f3add/h5py-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:4f97ecde7ac6513b21cd95efdfc38dc6d19f96f6ca6f2a30550e94e551458e0a", size = 2940890 },
+sdist = { url = "https://files.pythonhosted.org/packages/5d/57/dfb3c5c3f1bf5f5ef2e59a22dec4ff1f3d7408b55bfcefcfb0ea69ef21c6/h5py-3.14.0.tar.gz", hash = "sha256:2372116b2e0d5d3e5e705b7f663f7c8d96fa79a4052d250484ef91d24d6a08f4", size = 424323 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/52/89/06cbb421e01dea2e338b3154326523c05d9698f89a01f9d9b65e1ec3fb18/h5py-3.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:24df6b2622f426857bda88683b16630014588a0e4155cba44e872eb011c4eaed", size = 3332522 },
+ { url = "https://files.pythonhosted.org/packages/c3/e7/6c860b002329e408348735bfd0459e7b12f712c83d357abeef3ef404eaa9/h5py-3.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ff2389961ee5872de697054dd5a033b04284afc3fb52dc51d94561ece2c10c6", size = 2831051 },
+ { url = "https://files.pythonhosted.org/packages/fa/cd/3dd38cdb7cc9266dc4d85f27f0261680cb62f553f1523167ad7454e32b11/h5py-3.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:016e89d3be4c44f8d5e115fab60548e518ecd9efe9fa5c5324505a90773e6f03", size = 4324677 },
+ { url = "https://files.pythonhosted.org/packages/b1/45/e1a754dc7cd465ba35e438e28557119221ac89b20aaebef48282654e3dc7/h5py-3.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1223b902ef0b5d90bcc8a4778218d6d6cd0f5561861611eda59fa6c52b922f4d", size = 4557272 },
+ { url = "https://files.pythonhosted.org/packages/5c/06/f9506c1531645829d302c420851b78bb717af808dde11212c113585fae42/h5py-3.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:852b81f71df4bb9e27d407b43071d1da330d6a7094a588efa50ef02553fa7ce4", size = 2866734 },
+ { url = "https://files.pythonhosted.org/packages/61/1b/ad24a8ce846cf0519695c10491e99969d9d203b9632c4fcd5004b1641c2e/h5py-3.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f30dbc58f2a0efeec6c8836c97f6c94afd769023f44e2bb0ed7b17a16ec46088", size = 3352382 },
+ { url = "https://files.pythonhosted.org/packages/36/5b/a066e459ca48b47cc73a5c668e9924d9619da9e3c500d9fb9c29c03858ec/h5py-3.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:543877d7f3d8f8a9828ed5df6a0b78ca3d8846244b9702e99ed0d53610b583a8", size = 2852492 },
+ { url = "https://files.pythonhosted.org/packages/08/0c/5e6aaf221557314bc15ba0e0da92e40b24af97ab162076c8ae009320a42b/h5py-3.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c497600c0496548810047257e36360ff551df8b59156d3a4181072eed47d8ad", size = 4298002 },
+ { url = "https://files.pythonhosted.org/packages/21/d4/d461649cafd5137088fb7f8e78fdc6621bb0c4ff2c090a389f68e8edc136/h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:723a40ee6505bd354bfd26385f2dae7bbfa87655f4e61bab175a49d72ebfc06b", size = 4516618 },
+ { url = "https://files.pythonhosted.org/packages/db/0c/6c3f879a0f8e891625817637fad902da6e764e36919ed091dc77529004ac/h5py-3.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:d2744b520440a996f2dae97f901caa8a953afc055db4673a993f2d87d7f38713", size = 2874888 },
+ { url = "https://files.pythonhosted.org/packages/3e/77/8f651053c1843391e38a189ccf50df7e261ef8cd8bfd8baba0cbe694f7c3/h5py-3.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e0045115d83272090b0717c555a31398c2c089b87d212ceba800d3dc5d952e23", size = 3312740 },
+ { url = "https://files.pythonhosted.org/packages/ff/10/20436a6cf419b31124e59fefc78d74cb061ccb22213226a583928a65d715/h5py-3.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6da62509b7e1d71a7d110478aa25d245dd32c8d9a1daee9d2a42dba8717b047a", size = 2829207 },
+ { url = "https://files.pythonhosted.org/packages/3f/19/c8bfe8543bfdd7ccfafd46d8cfd96fce53d6c33e9c7921f375530ee1d39a/h5py-3.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:554ef0ced3571366d4d383427c00c966c360e178b5fb5ee5bb31a435c424db0c", size = 4708455 },
+ { url = "https://files.pythonhosted.org/packages/86/f9/f00de11c82c88bfc1ef22633557bfba9e271e0cb3189ad704183fc4a2644/h5py-3.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cbd41f4e3761f150aa5b662df991868ca533872c95467216f2bec5fcad84882", size = 4929422 },
+ { url = "https://files.pythonhosted.org/packages/7a/6d/6426d5d456f593c94b96fa942a9b3988ce4d65ebaf57d7273e452a7222e8/h5py-3.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:bf4897d67e613ecf5bdfbdab39a1158a64df105827da70ea1d90243d796d367f", size = 2862845 },
+ { url = "https://files.pythonhosted.org/packages/6c/c2/7efe82d09ca10afd77cd7c286e42342d520c049a8c43650194928bcc635c/h5py-3.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:aa4b7bbce683379b7bf80aaba68e17e23396100336a8d500206520052be2f812", size = 3289245 },
+ { url = "https://files.pythonhosted.org/packages/4f/31/f570fab1239b0d9441024b92b6ad03bb414ffa69101a985e4c83d37608bd/h5py-3.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ef9603a501a04fcd0ba28dd8f0995303d26a77a980a1f9474b3417543d4c6174", size = 2807335 },
+ { url = "https://files.pythonhosted.org/packages/0d/ce/3a21d87896bc7e3e9255e0ad5583ae31ae9e6b4b00e0bcb2a67e2b6acdbc/h5py-3.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8cbaf6910fa3983c46172666b0b8da7b7bd90d764399ca983236f2400436eeb", size = 4700675 },
+ { url = "https://files.pythonhosted.org/packages/e7/ec/86f59025306dcc6deee5fda54d980d077075b8d9889aac80f158bd585f1b/h5py-3.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d90e6445ab7c146d7f7981b11895d70bc1dd91278a4f9f9028bc0c95e4a53f13", size = 4921632 },
+ { url = "https://files.pythonhosted.org/packages/3f/6d/0084ed0b78d4fd3e7530c32491f2884140d9b06365dac8a08de726421d4a/h5py-3.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:ae18e3de237a7a830adb76aaa68ad438d85fe6e19e0d99944a3ce46b772c69b3", size = 2852929 },
]

[[package]]
@@ -665,7 +657,8 @@ version = "9.5.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
- "python_full_version >= '3.11' and python_full_version < '3.14'",
+ "python_full_version >= '3.12' and python_full_version < '3.14'",
+ "python_full_version == '3.11.*'",
]
dependencies = [
{ name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" },
@@ -1436,8 +1429,8 @@ name = "oqd-dataschema"
version = "0.1.0"
source = { editable = "." }
dependencies = [
- { name = "bidict" },
{ name = "h5py" },
+ { name = "pandas" },
{ name = "pydantic" },
]

@@ -1463,12 +1456,12 @@

[package.metadata]
requires-dist = [
- { name = "bidict", specifier = ">=0.23.1" },
- { name = "h5py", specifier = ">=3.13.0" },
+ { name = "h5py", specifier = ">=3.14.0" },
{ name = "mdx-truly-sane-lists", marker = "extra == 'docs'" },
{ name = "mkdocs-material", marker = "extra == 'docs'" },
{ name = "mkdocstrings", marker = "extra == 'docs'" },
{ name = "mkdocstrings-python", marker = "extra == 'docs'" },
+ { name = "pandas", specifier = ">=2.3.3" },
{ name = "pydantic", specifier = ">=2.10.6" },
{ name = "pymdown-extensions", marker = "extra == 'docs'" },
{ name = "pytest", marker = "extra == 'tests'" },
@@ -1509,6 +1502,67 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/90/96/04b8e52da071d28f5e21a805b19cb9390aa17a47462ac87f5e2696b9566d/paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591", size = 13746 },
]

+[[package]]
+name = "pandas"
+version = "2.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+ { name = "python-dateutil" },
+ { name = "pytz" },
+ { name = "tzdata" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3d/f7/f425a00df4fcc22b292c6895c6831c0c8ae1d9fac1e024d16f98a9ce8749/pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", size = 11555763 },
+ { url = "https://files.pythonhosted.org/packages/13/4f/66d99628ff8ce7857aca52fed8f0066ce209f96be2fede6cef9f84e8d04f/pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", size = 10801217 },
+ { url = "https://files.pythonhosted.org/packages/1d/03/3fc4a529a7710f890a239cc496fc6d50ad4a0995657dccc1d64695adb9f4/pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", size = 12148791 },
+ { url = "https://files.pythonhosted.org/packages/40/a8/4dac1f8f8235e5d25b9955d02ff6f29396191d4e665d71122c3722ca83c5/pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", size = 12769373 },
+ { url = "https://files.pythonhosted.org/packages/df/91/82cc5169b6b25440a7fc0ef3a694582418d875c8e3ebf796a6d6470aa578/pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", size = 13200444 },
+ { url = "https://files.pythonhosted.org/packages/10/ae/89b3283800ab58f7af2952704078555fa60c807fff764395bb57ea0b0dbd/pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", size = 13858459 },
+ { url = "https://files.pythonhosted.org/packages/85/72/530900610650f54a35a19476eca5104f38555afccda1aa11a92ee14cb21d/pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", size = 11346086 },
+ { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790 },
+ { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831 },
+ { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267 },
+ { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281 },
+ { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453 },
+ { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361 },
+ { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702 },
+ { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846 },
+ { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618 },
+ { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212 },
+ { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693 },
+ { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002 },
+ { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971 },
+ { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722 },
+ { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671 },
+ { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807 },
+ { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872 },
+ { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371 },
+ { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333 },
+ { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120 },
+ { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991 },
+ { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227 },
+ { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056 },
+ { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189 },
+ { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912 },
+ { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160 },
+ { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233 },
+ { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635 },
+ { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079 },
+ { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049 },
+ { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638 },
+ { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834 },
+ { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925 },
+ { url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071 },
+ { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504 },
+ { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702 },
+ { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535 },
+ { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582 },
+ { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963 },
+ { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175 },
+]
+
[[package]]
name = "pandocfilters"
version = "1.5.1"
@@ -1796,6 +1850,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163 },
]

+[[package]]
+name = "pytz"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 },
+]
+
[[package]]
name = "pywin32"
version = "311"
@@ -2366,6 +2429,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
]

+[[package]]
+name = "tzdata"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 },
+]
+
[[package]]
name = "uri-template"
version = "1.3.0"