diff --git a/.gitattributes b/.gitattributes index 235b1a2..a76e4dc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -postprocessing/** linguist-vendored \ No newline at end of file +postprocessing/** linguist-vendored diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index cda0ced..7de49f3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -26,4 +26,4 @@ - Subsystem: -* **Other information** (e.g. detailed explanation, stacktraces, related issues, suggestions how to fix, links for us to have context, eg. stackoverflow, gitter, etc) \ No newline at end of file +* **Other information** (e.g. detailed explanation, stacktraces, related issues, suggestions how to fix, links for us to have context, e.g. stackoverflow, gitter, etc.) diff --git a/.github/workflows/check_copyright.yml b/.github/workflows/check_copyright.yml index 9ed4c48..fdc46fc 100644 --- a/.github/workflows/check_copyright.yml +++ b/.github/workflows/check_copyright.yml @@ -10,6 +10,6 @@ jobs: - name: Check license & copyright headers uses: viperproject/check-license-header@v2 with: - path: + path: config: .github/workflows/check_copyright_config.json - # strict: true \ No newline at end of file + # strict: true diff --git a/.github/workflows/check_mkdocs_build.yml b/.github/workflows/check_mkdocs_build.yml index 116f6d4..2f9c08b 100644 --- a/.github/workflows/check_mkdocs_build.yml +++ b/.github/workflows/check_mkdocs_build.yml @@ -21,4 +21,4 @@ jobs: uses: astral-sh/setup-uv@v4 # - run: cp -r examples/ docs/examples/ - run: uv pip install .[docs] --system - - run: mkdocs build \ No newline at end of file + - run: mkdocs build diff --git a/.github/workflows/copyright.txt b/.github/workflows/copyright.txt index 67eb334..084ae79 100644 --- a/.github/workflows/copyright.txt +++ b/.github/workflows/copyright.txt @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/.gitignore b/.gitignore index c6e81a6..fb5db85 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,5 @@ cython_debug/ *.h5 *.code-workspace .pre-commit-config.yaml +_scripts +.vscode diff --git a/README.md b/README.md index 9dc494c..ab3b8b2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The design goals are to have: - Minimizes *a priori* knowledge that is needed of the internal hierarchical structure, reducing friction for users to load data. - Transparently return both raw and processed data, where the levels of post-processing can be selected by the user. 
-To install, +To install, ```bash pip install git+https://github.com/OpenQuantumDesign/oqd-dataschema.git ``` diff --git a/docs/api.md b/docs/api.md deleted file mode 100644 index 1621f49..0000000 --- a/docs/api.md +++ /dev/null @@ -1,33 +0,0 @@ -## Datastore - - -::: oqd_dataschema.datastore - options: - heading_level: 3 - members: [ - "Datastore", - ] - - -## Base HDF5 Objects - - -::: oqd_dataschema.base - options: - heading_level: 3 - members: [ - "Group", - "Dataset", - ] - -## Specified Groups - - -::: oqd_dataschema.groups - options: - heading_level: 3 - members: [ - "SinaraRawDataGroup", - "MeasurementOutcomesDataGroup", - "ExpectationValueDataGroup", - ] \ No newline at end of file diff --git a/docs/api/base.md b/docs/api/base.md new file mode 100644 index 0000000..33fba53 --- /dev/null +++ b/docs/api/base.md @@ -0,0 +1,21 @@ +## Attribute Types + + +::: oqd_dataschema.base + options: + heading_level: 3 + members: [ + "AttrKey", + "Attrs", + ] + +## Data Types + + +::: oqd_dataschema.base + options: + heading_level: 3 + members: [ + "DTypes", + "DTypeNames", + ] diff --git a/docs/api/datastore.md b/docs/api/datastore.md new file mode 100644 index 0000000..c797a9c --- /dev/null +++ b/docs/api/datastore.md @@ -0,0 +1,7 @@ + +::: oqd_dataschema.datastore + options: + heading_level: 3 + members: [ + "Datastore", + ] diff --git a/docs/api/group.md b/docs/api/group.md new file mode 100644 index 0000000..1b8ff22 --- /dev/null +++ b/docs/api/group.md @@ -0,0 +1,8 @@ + +::: oqd_dataschema.group + options: + heading_level: 3 + members: [ + "GroupBase", + "GroupRegistry", + ] diff --git a/docs/api/groupfield.md b/docs/api/groupfield.md new file mode 100644 index 0000000..af1319f --- /dev/null +++ b/docs/api/groupfield.md @@ -0,0 +1,53 @@ + +::: oqd_dataschema.base + options: + filters: [] + heading_level: 3 + members: [ + "GroupField", + ] + +## Dataset + + +::: oqd_dataschema.dataset + options: + heading_level: 3 + members: [ + "Dataset", + "CastDataset", + ] + +## Table + + +::: oqd_dataschema.table + options: + heading_level: 3 + members: [ + "Table", + "CastTable", + ] + +## Folder + + +::: oqd_dataschema.folder + options: + heading_level: 3 + members: [ + "Folder", + "CastFolder", + ] + +## Constrained Group Fields + + +::: oqd_dataschema.constrained + options: + heading_level: 3 + members: [ + "condataset", + "contable", + "confolder", + ] diff --git a/docs/api/utils.md b/docs/api/utils.md new file mode 100644 index 0000000..cff7d91 --- /dev/null +++ b/docs/api/utils.md @@ -0,0 +1,10 @@ +## Utilities + + +::: oqd_dataschema.utils + options: + heading_level: 3 + members: [ + "dict_to_structured", + "unstructured_to_structured", + ] diff --git a/docs/explanation.md b/docs/explanation.md index e69de29..7af1180 100644 --- a/docs/explanation.md +++ b/docs/explanation.md @@ -0,0 +1,22 @@ +## Datastore + +A [Datastore][oqd_dataschema.datastore.Datastore] represents an HDF5 file with a particular hierarchical structure. + +### Hierarchy + +``` +/ +├── group1/ +│ └── dataset1 +├── group2/ +│ ├── dataset2 +│ ├── table1 +│ └── folder1 +└── group3/ + ├── table2 + └── dataset_dict1/ + ├── dataset5 + └── dataset6 +``` + +The top level of [Datastore][oqd_dataschema.datastore.Datastore] contains multiple [Groups](api/group.md). diff --git a/docs/index.md b/docs/index.md index 1355f50..9d7bf57 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -#

Logo diff --git a/docs/stylesheets/admonition_template.css b/docs/stylesheets/admonition_template.css index f32ebeb..94fee5c 100644 --- a/docs/stylesheets/admonition_template.css +++ b/docs/stylesheets/admonition_template.css @@ -14,4 +14,4 @@ background-color: #FFFFFF; -webkit-mask-image: var(--md-admonition-icon--template); mask-image: var(--md-admonition-icon--template); - } \ No newline at end of file + } diff --git a/docs/stylesheets/admonitions.css b/docs/stylesheets/admonitions.css index ff86542..eb1babd 100644 --- a/docs/stylesheets/admonitions.css +++ b/docs/stylesheets/admonitions.css @@ -130,6 +130,3 @@ -webkit-mask-image: var(--md-admonition-icon--acknowledgement); mask-image: var(--md-admonition-icon--acknowledgement); } - - - diff --git a/docs/stylesheets/brand.css b/docs/stylesheets/brand.css index d2fdeeb..34d6112 100644 --- a/docs/stylesheets/brand.css +++ b/docs/stylesheets/brand.css @@ -36,12 +36,12 @@ h1, h2, h3, h4, h5, h6, /* Apply Raleway to all navigation and sidebar elements */ -.md-nav, -.md-nav__title, -.md-nav__link, -.md-header, -.md-tabs, -.md-sidebar, +.md-nav, +.md-nav__title, +.md-nav__link, +.md-header, +.md-tabs, +.md-sidebar, .md-sidebar__inner, .md-nav__item, .md-footer, @@ -79,7 +79,7 @@ h1, h2, h3, h4, h5, h6, /* Light mode nav/ToC font color */ -[data-md-color-scheme="default"] .md-nav, +[data-md-color-scheme="default"] .md-nav, [data-md-color-scheme="default"] .md-nav__link, [data-md-color-scheme="default"] .md-header, [data-md-color-scheme="default"] .md-tabs { @@ -88,7 +88,7 @@ h1, h2, h3, h4, h5, h6, /* Dark mode nav/ToC font color */ -[data-md-color-scheme="slate"] .md-nav, +[data-md-color-scheme="slate"] .md-nav, [data-md-color-scheme="slate"] .md-nav__link, [data-md-color-scheme="slate"] .md-header, [data-md-color-scheme="slate"] .md-tabs { @@ -113,4 +113,4 @@ h1, h2, h3, h4, h5, h6, .md-header .md-tabs__link:hover { color: #ffffff !important; text-decoration: underline; -} \ No newline at end of file +} diff --git a/docs/tutorial.md b/docs/tutorial.md index daa4bfa..c382434 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -1,108 +1,73 @@ - # Tutorial -```python -import pathlib - -import numpy as np -from rich.pretty import pprint - -from oqd_dataschema.base import Dataset -from oqd_dataschema.datastore import Datastore -from oqd_dataschema.groups import ( - ExpectationValueDataGroup, - MeasurementOutcomesDataGroup, - SinaraRawDataGroup, -) -``` +## Group Definition ```python -raw = SinaraRawDataGroup( - camera_images=Dataset(shape=(3, 2, 2), dtype="float32"), - attrs={"date": "2025-03-26", "version": 0.1}, -) -pprint(raw) -``` - +import datetime + +from pydantic import Field + +from oqd_dataschema import Attrs, Dataset, GroupBase - -```python -raw.camera_images.data = np.random.uniform(size=(3, 2, 2)).astype("float32") -pprint(raw) +class CustomGroup(GroupBase): + attrs: Attrs = Field( + default_factory=lambda: dict( + timestamp=str(datetime.datetime.now(datetime.timezone.utc)) + ) + ) + t: Dataset + x: Dataset ``` - +Defined groups are automatically registered into the [`GroupRegistry`][oqd_dataschema.group.GroupRegistry]. 
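+Because registration is keyed on the class name, redefining a group under an already-registered name replaces the earlier entry and emits a `UserWarning`. A minimal sketch of this behaviour (the redefinition below is hypothetical and reuses the imports above):
+
+```python
+import warnings
+
+with warnings.catch_warnings(record=True) as caught:
+    warnings.simplefilter("always")
+
+    class CustomGroup(GroupBase):  # same class name as above
+        t: Dataset
+        x: Dataset
+
+# GroupRegistry.register warns before overwriting the earlier entry
+assert any(issubclass(w.category, UserWarning) for w in caught)
+```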
```python -raw.camera_images.data = np.random.uniform(size=(3, 2, 2)).astype("float32") -``` - - +from oqd_dataschema import GroupRegistry -```python -data = Datastore(groups={"raw": raw}) -pprint(data) +GroupRegistry.groups ``` - - ## Initialize Group ```python -def process_raw(raw: SinaraRawDataGroup) -> MeasurementOutcomesDataGroup: - processed = MeasurementOutcomesDataGroup( - outcomes=Dataset( - data=np.round(raw.camera_images.data.mean(axis=(1, 2))), - ) - ) - return processed +import numpy as np + +t = np.linspace(0, 1, 101).astype(np.float32) +x = np.sin(t).astype(np.complex64) -processed = process_raw(data.groups["raw"]) -pprint(processed) +group = CustomGroup( + t=Dataset(dtype="float32", shape=(101,)), x=Dataset(dtype="complex64", shape=(101,)) +) - - +group.t.data = t +group.x.data = x ``` - - ## Initialize Datastore ```python -data.groups.update(processed=processed) -pprint(data) -``` +from oqd_dataschema import Datastore +datastore = Datastore(groups={"g1": group}) ``` ## Data Pipeline ```python -def process_outcomes( - measurements: MeasurementOutcomesDataGroup, -) -> ExpectationValueDataGroup: - expval = ExpectationValueDataGroup( - expectation_value=Dataset( - shape=(), - dtype="float32", - data=measurements.outcomes.data.mean(), - attrs={"date": "20", "input": 10}, - ) - ) - return expval +def process(datastore: Datastore) -> None: + _g = datastore["g1"] -expval = process_outcomes(processed) -data.groups.update(expval=process_outcomes(data.groups["processed"])) + g2 = CustomGroup(t=Dataset(data=_g.t.data), x=Dataset(data=_g.x.data + 1j)) + g2.attrs["_gen_by_pipe"] = "process" -pprint(expval) + datastore.add(g2=g2) -``` +datastore.pipe(process) ``` ## Save Datastore ```python -filepath = pathlib.Path("test.h5") -data.model_dump_hdf5(filepath) +import pathlib + +datastore.model_dump_hdf5(pathlib.Path("datastore.h5"), mode="w") ``` ## Load Datastore ```python -data_reload = Datastore.model_validate_hdf5(filepath) -pprint(data_reload) -``` \ No newline at end of file +reloaded_datastore = Datastore.model_validate_hdf5(pathlib.Path("datastore.h5")) +``` diff --git a/docs/tutorials/advanced.md b/docs/tutorials/advanced.md new file mode 100644 index 0000000..8d9cf31 --- /dev/null +++ b/docs/tutorials/advanced.md @@ -0,0 +1,82 @@ +# Tutorial + +## Group Definition + +```python +import datetime + +from pydantic import Field + +from oqd_dataschema import Attrs, Dataset, Folder, GroupBase, Table + +class CustomGroup(GroupBase): + attrs: Attrs = Field( + default_factory=lambda: dict( + timestamp=str(datetime.datetime.now(datetime.timezone.utc)) + ) + ) + dset: Dataset + tbl: Table + fld: Folder +``` + +Defined groups are automatically registered into the [`GroupRegistry`][oqd_dataschema.group.GroupRegistry]. 
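+Group fields can also carry validation constraints: the `condataset`, `contable`, and `confolder` helpers return annotated types that check dtype, dimension, and shape at validation time. A minimal sketch with a hypothetical `BoundedGroup`, mirroring the constrained field used in `examples/custom_group.ipynb`:
+
+```python
+from oqd_dataschema import GroupBase, condataset
+
+class BoundedGroup(GroupBase):
+    # any number of rows, exactly 10 columns, floating-point dtypes only
+    array: condataset(
+        shape_constraint=(None, 10),
+        dtype_constraint=("float32", "float64"),
+    )  # type: ignore
+```
+
+The registry listing below picks up `BoundedGroup` as well.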
+ +```python +from oqd_dataschema import GroupRegistry + +GroupRegistry.groups +``` + +## Initialize Group + +```python +import numpy as np + +from oqd_dataschema import Dataset, Table, Folder, unstructured_to_structured + +dset = Dataset(data=np.linspace(0, 1, 101).astype(np.float32)) +tbl = Table( + columns=[("t", "float32"), ("x", "complex128")], + data=unstructured_to_structured( + np.stack([np.linspace(0, 1, 101), np.sin(np.linspace(0, 1, 101))], -1), + dtype=np.dtype([("t", np.float32), ("x", np.complex128)]), + ), +) +fld = Folder( + document_schema={"t": "float32", "signal": {"x": "complex128", "y": "complex128"}}, + data=unstructured_to_structured( + np.stack( + [ + np.linspace(0, 1, 101), + np.sin(np.linspace(0, 1, 101)), + np.cos(np.linspace(0, 1, 101)), + ], + -1, + ), + dtype=np.dtype( + [ + ("t", np.float32), + ("signal", np.dtype([("x", np.complex128), ("y", np.complex128)])), + ] + ), + ), +) + + +group = CustomGroup(dset=dset, tbl=tbl, fld=fld) +``` + +## Initialize Datastore + +```python +from oqd_dataschema import Datastore + +datastore = Datastore(groups={"g1": group}) +``` + +## Save Datastore + +```python +import pathlib + +datastore.model_dump_hdf5(pathlib.Path("datastore.h5"), mode="w") +``` + +## Load Datastore + +```python +reloaded_datastore = Datastore.model_validate_hdf5(pathlib.Path("datastore.h5")) +``` diff --git a/docs/tutorials/basic.md b/docs/tutorials/basic.md new file mode 100644 index 0000000..c382434 --- /dev/null +++ b/docs/tutorials/basic.md @@ -0,0 +1,73 @@ +# Tutorial + +## Group Definition + +```python +import datetime + +from pydantic import Field + +from oqd_dataschema import Attrs, Dataset, GroupBase + +class CustomGroup(GroupBase): + attrs: Attrs = Field( + default_factory=lambda: dict( + timestamp=str(datetime.datetime.now(datetime.timezone.utc)) + ) + ) + t: Dataset + x: Dataset +``` + +Defined groups are automatically registered into the [`GroupRegistry`][oqd_dataschema.group.GroupRegistry]. 
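+The registry is a plain mapping from class name to group class, so a registered group can be retrieved by name; a quick sanity check (assumes the `CustomGroup` defined above):
+
+```python
+from oqd_dataschema import GroupRegistry
+
+# __init_subclass__ auto-registers each GroupBase subclass under its class name
+assert GroupRegistry.groups["CustomGroup"] is CustomGroup
+```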
+ +```python +from oqd_dataschema import GroupRegistry + +GroupRegistry.groups +``` + +## Initialize Group + +```python +import numpy as np + +t = np.linspace(0, 1, 101).astype(np.float32) +x = np.sin(t).astype(np.complex64) + +group = CustomGroup( + t=Dataset(dtype="float32", shape=(101,)), x=Dataset(dtype="complex64", shape=(101,)) +) + +group.t.data = t +group.x.data = x +``` + +## Initialize Datastore + +```python +from oqd_dataschema import Datastore + +datastore = Datastore(groups={"g1": group}) +``` + +## Data Pipeline + +```python +def process(datastore: Datastore) -> None: + _g = datastore["g1"] + + g2 = CustomGroup(t=Dataset(data=_g.t.data), x=Dataset(data=_g.x.data + 1j)) + g2.attrs["_gen_by_pipe"] = "process" + + datastore.add(g2=g2) + + +datastore.pipe(process) +``` + +## Save Datastore + +```python +import pathlib + +datastore.model_dump_hdf5(pathlib.Path("datastore.h5"), mode="w") +``` + +## Load Datastore + +```python +reloaded_datastore = Datastore.model_validate_hdf5(pathlib.Path("datastore.h5")) +``` diff --git a/examples/custom_group.ipynb b/examples/custom_group.ipynb index 7109567..c632665 100644 --- a/examples/custom_group.ipynb +++ b/examples/custom_group.ipynb @@ -11,7 +11,7 @@ "import numpy as np\n", "from rich.pretty import pprint\n", "\n", - "from oqd_dataschema.base import Dataset, GroupBase, GroupRegistry\n", + "from oqd_dataschema import Dataset, GroupBase, GroupRegistry, condataset\n", "from oqd_dataschema.datastore import Datastore\n", "from oqd_dataschema.groups import (\n", " SinaraRawDataGroup,\n", @@ -29,7 +29,7 @@ " Here we define a custom Group, which is automatically added at runtime to the GroupRegistry.\n", " \"\"\"\n", "\n", - " array: Dataset" + " array: condataset(shape_constraint=(None, 10)) # type: ignore" ] }, { @@ -119,7 +119,8 @@ "│ │ │ ),\n", "│ │ │ class_='YourCustomGroup'\n", "│ │ )\n", - "}\n", + "},\n", + "attrs={}\n", ")\n", "\n" ], "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'YourCustomGroup'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" } ], @@ -230,7 +232,8 @@ "│ │ │ ),\n", "│ │ │ class_='YourCustomGroup'\n", "│ │ )\n", - "}\n", + "},\n", + "attrs={}\n", ")\n", "\n" ], "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'YourCustomGroup'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "parse = Datastore.model_validate_hdf5(filepath)\n", "pprint(parse)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "from typing import Dict\n", "\n", "from oqd_dataschema import CastDataset\n", "\n", "\n", "class A(GroupBase):\n", " data: Dict[str, CastDataset]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "

Datastore(\n",
+       "groups={\n",
+       "│   │   'A': A(\n",
+       "│   │   │   attrs={},\n",
+       "│   │   │   data={\n",
+       "│   │   │   │   'x': Dataset(\n",
+       "│   │   │   │   │   dtype='float64',\n",
+       "│   │   │   │   │   shape=(10,),\n",
+       "│   │   │   │   │   data=array([0.90326782, 0.17363226, 0.13827196, 0.8917397 , 0.68175954,\n",
+       "0.47647195, 0.88443397, 0.75703312, 0.74991232, 0.68161151]),\n",
+       "│   │   │   │   │   attrs={'type': 'mytype'}\n",
+       "│   │   │   │   )\n",
+       "│   │   │   },\n",
+       "│   │   │   class_='A'\n",
+       "│   │   )\n",
+       "},\n",
+       "attrs={}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mDatastore\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mgroups\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'A'\u001b[0m: \u001b[1;35mA\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'x'\u001b[0m: \u001b[1;35mDataset\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdtype\u001b[0m=\u001b[32m'float64'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mshape\u001b[0m=\u001b[1m(\u001b[0m\u001b[1;36m10\u001b[0m,\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1;35marray\u001b[0m\u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1;36m0.90326782\u001b[0m, \u001b[1;36m0.17363226\u001b[0m, \u001b[1;36m0.13827196\u001b[0m, \u001b[1;36m0.8917397\u001b[0m , \u001b[1;36m0.68175954\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;36m0.47647195\u001b[0m, \u001b[1;36m0.88443397\u001b[0m, \u001b[1;36m0.75703312\u001b[0m, \u001b[1;36m0.74991232\u001b[0m, \u001b[1;36m0.68161151\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'mytype'\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'A'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "filepath = pathlib.Path(\"test.h5\")\n", + "\n", + "datastore = Datastore(\n", + " groups={\n", + " \"A\": A(data={\"x\": Dataset(data=np.random.rand(10), attrs={\"type\": \"mytype\"})})\n", + " }\n", + ")\n", + "pprint(datastore)\n", + "datastore.model_dump_hdf5(filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Datastore(\n",
+       "groups={\n",
+       "│   │   'A': A(\n",
+       "│   │   │   attrs={},\n",
+       "│   │   │   data={\n",
+       "│   │   │   │   'x': Dataset(\n",
+       "│   │   │   │   │   dtype='float64',\n",
+       "│   │   │   │   │   shape=(10,),\n",
+       "│   │   │   │   │   data=array([0.90326782, 0.17363226, 0.13827196, 0.8917397 , 0.68175954,\n",
+       "0.47647195, 0.88443397, 0.75703312, 0.74991232, 0.68161151]),\n",
+       "│   │   │   │   │   attrs={'type': 'mytype'}\n",
+       "│   │   │   │   )\n",
+       "│   │   │   },\n",
+       "│   │   │   class_='A'\n",
+       "│   │   )\n",
+       "},\n",
+       "attrs={}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mDatastore\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mgroups\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'A'\u001b[0m: \u001b[1;35mA\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'x'\u001b[0m: \u001b[1;35mDataset\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdtype\u001b[0m=\u001b[32m'float64'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mshape\u001b[0m=\u001b[1m(\u001b[0m\u001b[1;36m10\u001b[0m,\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mdata\u001b[0m=\u001b[1;35marray\u001b[0m\u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1;36m0.90326782\u001b[0m, \u001b[1;36m0.17363226\u001b[0m, \u001b[1;36m0.13827196\u001b[0m, \u001b[1;36m0.8917397\u001b[0m , \u001b[1;36m0.68175954\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;36m0.47647195\u001b[0m, \u001b[1;36m0.88443397\u001b[0m, \u001b[1;36m0.75703312\u001b[0m, \u001b[1;36m0.74991232\u001b[0m, \u001b[1;36m0.68161151\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'mytype'\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mclass_\u001b[0m=\u001b[32m'A'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mattrs\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, diff --git a/mkdocs.yaml b/mkdocs.yaml index fca9c52..732c693 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -18,9 +18,16 @@ extra: nav: - Get Started: index.md - - Tutorial: tutorial.md - # - Explanation: explanation.md - - API Reference: api.md + - Tutorials: + - Basics: tutorials/basic.md + - Datasets/Tables/Folders: tutorials/advanced.md + - Explanation: explanation.md + - API Reference: + - Base: api/base.md + - Group Field: api/groupfield.md + - Group: api/group.md + - Datastore: api/datastore.md + - Utilities: api/utils.md theme: name: material @@ -59,6 +66,7 @@ theme: - toc.follow plugins: + - search - mkdocstrings: handlers: python: @@ -79,7 +87,7 @@ plugins: separate_signature: false group_by_category: true members_order: "source" - import: + inventories: - https://docs.python.org/3/objects.inv - https://docs.pydantic.dev/latest/objects.inv - https://pandas.pydata.org/docs/objects.inv diff --git a/pyproject.toml b/pyproject.toml index 47a7ccc..6e17436 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,9 +27,9 @@ classifiers = [ ] dependencies = [ - "bidict>=0.23.1", - "h5py>=3.13.0", - "pydantic>=2.10.6", + "h5py>=3.14.0", + "pandas>=2.3.3", + "pydantic>=2.10.6", ] [project.optional-dependencies] @@ -52,12 +52,7 @@ select = ["E4", "E7", "E9", "F", "I"] fixable = ["ALL"] [dependency-groups] -dev = [ - "jupyter>=1.1.1", - "pre-commit>=4.1.0", - "rich>=14.1.0", - "ruff>=0.13.1", -] +dev = ["jupyter>=1.1.1", "pre-commit>=4.1.0", "rich>=14.1.0", "ruff>=0.13.1"] [project.urls] diff --git a/src/oqd_dataschema/__init__.py b/src/oqd_dataschema/__init__.py index 38c732a..ef09b9f 100644 --- a/src/oqd_dataschema/__init__.py +++ b/src/oqd_dataschema/__init__.py 
@@ -12,24 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .base import Dataset, GroupBase, GroupRegistry +from .base import Attrs, DTypes +from .constrained import condataset, confolder, contable +from .dataset import CastDataset, Dataset from .datastore import Datastore -from .groups import ( - ExpectationValueDataGroup, - MeasurementOutcomesDataGroup, - OQDTestbenchDataGroup, - SinaraRawDataGroup, -) +from .folder import CastFolder, Folder +from .group import GroupBase, GroupRegistry +from .table import CastTable, Table +from .utils import dict_to_structured, unstructured_to_structured ######################################################################################## __all__ = [ - "Dataset", + "Attrs", + "DTypes", "Datastore", "GroupBase", "GroupRegistry", - "ExpectationValueDataGroup", - "MeasurementOutcomesDataGroup", - "OQDTestbenchDataGroup", - "SinaraRawDataGroup", + "Dataset", + "CastDataset", + "condataset", + "Table", + "CastTable", + "contable", + "Folder", + "CastFolder", + "confolder", + "dict_to_structured", + "unstructured_to_structured", ] diff --git a/src/oqd_dataschema/base.py b/src/oqd_dataschema/base.py index 1139849..a25d639 100644 --- a/src/oqd_dataschema/base.py +++ b/src/oqd_dataschema/base.py @@ -13,171 +13,131 @@ # limitations under the License. # %% -import warnings -from typing import Annotated, Any, Literal, Optional, Union +from __future__ import annotations + +import typing +from abc import ABC, abstractmethod +from enum import Enum +from typing import Annotated, Literal, Union import numpy as np -from bidict import bidict from pydantic import ( BaseModel, - ConfigDict, - Discriminator, + BeforeValidator, Field, - TypeAdapter, - model_validator, ) ######################################################################################## -__all__ = ["GroupBase", "Dataset", "GroupRegistry"] +__all__ = ["Attrs", "DTypes", "DTypeNames", "GroupField"] ######################################################################################## -# %% -mapping = bidict( - { - "int32": np.dtype("int32"), - "int64": np.dtype("int64"), - "float32": np.dtype("float32"), - "float64": np.dtype("float64"), - "complex64": np.dtype("complex64"), - "complex128": np.dtype("complex128"), - # 'string': np.type - } -) +class DTypes(Enum): + """ + Enum for data types supported by oqd-dataschema. + + |Type |Variant| + |-------|-------| + |Boolean|`BOOL` | + |Integer|`INT16`, `INT32`, `INT64` (signed)
`UINT16`, `UINT32`, `UINT64` (unsigned)| + |Float |`FLOAT32`, `FLOAT64`| + |Complex|`COMPLEX64`, `COMPLEX128`| + |Bytes |`BYTES`| + |String |`STR`, `STRING`| + """ + BOOL = np.dtypes.BoolDType + INT16 = np.dtypes.Int16DType + INT32 = np.dtypes.Int32DType + INT64 = np.dtypes.Int64DType + UINT16 = np.dtypes.UInt16DType + UINT32 = np.dtypes.UInt32DType + UINT64 = np.dtypes.UInt64DType + FLOAT16 = np.dtypes.Float16DType + FLOAT32 = np.dtypes.Float32DType + FLOAT64 = np.dtypes.Float64DType + COMPLEX64 = np.dtypes.Complex64DType + COMPLEX128 = np.dtypes.Complex128DType + STR = np.dtypes.StrDType + BYTES = np.dtypes.BytesDType + STRING = np.dtypes.StringDType -class GroupBase(BaseModel, extra="forbid"): - """ - Schema representation for a group object within an HDF5 file. + @classmethod + def get(cls, name: str) -> DTypes: + """ + Get the [`DTypes`][oqd_dataschema.base.DTypes] enum variant by lowercase name. + """ + return cls[name.upper()] - Each grouping of data should be defined as a subclass of `Group`, and specify the datasets that it will contain. - This base object only has attributes, `attrs`, which are associated to the HDF5 group. + @classmethod + def names(cls): + """ + Get the lowercase names of all variants of [`DTypes`][oqd_dataschema.base.DTypes] enum. + """ + return tuple((dtype.name.lower() for dtype in cls)) - Attributes: - attrs: A dictionary of attributes to append to the dataset. - Example: - ``` - group = Group(attrs={'version': 2, 'date': '2025-01-01'}) - ``` - """ +DTypeNames = Literal[DTypes.names()] +""" +Literal list of lowercase names for [`DTypes`][oqd_dataschema.base.DTypes] variants. +""" - attrs: Optional[dict[str, Union[int, float, str, complex]]] = {} - def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - cls.__annotations__["class_"] = Literal[cls.__name__] - setattr(cls, "class_", cls.__name__) +######################################################################################## - # Auto-register new group types - GroupRegistry.register(cls) +invalid_attrs = ["_datastore_signature", "_group_schema"] -class Dataset(BaseModel, extra="forbid"): +def _valid_attr_key(value: str) -> str: + """ + Validates attribute keys (prevents overwriting of protected attrs). """ - Schema representation for a dataset object to be saved within an HDF5 file. + if value in invalid_attrs: + raise KeyError - Attributes: - dtype: The datatype of the dataset, such as `int32`, `float32`, `int64`, `float64`, etc. - Types are inferred from the `data` attribute if provided. - shape: The shape of the dataset. - data: The numpy ndarray of the data, from which `dtype` and `shape` are inferred. + return value - attrs: A dictionary of attributes to append to the dataset. - Example: - ``` - dataset = Dataset(data=np.array([1, 2, 3, 4])) +AttrKey = Annotated[str, BeforeValidator(_valid_attr_key)] +""" +Annotated type that represents a valid key for attributes (prevents overwriting of protected attrs). +""" - dataset = Dataset(dtype='int64', shape=[4,]) - dataset.data = np.array([1, 2, 3, 4]) - ``` - """ +Attrs = dict[AttrKey, Union[int, float, str, complex]] +""" +Type that represents attributes of an object. 
+""" + +######################################################################################## - dtype: Optional[Literal[tuple(mapping.keys())]] = None - shape: Optional[tuple[int, ...]] = None - data: Optional[Any] = Field(default=None, exclude=True) - attrs: Optional[dict[str, Union[int, float, str, complex]]] = {} +class GroupField(BaseModel, ABC): + """ + Abstract class for a valid data field of Group. + + Attributes: + attrs: A dictionary of attributes to append to the object. + """ - model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True) + attrs: Attrs = Field(default_factory=lambda: {}) - @model_validator(mode="before") @classmethod - def validate_and_update(cls, values: dict): - data = values.get("data") - dtype = values.get("dtype") - shape = values.get("shape") - - if data is None and (dtype is not None and shape is not None): - return values - - elif data is not None and (dtype is None and shape is None): - if not isinstance(data, np.ndarray): - raise TypeError("`data` must be a numpy.ndarray.") - - if data.dtype not in mapping.values(): - raise TypeError( - f"`data` must be a numpy array of dtype in {tuple(mapping.keys())}." - ) - - values["dtype"] = mapping.inverse[data.dtype] - values["shape"] = data.shape - - return values - - @model_validator(mode="after") - def validate_data_matches_shape_dtype(self): - """Ensure that `data` matches `dtype` and `shape`.""" - if self.data is not None: - expected_dtype = mapping[self.dtype] - if self.data.dtype != expected_dtype: - raise ValueError( - f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`." - ) - if self.data.shape != self.shape: - raise ValueError( - f"Expected shape {self.shape}, but got {self.data.shape}." - ) - return self - - -class MetaGroupRegistry(type): - def __new__(cls, clsname, superclasses, attributedict): - attributedict["groups"] = dict() - return super().__new__(cls, clsname, superclasses, attributedict) - - def register(cls, group): - if not issubclass(group, GroupBase): - raise TypeError("You may only register subclasses of GroupBase.") - - if group.__name__ in cls.groups.keys(): - warnings.warn( - f"Overwriting previously registered `{group.__name__}` group of the same name.", - UserWarning, - stacklevel=2, - ) - - cls.groups[group.__name__] = group - - def clear(cls): - """Clear all registered types (useful for testing)""" - cls.groups.clear() - - @property - def union(cls): - """Get the current Union of all registered types""" - return Annotated[ - Union[tuple(cls.groups.values())], Discriminator(discriminator="class_") - ] - - @property - def adapter(cls): - """Get TypeAdapter for current registered types""" - return TypeAdapter(cls.union) - - -class GroupRegistry(metaclass=MetaGroupRegistry): - pass + def _is_supported_type(cls, type_): + return type_ == cls or ( + typing.get_origin(type_) is Annotated and type_.__origin__ is cls + ) + + @abstractmethod + def _handle_data_dump(self, data: np.ndarray) -> np.ndarray: + """Hook into [Datastore.model_dump_hdf5][oqd_dataschema.datastore.Datastore.model_dump_hdf5] for compatibility mapping to HDF5.""" + pass + + @abstractmethod + def _handle_data_load(self, data: np.ndarray) -> np.ndarray: + """Hook into [Datastore.model_validate_hdf5][oqd_dataschema.datastore.Datastore.model_validate_hdf5] for reversing compatibility mapping, i.e. 
mapping data back to original type.""" + pass + + +# %% diff --git a/src/oqd_dataschema/constrained.py b/src/oqd_dataschema/constrained.py new file mode 100644 index 0000000..671060c --- /dev/null +++ b/src/oqd_dataschema/constrained.py @@ -0,0 +1,244 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Annotated, Sequence, TypeAlias + +from pydantic import AfterValidator + +from oqd_dataschema.dataset import CastDataset +from oqd_dataschema.folder import Folder +from oqd_dataschema.table import CastTable +from oqd_dataschema.utils import _flex_shape_equal, _validator_from_condition + +######################################################################################## + +__all__ = ["contable", "condataset", "confolder"] + +######################################################################################## + + +@_validator_from_condition +def _constrain_dim(model, *, min_dim=None, max_dim=None): + """Constrains the dimension of a Dataset, Table, or Folder.""" + + if min_dim is not None and max_dim is not None and min_dim > max_dim: + raise ValueError("Impossible to satisfy dimension constraints on dataset.") + + min_dim = 0 if min_dim is None else min_dim + + # fast escape + if min_dim == 0 and max_dim is None: + return + + dims = len(model.shape) + if dims < min_dim or (max_dim is not None and dims > max_dim): + raise ValueError( + f"Expected {min_dim} <= dimension of shape{f' <= {max_dim}' if max_dim is not None else ''}, but got shape = {model.shape}." + ) + + +@_validator_from_condition +def _constrain_shape(model, *, shape_constraint=None): + """Constrains the shape of a Dataset, Table, or Folder.""" + + # fast escape + if shape_constraint is None: + return + + if not _flex_shape_equal(shape_constraint, model.shape): + raise ValueError( + f"Expected shape to be {shape_constraint}, but got {model.shape}." + ) + + +######################################################################################## + + +@_validator_from_condition +def _constrain_dtype_dataset(dataset, *, dtype_constraint=None): + """Constrains the dtype of a Dataset.""" + + # fast escape + if dtype_constraint is None: + return + + # convert dtype constraint to set + if (not isinstance(dtype_constraint, str)) and isinstance( + dtype_constraint, Sequence + ): + dtype_constraint = set(dtype_constraint) + elif isinstance(dtype_constraint, str): + dtype_constraint = {dtype_constraint} + + # apply dtype constraint + if dataset.dtype not in dtype_constraint: + raise ValueError( + f"Expected dtype to be one of {dtype_constraint}, but got {dataset.dtype}." + ) + + +def condataset( + *, + shape_constraint=None, + dtype_constraint=None, + min_dim=None, + max_dim=None, +) -> TypeAlias: + """Implements dtype, dimension and shape constraints on the Dataset. 
+ + Arguments: + shape_constraint (Tuple[Union[None, int],...]): Flexible shape constraint; `None` entries match any extent along the corresponding axis. + dtype_constraint (Tuple[DTypeNames,...]): Allowed dtype name or collection of allowed dtype names. + min_dim (int): Minimum number of dimensions. + max_dim (int): Maximum number of dimensions. + + Example: + ``` + class CustomGroup(GroupBase): + x: condataset(dtype_constraint=("int16", "int32", "int64")) + y: condataset(shape_constraint=(100,)) + z: condataset(min_dim=1, max_dim=1) + + group = CustomGroup(x=..., y=..., z=...) # succeeds as it obeys the constraints + + group = CustomGroup(x=..., y=..., z=...) # fails as it violates the constraints + ``` + + """ + return Annotated[ + CastDataset, + AfterValidator(_constrain_dtype_dataset(dtype_constraint=dtype_constraint)), + AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)), + AfterValidator(_constrain_shape(shape_constraint=shape_constraint)), + ] + + +######################################################################################## + + +@_validator_from_condition +def _constrain_dtype_table(table, *, dtype_constraint={}): + """Constrains the dtype of a Table.""" + + for k, v in dtype_constraint.items(): + if (not isinstance(v, str)) and isinstance(v, Sequence): + _v = set(dtype_constraint[k]) + elif isinstance(v, str): + _v = {dtype_constraint[k]} + + if _v and dict(table.columns)[k] not in _v: + raise ValueError( + f"Expected dtype to be one of {_v}, but got {dict(table.columns)[k]}." + ) + + +@_validator_from_condition +def _constrain_required_field(table, *, required_fields=None, strict_fields=False): + """Constrains the fields of a Table.""" + + if strict_fields and required_fields is None: + raise ValueError("Constraints force an empty Table.") + + # fast escape + if required_fields is None: + return + + # convert required fields to set + if (not isinstance(required_fields, str)) and isinstance(required_fields, Sequence): + required_fields = set(required_fields) + elif isinstance(required_fields, str): + required_fields = {required_fields} + + diff = required_fields.difference(set([c[0] for c in table.columns])) + reverse_diff = set([c[0] for c in table.columns]).difference(required_fields) + + if len(diff) > 0: + raise ValueError(f"Missing required fields {diff}.") + + if strict_fields and len(reverse_diff): + raise ValueError( + f"Extra fields in the table are forbidden by constraints {reverse_diff}." + ) + + +def contable( + *, + required_fields=None, + strict_fields=False, + dtype_constraint={}, + shape_constraint=None, + min_dim=None, + max_dim=None, +) -> TypeAlias: + """Implements field, dtype, dimension and shape constraints on the Table. 
+ + Example: + ``` + class CustomGroup(GroupBase): + x: contable(dtype_constraint=("int16", "int32", "int64")) + y: contable(shape_constraint=(100,)) + z: contable(min_dim=1, max_dim=1) + u: contable(required_fields=("c1", "c2")) + v: contable(required_fields=("c1", "c2"), strict_fields=True) + + + group = CustomGroup(x=..., y=..., z=..., u=..., v=...) # succeeds as it obeys the constraints + + group = CustomGroup(x=..., y=..., z=..., u=..., v=...) # fails as it violates the constraints + ``` + + """ + return Annotated[ + CastTable, + AfterValidator( + _constrain_required_field( + required_fields=required_fields, strict_fields=strict_fields + ) + ), + AfterValidator(_constrain_dtype_table(dtype_constraint=dtype_constraint)), + AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)), + AfterValidator(_constrain_shape(shape_constraint=shape_constraint)), + ] + + +######################################################################################## + + +def confolder( + *, + shape_constraint=None, + min_dim=None, + max_dim=None, +) -> TypeAlias: + """Implements dimension and shape constraints on the Folder. + + Example: + ``` + class CustomGroup(GroupBase): + x: confolder(shape_constraint=(100,)) + y: confolder(min_dim=1, max_dim=1) + + + group = CustomGroup(x=..., y=...) # succeeds as it obeys the constraints + + group = CustomGroup(x=..., y=...) # fails as it violates the constraints + ``` + + """ + return Annotated[ + Folder, + AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)), + AfterValidator(_constrain_shape(shape_constraint=shape_constraint)), + ] diff --git a/src/oqd_dataschema/dataset.py b/src/oqd_dataschema/dataset.py new file mode 100644 index 0000000..ee9d7f9 --- /dev/null +++ b/src/oqd_dataschema/dataset.py @@ -0,0 +1,147 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# %% + +from __future__ import annotations + +from typing import Annotated, Any, Optional, Tuple, Union + +import numpy as np +from pydantic import ( + BeforeValidator, + ConfigDict, + Field, + field_validator, + model_validator, +) + +from oqd_dataschema.base import Attrs, DTypeNames, DTypes, GroupField + +from .utils import _flex_shape_equal + +######################################################################################## + +__all__ = [ + "Dataset", + "CastDataset", +] + +######################################################################################## + + +class Dataset(GroupField, extra="forbid"): + """ + Schema representation for a dataset object to be saved within an HDF5 file. + + Attributes: + dtype: The datatype of the dataset, such as `int32`, `float32`, `int64`, `float64`, etc. + Types are inferred from the `data` attribute if provided. + shape: The shape of the dataset. + data: The numpy ndarray of the data, from which `dtype` and `shape` are inferred. + + attrs: A dictionary of attributes to append to the dataset. 
+ + Example: + ``` + dataset = Dataset(data=np.array([1, 2, 3, 4])) + + dataset = Dataset(dtype='int64', shape=[4,]) + dataset.data = np.array([1, 2, 3, 4]) + ``` + """ + + dtype: Optional[DTypeNames] = None # type: ignore + shape: Optional[Tuple[Union[int, None], ...]] = None + data: Optional[Any] = Field(default=None, exclude=True) + + attrs: Attrs = Field(default_factory=lambda: {}) + + model_config = ConfigDict( + use_enum_values=False, arbitrary_types_allowed=True, validate_assignment=True + ) + + @field_validator("data", mode="before") + @classmethod + def _validate_and_update(cls, value): + # check if data exists + if value is None: + return value + + # check if data is a numpy array + if not isinstance(value, np.ndarray): + raise TypeError("`data` must be a numpy.ndarray.") + + return value + + @model_validator(mode="after") + def _validate_data_matches_shape_dtype(self): + """Ensure that `data` matches `dtype` and `shape`.""" + + # check if data exists + if self.data is None: + return self + + # check if dtype matches data + if ( + self.dtype is not None + and type(self.data.dtype) is not DTypes.get(self.dtype).value + ): + raise ValueError( + f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`." + ) + + # check if shape matches data + if self.shape is not None and not _flex_shape_equal( + self.data.shape, self.shape + ): + raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.") + + # reassign dtype to the concrete value inferred from data + if self.dtype != DTypes(type(self.data.dtype)).name.lower(): + self.dtype = DTypes(type(self.data.dtype)).name.lower() + + # reassign shape to concrete value if it is None or a flexible shape + if self.shape != self.data.shape: + self.shape = self.data.shape + + return self + + @classmethod + def cast(cls, data: np.ndarray) -> Dataset: + """Casts data from numpy array to Dataset.""" + if isinstance(data, np.ndarray): + return cls(data=data) + return data + + def __getitem__(self, idx): + return self.data[idx] + + def _handle_data_dump(self, data): + np_dtype = ( + np.dtypes.BytesDType if type(data.dtype) is np.dtypes.StrDType else None + ) + + if np_dtype is None: + return data + + return data.astype(np_dtype) + + def _handle_data_load(self, data): + np_dtype = DTypes.get(self.dtype).value + return data.astype(np_dtype) + + +CastDataset = Annotated[Dataset, BeforeValidator(Dataset.cast)] +"""Annotated type that automatically executes Dataset.cast""" diff --git a/src/oqd_dataschema/datastore.py b/src/oqd_dataschema/datastore.py index cf52c0c..8fb5066 100644 --- a/src/oqd_dataschema/datastore.py +++ b/src/oqd_dataschema/datastore.py @@ -14,15 +14,21 @@ # %% +from __future__ import annotations + +import json import pathlib -from typing import Any, Dict, Literal, Optional +from typing import Any, Callable, Dict, Literal import h5py -import numpy as np -from pydantic import BaseModel, model_validator -from pydantic.types import TypeVar +from pydantic import ( + BaseModel, + Field, + field_validator, +) -from oqd_dataschema.base import Dataset, GroupBase, GroupRegistry +from oqd_dataschema.base import Attrs, GroupField +from oqd_dataschema.group import GroupBase, GroupRegistry ######################################################################################## @@ -34,50 +40,93 @@ # %% class Datastore(BaseModel, extra="forbid"): """ - Saves the model and its associated data to an HDF5 file. - This method serializes the model's data and attributes into an HDF5 file - at the specified filepath. 
+ Class representing a datastore with restricted HDF5 format. Attributes: - filepath (pathlib.Path): The path to the HDF5 file where the model data will be saved. + groups (Dict[str,Group]): groups of data. + attrs (Attrs): attributes of the datastore. """ - groups: Dict[str, Any] + groups: Dict[str, Any] = Field(default_factory=lambda: {}) + + attrs: Attrs = Field(default_factory=lambda: {}) + + @classmethod + def _validate_group(cls, key, group): + """Helper function for validating group to be of type Group registered in the GroupRegistry.""" + if isinstance(group, GroupBase): + return group + + if isinstance(group, dict): + return GroupRegistry.adapter.validate_python(group) + + raise ValueError(f"Key `{key}` contains invalid group data.") - @model_validator(mode="before") + @field_validator("groups", mode="before") @classmethod def validate_groups(cls, data): - if isinstance(data, dict) and "groups" in data: - # Get the current adapter from registry - try: - validated_groups = {} - - for key, group_data in data["groups"].items(): - if isinstance(group_data, GroupBase): - # Already a Group instance - validated_groups[key] = group_data - elif isinstance(group_data, dict): - # Parse dict using discriminated union - validated_groups[key] = GroupRegistry.adapter.validate_python( - group_data - ) - else: - raise ValueError( - f"Invalid group data for key '{key}': {type(group_data)}" - ) - - data["groups"] = validated_groups - - except ValueError as e: - if "No group types registered" in str(e): - raise ValueError( - "No group types available. Register group types before creating Datastore." - ) - raise - - return data - - def model_dump_hdf5(self, filepath: pathlib.Path, mode: Literal["w", "a"] = "a"): + """Validates groups to be of type Group registered in the GroupRegistry.""" + if GroupRegistry.groups == {}: + raise ValueError( + "No group types available. Register group types before creating Datastore." 
+ ) + + validated_groups = {k: cls._validate_group(k, v) for k, v in data.items()} + return validated_groups + + def _dump_group(self, h5datastore, gkey, group): + """Helper function for dumping Group.""" + # remove existing group + if gkey in h5datastore.keys(): + del h5datastore[gkey] + + # create group + h5_group = h5datastore.create_group(gkey) + + # dump group schema + h5_group.attrs["_group_schema"] = json.dumps( + group.model_json_schema(), indent=2 + ) + + # dump group attributes + for akey, attr in group.attrs.items(): + h5_group.attrs[akey] = attr + + # dump group data + for dkey, dataset in group.__dict__.items(): + if dkey in ["attrs", "class_"]: + continue + + # if group field contains a dictionary of Datasets + if isinstance(dataset, dict): + h5_subgroup = h5_group.create_group(dkey) + for ddkey, ddataset in dataset.items(): + self._dump_dataset(h5_subgroup, ddkey, ddataset) + continue + + self._dump_dataset(h5_group, dkey, dataset) + + def _dump_dataset(self, h5group, dkey, dataset): + """Helper function for dumping a group data field.""" + + if dataset is not None and not isinstance(dataset, GroupField): + raise ValueError("Group data field is not a Dataset, Table, or Folder.") + + # handle optional dataset + if dataset is None: + h5group.create_dataset(dkey, data=h5py.Empty("f")) + return + + # dtype str converted to bytes when dumped (h5 compatibility) + h5_dataset = h5group.create_dataset( + dkey, data=dataset._handle_data_dump(dataset.data) + ) + + # dump dataset attributes + for akey, attr in dataset.attrs.items(): + h5_dataset.attrs[akey] = attr + + def model_dump_hdf5(self, filepath: pathlib.Path, mode: Literal["w", "a"] = "w"): """ Saves the model and its associated data to an HDF5 file. This method serializes the model's data and attributes into an HDF5 file @@ -89,28 +138,33 @@ def model_dump_hdf5(self, filepath: pathlib.Path, mode: Literal["w", "a"] = "a") filepath.parent.mkdir(exist_ok=True, parents=True) with h5py.File(filepath, mode) as f: - # store the model JSON schema - f.attrs["model"] = self.model_dump_json() + # dump the datastore signature + f.attrs["_datastore_signature"] = self.model_dump_json(indent=2) + for akey, attr in self.attrs.items(): + f.attrs[akey] = attr - # store each group + # dump each group for gkey, group in self.groups.items(): - if gkey in f.keys(): - del f[gkey] - h5_group = f.create_group(gkey) - for akey, attr in group.attrs.items(): - h5_group.attrs[akey] = attr - - for dkey, dataset in group.__dict__.items(): - if not isinstance(dataset, Dataset): - continue - h5_dataset = h5_group.create_dataset(dkey, data=dataset.data) - for akey, attr in dataset.attrs.items(): - h5_dataset.attrs[akey] = attr + if gkey in ["attrs", "class_"]: + continue + + self._dump_group(f, gkey, group) @classmethod - def model_validate_hdf5( - cls, filepath: pathlib.Path, types: Optional[TypeVar] = None - ): + def _load_data(cls, group, h5group, dkey, ikey=None): + field = group.__dict__[ikey] if ikey else group.__dict__ + h5field = h5group[ikey] if ikey else h5group + + if isinstance(field[dkey], GroupField): + field[dkey].data = field[dkey]._handle_data_load(h5field[dkey][()]) + return + + raise ValueError( + "Attempted to load Group data field that is neither a Dataset, Table, nor Folder." + ) + + @classmethod + def model_validate_hdf5(cls, filepath: pathlib.Path): """ Loads the model from an HDF5 file at the specified filepath. 
@@ -118,12 +172,61 @@ filepath (pathlib.Path): The path to the HDF5 file where the model data will be read and validated from. """ with h5py.File(filepath, "r") as f: - self = cls.model_validate_json(f.attrs["model"]) + # Load datastore signature + self = cls.model_validate_json(f.attrs["_datastore_signature"]) - # loop through all groups in the model schema and load HDF5 store - for gkey, group in self.groups.items(): - for dkey, val in group.__dict__.items(): + # loop through all groups in the model schema and load the data + for gkey, group in self: + for dkey in group.__class__.model_fields: + # ignore attrs and class_ fields if dkey in ("attrs", "class_"): continue - group.__dict__[dkey].data = np.array(f[gkey][dkey][()]) + + if group.__dict__[dkey] is None: + continue + + # load data for a dict of Datasets, Tables, or Folders + if isinstance(group.__dict__[dkey], dict): + for ddkey in group.__dict__[dkey]: + cls._load_data(group, f[gkey], dkey=ddkey, ikey=dkey) + continue + + # load Dataset, Table, or Folder data + cls._load_data(group, f[gkey], dkey=dkey) + return self + + def __getitem__(self, key): + """Overloads indexing to retrieve elements in groups.""" + return self.groups.__getitem__(key) + + def __iter__(self): + """Overloads iter to iterate over elements in groups.""" + return self.groups.items().__iter__() + + def update(self, **groups): + """Updates groups in the datastore, overwriting past values.""" + for k, v in groups.items(): + self.groups[k] = v + + def add(self, **groups): + """Adds new groups to the datastore.""" + + existing_keys = set(groups.keys()).intersection(set(self.groups.keys())) + if existing_keys: + raise ValueError( + f"Keys {existing_keys} already exist in the datastore, use `update` instead if intending to overwrite past data." + ) + + self.update(**groups) + + def pipe(self, func: Callable[[Datastore], None]) -> Datastore: + """Applies the in-place transformation `func` to the datastore; `func` must return None.""" + _result = func(self) + + if _result is not None: + raise ValueError("`func` must return None.") + + return self + + +# %% diff --git a/src/oqd_dataschema/folder.py b/src/oqd_dataschema/folder.py new file mode 100644 index 0000000..4e8c41e --- /dev/null +++ b/src/oqd_dataschema/folder.py @@ -0,0 +1,275 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from types import MappingProxyType +from typing import Annotated, Any, Dict, Optional, Tuple, Union + +import numpy as np +from pydantic import ( + BeforeValidator, + ConfigDict, + Field, + field_validator, + model_validator, +) +from typing_extensions import TypeAliasType + +from oqd_dataschema.base import Attrs, DTypeNames, DTypes, GroupField +from oqd_dataschema.utils import _flex_shape_equal + +######################################################################################## + +__all__ = ["Folder", "CastFolder"] + +######################################################################################## + +DocumentSchema = TypeAliasType( + "DocumentSchema", + Dict[str, Union["DocumentSchema", Optional[DTypeNames]]], # type: ignore +) + + +class Folder(GroupField, extra="forbid"): + """ + Schema representation for a folder object to be saved within an HDF5 file. + + Attributes: + document_schema: The schema for a document (structured type with keys and their datatype). Types are inferred from the `data` attribute if not provided. + shape: The shape of the folder. + data: The numpy ndarray or recarray (of structured dtype) of the data, from which `document_schema` and `shape` can be inferred. + + attrs: A dictionary of attributes to append to the folder. + + Example: + ```python + schema = dict( + index="int32", + t="float64", + channels=dict(ch1="complex128", ch2="complex128"), + label="str", + ) + dt = np.dtype( + [ + ("index", np.int32), + ("t", np.float64), + ("channels", np.dtype([("ch1", np.complex128), ("ch2", np.complex128)])), + ("label", np.dtype(" np.dtype: + return self._numpy_dtype( + self.document_schema, str_size=str_size, bytes_size=bytes_size + ) + + @staticmethod + def _dump_dtype_str_to_bytes(dtype): + np_dtype = [] + + for k, (v, _) in dtype.fields.items(): + if isinstance(v.fields, MappingProxyType): + dt = Folder._dump_dtype_str_to_bytes(v) + elif type(v) is np.dtypes.StrDType: + dt = np.empty(0, dtype=v).astype(np.dtypes.BytesDType).dtype + else: + dt = v + + np_dtype.append((k, dt)) + + return np.dtype(np_dtype) + + def _handle_data_dump(self, data): + np_dtype = self._dump_dtype_str_to_bytes(data.dtype) + + return data.astype(np_dtype) + + @staticmethod + def _load_dtype_bytes_to_str(document_schema, dtype): + np_dtype = [] + + for k, (v, _) in dtype.fields.items(): + if isinstance(v.fields, MappingProxyType): + dt = Folder._load_dtype_bytes_to_str(document_schema[k], v) + elif document_schema[k] == "str": + dt = np.empty(0, dtype=v).astype(np.dtypes.StrDType).dtype + else: + dt = v + + np_dtype.append((k, dt)) + + return np.dtype(np_dtype) + + def _handle_data_load(self, data): + np_dtype = self._load_dtype_bytes_to_str(self.document_schema, data.dtype) + + return data.astype(np_dtype) + + @classmethod + def cast(cls, data: np.ndarray) -> Folder: + """Casts data from numpy structured array to Folder.""" + if isinstance(data, np.ndarray): + if not isinstance(data.dtype.fields, MappingProxyType): + raise TypeError("dtype of data must be a structured dtype.") + + document_schema = cls._get_document_schema_from_dtype(data.dtype) + + return cls(document_schema=document_schema, data=data) + return data + + +CastFolder = Annotated[Folder, BeforeValidator(Folder.cast)] +"""Annotated type that automatically executes Folder.cast""" diff --git a/src/oqd_dataschema/group.py b/src/oqd_dataschema/group.py new file mode 100644 index 0000000..2d2829d --- /dev/null +++ b/src/oqd_dataschema/group.py @@ -0,0 +1,174 @@ +# Copyright 2024-2025 Open 
Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing +import warnings +from functools import reduce +from types import NoneType +from typing import Annotated, ClassVar, Literal, Union + +from pydantic import ( + BaseModel, + Discriminator, + Field, + TypeAdapter, +) + +from oqd_dataschema.base import Attrs, GroupField + +######################################################################################## + +__all__ = [ + "GroupBase", + "GroupRegistry", +] + + +######################################################################################## + + +class GroupBase(BaseModel, extra="forbid"): + """ + Schema representation for a group object within an HDF5 file. + + Each grouping of data should be defined as a subclass of `GroupBase`, and specify the datasets that it will contain. + This base object only has attributes, `attrs`, which are associated to the HDF5 group. + + Attributes: + attrs: A dictionary of attributes to append to the group. + + """ + + attrs: Attrs = Field(default_factory=lambda: {}) + + @staticmethod + def _is_basic_groupfield_type(v): + return reduce( + lambda x, y: x or y, + (gf._is_supported_type(v) for gf in GroupField.__subclasses__()), + ) + + @classmethod + def _is_groupfield_type(cls, v): + is_datafield = cls._is_basic_groupfield_type(v) + + is_annotated_datafield = typing.get_origin( + v + ) is Annotated and cls._is_basic_groupfield_type(v.__origin__) + + is_optional_datafield = typing.get_origin(v) is Union and ( + (v.__args__[0] == NoneType and cls._is_basic_groupfield_type(v.__args__[1])) + or ( + v.__args__[1] == NoneType + and cls._is_basic_groupfield_type(v.__args__[0]) + ) + ) + + is_dict_datafield = ( + typing.get_origin(v) is dict + and v.__args__[0] is str + and cls._is_basic_groupfield_type(v.__args__[1]) + ) + + return ( + is_datafield + or is_annotated_datafield + or is_optional_datafield + or is_dict_datafield + ) + + @classmethod + def _is_classvar(cls, v): + return v is ClassVar or typing.get_origin(v) is ClassVar + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + + for k, v in cls.__annotations__.items(): + if k == "class_": + raise AttributeError("`class_` attribute should not be set manually.") + + if k == "attrs" and v is not Attrs: + raise AttributeError( + "`attrs` attribute must have type annotation of Attrs." + ) + + if k == "attrs" or cls._is_classvar(v): + continue + + if not cls._is_groupfield_type(v): + raise TypeError( + "All fields of `GroupBase` have to be of type `Dataset`, `Table` or `Folder`." 
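For reference, a minimal sketch of a group definition that passes the field checks above, assuming the top-level `oqd_dataschema` exports used in the test suite; the group and field names are purely illustrative:

```python
# Sketch: a conforming GroupBase subclass. Field annotations must be a
# GroupField type, or Optional[...] / Dict[str, ...] wrappers around one.
from typing import Dict, Optional

import numpy as np

from oqd_dataschema import Dataset, GroupBase


class CameraShotsGroup(GroupBase):
    images: Dataset                        # plain GroupField annotation
    calibrations: Dict[str, Dataset] = {}  # string-keyed mapping of datasets
    background: Optional[Dataset] = None   # optional dataset, may be omitted


# Defining the subclass auto-registers it; instantiation validates the fields.
group = CameraShotsGroup(images=Dataset(data=np.random.rand(8, 64, 64)))
```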
+ ) + + cls.__annotations__["class_"] = Literal[cls.__name__] + setattr(cls, "class_", cls.__name__) + + # Auto-register new group types + GroupRegistry.register(cls) + + +######################################################################################## + + +class MetaGroupRegistry(type): + """ + Metaclass for the GroupRegistry + """ + + def __new__(cls, clsname, superclasses, attributedict): + attributedict["groups"] = dict() + return super().__new__(cls, clsname, superclasses, attributedict) + + def register(cls, group): + """Registers a group into the GroupRegistry.""" + if not issubclass(group, GroupBase): + raise TypeError("You may only register subclasses of GroupBase.") + + if group.__name__ in cls.groups.keys(): + warnings.warn( + f"Overwriting previously registered `{group.__name__}` group of the same name.", + UserWarning, + stacklevel=2, + ) + + cls.groups[group.__name__] = group + + def clear(cls): + """Clear all registered types (useful for testing)""" + cls.groups.clear() + + @property + def union(cls): + """Get the current Union of all registered types""" + + if len(cls.groups) > 1: + return Annotated[ + Union[tuple(cls.groups.values())], Discriminator(discriminator="class_") + ] + else: + return next(iter(cls.groups.values())) + + @property + def adapter(cls): + """Get TypeAdapter for current registered types""" + return TypeAdapter(cls.union) + + +class GroupRegistry(metaclass=MetaGroupRegistry): + """ + Represents the GroupRegistry + """ + + pass diff --git a/src/oqd_dataschema/groups.py b/src/oqd_dataschema/groups.py deleted file mode 100644 index 88ecd2f..0000000 --- a/src/oqd_dataschema/groups.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2024-2025 Open Quantum Design - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from oqd_dataschema.base import Dataset, GroupBase - -######################################################################################## - -__all__ = [ - "SinaraRawDataGroup", - "MeasurementOutcomesDataGroup", - "ExpectationValueDataGroup", - "OQDTestbenchDataGroup", -] - -######################################################################################## - - -class SinaraRawDataGroup(GroupBase): - """ - Example `Group` for raw data from the Sinara real-time control system. - This is a placeholder for demonstration and development. - """ - - camera_images: Dataset - - -class MeasurementOutcomesDataGroup(GroupBase): - """ - Example `Group` for processed data classifying the readout of the state. - This is a placeholder for demonstration and development. - """ - - outcomes: Dataset - - -class ExpectationValueDataGroup(GroupBase): - """ - Example `Group` for processed data calculating the expectation values. - This is a placeholder for demonstration and development. 
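Once groups are registered, the registry's discriminated union can rebuild the correct subclass from dumped data. A sketch, assuming a group defined as above; the `WaveformGroup` name is hypothetical:

```python
# Sketch: GroupRegistry.adapter resolves the `class_` discriminator back to
# the registered subclass when validating a dumped payload.
import numpy as np

from oqd_dataschema import Dataset, GroupBase, GroupRegistry


class WaveformGroup(GroupBase):
    samples: Dataset


payload = WaveformGroup(samples=Dataset(data=np.arange(4, dtype=np.int64)))

adapter = GroupRegistry.adapter  # TypeAdapter over the union of all groups
roundtrip = adapter.validate_python(payload.model_dump())
assert isinstance(roundtrip, WaveformGroup)
```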
- """ - - expectation_value: Dataset - - -class OQDTestbenchDataGroup(GroupBase): - """ """ - - time: Dataset - voltages: Dataset diff --git a/src/oqd_dataschema/table.py b/src/oqd_dataschema/table.py new file mode 100644 index 0000000..da72a57 --- /dev/null +++ b/src/oqd_dataschema/table.py @@ -0,0 +1,262 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from types import MappingProxyType +from typing import Annotated, Any, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd +from pydantic import ( + BeforeValidator, + ConfigDict, + Field, + field_validator, + model_validator, +) + +from oqd_dataschema.base import Attrs, DTypeNames, DTypes, GroupField +from oqd_dataschema.utils import ( + _flex_shape_equal, + _is_list_unique, +) + +######################################################################################## + +__all__ = [ + "Table", + "CastTable", +] + +######################################################################################## + + +Column = Tuple[str, Optional[DTypeNames]] + + +class Table(GroupField, extra="forbid"): + """ + Schema representation for a table object to be saved within an HDF5 file. + + Attributes: + columns: The columns in the table accompanied by their datatype. Types are inferred from the `data` attribute if not provided. + shape: The shape of the table (excludes the column index). + data: The numpy ndarray or recarray (of structured dtype) of the data, from which `dtype` and `shape` can be inferred. + + attrs: A dictionary of attributes to append to the table. + + Example: + ```python + dt = np.dtype( + [ + ("index", np.int32), + ("t", np.float64), + ("z", np.complex128), + ("label", np.dtype(" pd.DataFrame: + """Converts flat table to pandas DataFrame.""" + if len(self.shape) > 1: + raise ValueError( + "Conversion to pandas DataFrame only supported on 1D Table." 
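The `Table` above is designed to round-trip with pandas. A short sketch, assuming the top-level `Table` export used in the tests; the column names are illustrative:

```python
# Sketch: cast a DataFrame to a Table (columns and dtypes inferred from the
# frame), then convert back with to_pandas(), defined for 1D tables only.
import numpy as np
import pandas as pd

from oqd_dataschema import Table

df = pd.DataFrame(
    {
        "t": np.linspace(0.0, 1.0, 5),
        "counts": np.arange(5, dtype=np.int64),
    }
)

tbl = Table.cast(df)  # columns inferred as [("t", "float64"), ("counts", "int64")]
assert tbl.shape == (5,)

df_roundtrip = tbl.to_pandas()
```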
+            )
+        return pd.DataFrame(
+            data=self.data, columns=[c[0] for c in self.columns]
+        ).astype({k: v for k, v in self.columns})
+
+    @staticmethod
+    def _pd_to_np(df):
+        np_dtype = []
+        for k, v in df.dtypes.items():
+            if type(v) is not np.dtypes.ObjectDType:
+                field_np_dtype = (k, v)
+                np_dtype.append(field_np_dtype)
+                continue
+
+            # Check if column of object dtype is actually str dtype
+            if (np.vectorize(lambda x: isinstance(x, str))(df[k].to_numpy())).all():
+                dt = df[k].to_numpy().astype(np.dtypes.StrDType).dtype
+                field_np_dtype = (k, dt)
+
+                np_dtype.append(field_np_dtype)
+                continue
+
+            raise ValueError(f"Unsupported datatype for column {k}")
+
+        return np.rec.fromarrays(
+            df.to_numpy().transpose(),
+            names=[dt[0] for dt in np_dtype],
+            formats=[dt[1] for dt in np_dtype],
+        ).astype(np.dtype(np_dtype))
+
+    @field_validator("data", mode="before")
+    @classmethod
+    def _validate_and_update(cls, value):
+        # check if data exists
+        if value is None:
+            return value
+
+        # check if data is a numpy array or pandas DataFrame
+        if not isinstance(value, (np.ndarray, pd.DataFrame)):
+            raise TypeError("`data` must be a numpy.ndarray or pandas.DataFrame.")
+
+        if isinstance(value, pd.DataFrame):
+            value = cls._pd_to_np(value)
+
+        if not isinstance(value.dtype.fields, MappingProxyType):
+            raise TypeError("dtype of data must be a structured dtype.")
+
+        if isinstance(value, np.ndarray):
+            value = value.view(np.recarray)
+
+        return value
+
+    @model_validator(mode="after")
+    def _validate_data_matches_shape_dtype(self):
+        """Ensure that `data` matches `dtype` and `shape`."""
+
+        # check if data exists
+        if self.data is None:
+            return self
+
+        if set(self.data.dtype.fields.keys()) != set([c[0] for c in self.columns]):
+            raise ValueError("Fields of data do not match expected fields for Table.")
+
+        # check if dtype matches data
+        for k, v in self.data.dtype.fields.items():
+            if (
+                dict(self.columns)[k] is not None
+                and type(v[0]) is not DTypes.get(dict(self.columns)[k]).value
+            ):
+                raise ValueError(
+                    f"Expected data dtype `{dict(self.columns)[k]}`, but got `{v[0].name}`."
+                )
+
+        # check if shape matches data
+        if self.shape is not None and not _flex_shape_equal(
+            self.data.shape, self.shape
+        ):
+            raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")
+
+        # reassign dtype if it is None
+        for n, (k, v) in enumerate(self.columns):
+            if v != DTypes(type(self.data.dtype.fields[k][0])).name.lower():
+                self.columns[n] = (
+                    k,
+                    DTypes(type(self.data.dtype.fields[k][0])).name.lower(),
+                )
+
+        # reassign shape to concrete value if it is None or a flexible shape
+        if self.shape != self.data.shape:
+            self.shape = self.data.shape
+
+        return self
+
+    def numpy_dtype(self, *, str_size=64, bytes_size=64):
+        np_dtype = []
+
+        for k, v in self.columns:
+            if v is None:
+                raise ValueError(
+                    "Method numpy_dtype can only be called on concrete types."
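Flexible (`None`) column dtypes are concretized when data is assigned, and `numpy_dtype` expands `str`/`bytes` columns to fixed-width dtypes. A sketch under the same assumptions as above:

```python
# Sketch: dtype inference for a flexible column, plus fixed-width expansion
# of a "str" column via numpy_dtype(str_size=...).
import numpy as np

from oqd_dataschema import Table

tbl = Table(columns=[("c", None)], shape=(3,))  # dtype left flexible
tbl.data = np.rec.fromarrays(
    np.random.rand(1, 3),
    dtype=np.dtype([("c", np.float64)]),
)
assert dict(tbl.columns)["c"] == "float64"  # inferred from the assigned data

labels = Table(columns=[("label", "str")], shape=(None,))
print(labels.numpy_dtype(str_size=32))  # dtype([('label', '<U32')])
```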
+ ) + if v == "str": + dt = np.dtypes.StrDType(str_size) + elif v == "bytes": + dt = np.dtypes.BytesDType(bytes_size) + else: + dt = DTypes.get(v).value() + + np_dtype.append((k, dt)) + + return np.dtype(np_dtype) + + @classmethod + def cast(cls, data: np.ndarray | pd.DataFrame) -> Table: + """Casts data from pandas DataFrame or numpy structured array to Table.""" + if isinstance(data, pd.DataFrame): + data = cls._pd_to_np(data) + + if isinstance(data, np.ndarray): + if not isinstance(data.dtype.fields, MappingProxyType): + raise TypeError("dtype of data must be a structured dtype.") + + columns = [ + (k, DTypes(type(v)).name.lower()) + for k, (v, _) in data.dtype.fields.items() + ] + + return cls(columns=columns, data=data) + return data + + def _handle_data_dump(self, data): + np_dtype = np.dtype( + [ + (k, np.empty(0, dtype=v).astype(np.dtypes.BytesDType).dtype) + if type(v) is np.dtypes.StrDType + else (k, v) + for k, (v, _) in data.dtype.fields.items() + ] + ) + + return data.astype(np_dtype) + + def _handle_data_load(self, data): + np_dtype = np.dtype( + [ + ( + k, + np.empty(0, dtype=v).astype(np.dtypes.StrDType).dtype, + ) + if dict(self.columns)[k] == "str" + else (k, v) + for k, (v, _) in np.array(data).dtype.fields.items() + ] + ) + return data.astype(np_dtype) + + +CastTable = Annotated[Table, BeforeValidator(Table.cast)] +"""Annotated type that automatically executes Table.cast""" diff --git a/src/oqd_dataschema/utils.py b/src/oqd_dataschema/utils.py new file mode 100644 index 0000000..b1153d6 --- /dev/null +++ b/src/oqd_dataschema/utils.py @@ -0,0 +1,160 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import reduce +from types import MappingProxyType + +import numpy as np +from numpy.lib import recfunctions as rfn + +######################################################################################## + +__all__ = [ + "unstructured_to_structured", + "dict_to_structured", +] + + +######################################################################################## + + +def _unstructured_to_structured_helper(data, dtype): + for n, (k, (v, _)) in enumerate(dtype.fields.items()): + if isinstance(v.fields, MappingProxyType): + x = _unstructured_to_structured_helper(data, v) + + else: + x = data.pop(0).astype(type(v)) + + if n == 0: + new_data = x.astype(np.dtype([(k, x.dtype)])) + else: + if new_data.shape != x.shape: + raise ValueError( + f"Incompatible shape, expected {new_data.shape} but got {x.shape}." + ) + + new_data = rfn.append_fields( + new_data.flatten(), k, x.flatten(), usemask=False + ).reshape(x.shape) + + return new_data.view(np.recarray) + + +def unstructured_to_structured(data, dtype): + data = list(np.moveaxis(data, -1, 0)) + + leaves = len(rfn.flatten_descr(dtype)) + if len(data) != leaves: + raise ValueError( + f"Incompatible shape, last dimension of data ({data.shape[-1]}) must match number of leaves in structured dtype ({leaves})." 
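Before an HDF5 dump, `str` fields are swapped for fixed-width bytes fields and swapped back on load, which is what `_handle_data_dump`/`_handle_data_load` above implement. A standalone sketch of that dtype swap, mirroring the library's own pattern:

```python
# Sketch: the StrDType -> BytesDType swap applied to a structured dtype
# prior to writing; mirrors Table._handle_data_dump above.
import numpy as np

data = np.rec.fromarrays(
    [np.array(["a", "bc"]), np.array([1.0, 2.0])],
    dtype=np.dtype([("label", "<U8"), ("value", np.float64)]),
)

dump_dtype = np.dtype(
    [
        (k, np.empty(0, dtype=v).astype(np.dtypes.BytesDType).dtype)
        if type(v) is np.dtypes.StrDType
        else (k, v)
        for k, (v, _) in data.dtype.fields.items()
    ]
)
dumped = data.astype(dump_dtype)  # "label" is now a fixed-width bytes field
```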
+ ) + + new_data = _unstructured_to_structured_helper(data, dtype) + + return new_data + + +######################################################################################## + + +def _dtype_from_dict(data): + np_dtype = [] + + for k, v in data.items(): + if isinstance(v, dict): + dt = _dtype_from_dict(v) + else: + dt = v.dtype + + np_dtype.append((k, dt)) + + return np.dtype(np_dtype) + + +def _dict_to_structured_helper(data, dtype): + for n, (k, (v, _)) in enumerate(dtype.fields.items()): + if isinstance(v.fields, MappingProxyType): + x = _dict_to_structured_helper(data[k], v) + else: + x = data[k] + + if n == 0: + new_data = x.astype(np.dtype([(k, x.dtype)])) + else: + if new_data.shape != x.shape: + raise ValueError( + f"Incompatible shape, expected {new_data.shape} but got {x.shape}." + ) + + new_data = rfn.append_fields( + new_data.flatten(), k, x.flatten(), usemask=False + ).reshape(x.shape) + + return new_data.view(np.recarray) + + +def dict_to_structured(data): + data_dtype = _dtype_from_dict(data) + new_data = _dict_to_structured_helper(data, dtype=data_dtype) + return new_data + + +######################################################################################## + + +def _flex_shape_equal(shape1, shape2): + """Helper function for comparing concrete and flex shapes.""" + return len(shape1) == len(shape2) and reduce( + lambda x, y: x and y, + map( + lambda x: x[0] is None or x[1] is None or x[0] == x[1], + zip(shape1, shape2), + ), + ) + + +######################################################################################## + + +def _validator_from_condition(f): + """Helper decorator for turning a condition into a validation.""" + + def _wrapped_validator(*args, **kwargs): + def _wrapped_condition(model): + f(model, *args, **kwargs) + return model + + return _wrapped_condition + + return _wrapped_validator + + +######################################################################################## + + +def _is_list_unique(data): + seen = set() + duplicates = set() + for element in data: + if element in duplicates: + continue + + if element in seen: + duplicates.add(element) + continue + + seen.add(element) + + return (duplicates == set(), duplicates) diff --git a/tests/test_dataset.py b/tests/test_dataset.py new file mode 100644 index 0000000..5ccd65d --- /dev/null +++ b/tests/test_dataset.py @@ -0,0 +1,253 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
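With the helpers above, a nested dict of equal-shape arrays can be packed into one record array and cast into a `Folder`. A sketch, assuming the module paths introduced in this diff (`oqd_dataschema.utils`, `oqd_dataschema.folder`); the field names are illustrative:

```python
# Sketch: build a nested structured recarray with dict_to_structured, then
# let Folder.cast infer the nested document_schema from its dtype.
import numpy as np

from oqd_dataschema.folder import Folder
from oqd_dataschema.utils import dict_to_structured

data = {
    "t": np.linspace(0.0, 1.0, 4),
    "channels": {
        "ch1": np.zeros(4, dtype=np.complex128),
        "ch2": np.ones(4, dtype=np.complex128),
    },
}

rec = dict_to_structured(data)  # fields: t, channels.(ch1, ch2)
folder = Folder.cast(rec)       # document_schema inferred from rec.dtype
```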
+ +# %% + +import numpy as np +import pytest +from pydantic import TypeAdapter + +from oqd_dataschema import CastDataset, Dataset, condataset +from oqd_dataschema.base import DTypes + +######################################################################################## + + +class TestDatasetDtype: + @pytest.mark.parametrize( + ("dtype", "np_dtype"), + [ + ("bool", np.dtypes.BoolDType), + ("int16", np.dtypes.Int16DType), + ("int32", np.dtypes.Int32DType), + ("int64", np.dtypes.Int64DType), + ("uint16", np.dtypes.UInt16DType), + ("uint32", np.dtypes.UInt32DType), + ("uint64", np.dtypes.UInt64DType), + ("float16", np.dtypes.Float16DType), + ("float32", np.dtypes.Float32DType), + ("float64", np.dtypes.Float64DType), + ("complex64", np.dtypes.Complex64DType), + ("complex128", np.dtypes.Complex128DType), + ("str", np.dtypes.StrDType), + ("bytes", np.dtypes.BytesDType), + ("string", np.dtypes.StringDType), + ], + ) + def test_dtypes(self, dtype, np_dtype): + ds = Dataset(dtype=dtype, shape=(100,)) + + data = np.random.rand(100).astype(np_dtype) + ds.data = data + + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize("dtype", list(DTypes.names())) + def test_unmatched_dtype_data(self, dtype): + ds = Dataset(dtype=dtype, shape=(100,)) + + data = np.random.rand(100).astype("O") + ds.data = data + + @pytest.mark.parametrize("dtype", list(DTypes.names())) + def test_flexible_dtype(self, dtype): + ds = Dataset(dtype=None, shape=(100,)) + + data = np.random.rand(100).astype(DTypes.get(dtype).value) + ds.data = data + + assert ds.dtype == DTypes(type(ds.data.dtype)).name.lower() + + def test_dtype_mutation(self): + ds = Dataset(dtype="float32", shape=(100,)) + + ds.dtype = "float64" + + data = np.random.rand(100) + ds.data = data + + +class TestDatasetShape: + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize( + ("shape", "data_shape"), + [ + ((0,), (100,)), + ((1,), (100,)), + ((99,), (100,)), + ((1, 1), (100,)), + ((100, None), (100,)), + ((None, None), (100,)), + ((None, 100), (100,)), + ], + ) + def test_unmatched_shape_data(self, shape, data_shape): + ds = Dataset(dtype="float64", shape=shape) + + data = np.random.rand(*data_shape) + ds.data = data + + @pytest.mark.parametrize( + ("shape", "data_shape"), + [ + ((None,), (0,)), + ((None,), (1,)), + ((None,), (100,)), + ((None, 0), (0, 0)), + ((None, 1), (1, 1)), + ((None, None), (1, 1)), + ((None, None), (10, 100)), + ((None, None, 1), (1, 1, 1)), + ], + ) + def test_flexible_shape(self, shape, data_shape): + ds = Dataset(dtype="float64", shape=shape) + + data = np.random.rand(*data_shape) + ds.data = data + + assert ds.shape == ds.data.shape + + def test_shape_mutation(self): + ds = Dataset(dtype="float64", shape=(1,)) + + ds.shape = (100,) + + data = np.random.rand(100) + ds.data = data + + +class TestCastDataset: + @pytest.fixture + def adapter(self): + return TypeAdapter(CastDataset) + + @pytest.mark.parametrize( + ("data", "dtype", "shape"), + [ + (np.random.rand(100), "float64", (100,)), + (np.random.rand(10).astype("str"), "str", (10,)), + (np.random.rand(1, 10, 100).astype("bytes"), "bytes", (1, 10, 100)), + ], + ) + def test_cast(self, adapter, data, shape, dtype): + ds = adapter.validate_python(data) + + assert ds.shape == shape and ds.dtype == dtype + + +class TestConstrainedDataset: + @pytest.mark.parametrize( + ("cds", "data"), + [ + (condataset(dtype_constraint="float64"), np.random.rand(10)), + (condataset(dtype_constraint="str"), np.random.rand(10).astype(str)), + ( + 
condataset(dtype_constraint=("float16", "float32", "float64")), + np.random.rand(10), + ), + ( + condataset(dtype_constraint=("float16", "float32", "float64")), + np.random.rand(10).astype("float16"), + ), + ( + condataset(dtype_constraint=("float16", "float32", "float64")), + np.random.rand(10).astype("float32"), + ), + ], + ) + def test_constrained_dataset_dtype(self, cds, data): + adapter = TypeAdapter(cds) + + adapter.validate_python(data) + + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize( + ("cds", "data"), + [ + (condataset(dtype_constraint="float64"), np.random.rand(10).astype(str)), + (condataset(dtype_constraint="str"), np.random.rand(10)), + ( + condataset(dtype_constraint=("float16", "float32", "float64")), + np.random.rand(10).astype(str), + ), + ], + ) + def test_violate_dtype_constraint(self, cds, data): + adapter = TypeAdapter(cds) + + adapter.validate_python(data) + + @pytest.mark.parametrize( + ("cds", "data"), + [ + (condataset(min_dim=1, max_dim=1), np.random.rand(10)), + (condataset(min_dim=0, max_dim=1), np.random.rand(10)), + (condataset(max_dim=2), np.random.rand(10)), + (condataset(max_dim=3), np.random.rand(10, 10, 10)), + (condataset(min_dim=2), np.random.rand(10, 10)), + (condataset(min_dim=2), np.random.rand(10, 10, 10, 10, 10)), + (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10, 10, 10)), + (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10, 10)), + (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10)), + ], + ) + def test_constrained_dataset_dimension(self, cds, data): + adapter = TypeAdapter(cds) + + adapter.validate_python(data) + + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize( + ("cds", "data"), + [ + (condataset(min_dim=1, max_dim=1), np.random.rand(10, 10)), + (condataset(min_dim=2, max_dim=3), np.random.rand(10)), + (condataset(min_dim=2, max_dim=3), np.random.rand(10, 10, 10, 10)), + ], + ) + def test_violate_dimension_constraint(self, cds, data): + adapter = TypeAdapter(cds) + + adapter.validate_python(data) + + @pytest.mark.parametrize( + ("cds", "data"), + [ + (condataset(shape_constraint=(None,)), np.random.rand(10)), + (condataset(shape_constraint=(10,)), np.random.rand(10)), + (condataset(shape_constraint=(None, None)), np.random.rand(1, 2)), + (condataset(shape_constraint=(1, None)), np.random.rand(1, 2)), + (condataset(shape_constraint=(1, 2)), np.random.rand(1, 2)), + (condataset(shape_constraint=(1, None, 3)), np.random.rand(1, 10, 3)), + ], + ) + def test_constrained_dataset_shape(self, cds, data): + adapter = TypeAdapter(cds) + + adapter.validate_python(data) + + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize( + ("cds", "data"), + [ + (condataset(shape_constraint=(1,)), np.random.rand(10)), + (condataset(shape_constraint=(None,)), np.random.rand(10, 10)), + (condataset(shape_constraint=(None, 1)), np.random.rand(10, 10)), + (condataset(shape_constraint=(None, 1)), np.random.rand(1, 10)), + ], + ) + def test_violate_shape_constraint(self, cds, data): + adapter = TypeAdapter(cds) + + adapter.validate_python(data) diff --git a/tests/test_datastore.py b/tests/test_datastore.py index 6970b07..8499609 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -13,41 +13,96 @@ # limitations under the License. 
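The constrained variants can also be used directly through a pydantic `TypeAdapter`, exactly as these tests do; a brief sketch:

```python
# Sketch: standalone use of condataset-built constrained types. Raw ndarrays
# are cast to Dataset and then checked against the declared constraints.
import numpy as np
from pydantic import TypeAdapter

from oqd_dataschema import condataset

floats_only = TypeAdapter(condataset(dtype_constraint=("float32", "float64")))
floats_only.validate_python(np.random.rand(10))  # passes: float64 is allowed

rows_of_three = TypeAdapter(condataset(shape_constraint=(None, 3)))
rows_of_three.validate_python(np.random.rand(10, 3))  # passes: first axis free
```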
# %% -import pathlib +import uuid +from typing import Dict, Optional import numpy as np import pytest -from oqd_dataschema.base import Dataset, mapping -from oqd_dataschema.datastore import Datastore -from oqd_dataschema.groups import ( - SinaraRawDataGroup, -) - +from oqd_dataschema import Dataset, Datastore, GroupBase # %% -@pytest.mark.parametrize( - "dtype", - [ - "int32", - "int64", - "float32", - "float64", - "complex64", - "complex128", - ], + +_Group = type( + f"_Group_{uuid.uuid4()}".replace("-", ""), + (GroupBase,), + { + "__annotations__": { + "x": Dataset, + "y": Dict[str, Dataset], + "z": Optional[Dataset], + }, + "y": {}, + "z": None, + }, ) -def test_serialize_deserialize(dtype): - data = np.ones([10, 10]).astype(dtype) - dataset = SinaraRawDataGroup(camera_images=Dataset(data=data)) - data = Datastore(groups={"test": dataset}) - filepath = pathlib.Path("test.h5") - data.model_dump_hdf5(filepath) - data_reload = Datastore.model_validate_hdf5(filepath) +class TestDatastore: + @pytest.mark.parametrize( + ("dtype", "np_dtype"), + [ + ("bool", np.dtypes.BoolDType), + ("int16", np.dtypes.Int16DType), + ("int32", np.dtypes.Int32DType), + ("int64", np.dtypes.Int64DType), + ("uint16", np.dtypes.UInt16DType), + ("uint32", np.dtypes.UInt32DType), + ("uint64", np.dtypes.UInt64DType), + ("float16", np.dtypes.Float16DType), + ("float32", np.dtypes.Float32DType), + ("float64", np.dtypes.Float64DType), + ("complex64", np.dtypes.Complex64DType), + ("complex128", np.dtypes.Complex128DType), + ("str", np.dtypes.StrDType), + ("bytes", np.dtypes.BytesDType), + ("string", np.dtypes.StringDType), + ], + ) + def test_serialize_deserialize_dtypes(self, dtype, np_dtype, tmp_path): + f = tmp_path / f"tmp{uuid.uuid4()}.h5" - assert data_reload.groups["test"].camera_images.data.dtype == mapping[dtype] + datastore = Datastore( + groups={"g1": _Group(x=Dataset(data=np.random.rand(1).astype(np_dtype)))} + ) + datastore.model_dump_hdf5(f) -# %% + Datastore.model_validate_hdf5(f) + + @pytest.mark.parametrize( + ("x", "y", "z"), + [ + ( + Dataset(data=np.random.rand(10)), + {}, + None, + ), + ( + Dataset(data=np.random.rand(10)), + {"f1": Dataset(data=np.random.rand(10))}, + None, + ), + ( + Dataset(data=np.random.rand(10)), + {"f1": Dataset(data=np.random.rand(10))}, + Dataset(data=np.random.rand(10)), + ), + ( + Dataset(data=np.random.rand(10)), + { + "f1": Dataset(data=np.random.rand(10)), + "f2": Dataset(data=np.random.rand(10)), + }, + Dataset(data=np.random.rand(10)), + ), + ], + ) + def test_serialize_deserialize_dataset_types(self, x, y, z, tmp_path): + f = tmp_path / f"tmp{uuid.uuid4()}.h5" + + datastore = Datastore(groups={"g1": _Group(x=x, y=y, z=z)}) + + datastore.model_dump_hdf5(f) + + Datastore.model_validate_hdf5(f) diff --git a/tests/test_group.py b/tests/test_group.py new file mode 100644 index 0000000..ca87fe6 --- /dev/null +++ b/tests/test_group.py @@ -0,0 +1,170 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
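For completeness, the round-trip pattern exercised above in plain form: a minimal sketch of writing and reloading a `Datastore`, with hypothetical file, group, and field names:

```python
# Sketch: end-to-end HDF5 round trip with Datastore.
import pathlib

import numpy as np

from oqd_dataschema import Dataset, Datastore, GroupBase


class ScanGroup(GroupBase):
    voltages: Dataset


datastore = Datastore(
    groups={"scan1": ScanGroup(voltages=Dataset(data=np.random.rand(100)))}
)

filepath = pathlib.Path("scan.h5")
datastore.model_dump_hdf5(filepath, mode="w")

reloaded = Datastore.model_validate_hdf5(filepath)
assert isinstance(reloaded.groups["scan1"], ScanGroup)
```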
+ +# %% + +import uuid +from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple + +import numpy as np +import pytest + +from oqd_dataschema import CastDataset, Dataset, GroupBase, condataset + +######################################################################################## + + +class TestGroupDefinition: + @pytest.mark.parametrize( + "field_type", + [ + Dataset, + CastDataset, + Dict[str, Dataset], + Dict[str, CastDataset], + condataset(dtype_constraint="float32"), + condataset(dtype_constraint=("float16", "float32", "float64")), + condataset(min_dim=1), + condataset(max_dim=1), + condataset(min_dim=1, max_dim=2), + condataset(shape_constraint=(1,)), + condataset(shape_constraint=(None,)), + condataset(shape_constraint=(None, 1)), + condataset(shape_constraint=(None, None)), + Optional[Dataset], + ], + ) + def test_data_field_definition(self, field_type): + type( + f"_Group_{uuid.uuid4()}".replace("-", ""), + (GroupBase,), + {"__annotations__": {"x": field_type}}, + ) + + @pytest.mark.xfail(raises=TypeError) + @pytest.mark.parametrize( + "field_type", + [ + Any, + int, + List[int], + Tuple[int], + List[Dataset], + Tuple[Dataset], + Dict[int, Dataset], + ], + ) + def test_invalid_data_field_definition(self, field_type): + type( + f"_Group_{uuid.uuid4()}".replace("-", ""), + (GroupBase,), + {"__annotations__": {"x": field_type}}, + ) + + @pytest.mark.xfail(raises=AttributeError) + def test_overwriting_attrs(self): + type( + f"_Group_{uuid.uuid4()}".replace("-", ""), + (GroupBase,), + {"__annotations__": {"attrs": Dict[str, Any]}}, + ) + + @pytest.mark.xfail(raises=AttributeError) + def test_overwriting_class_(self): + groupname = f"_Group_{uuid.uuid4()}".replace("-", "") + type( + groupname, + (GroupBase,), + {"__annotations__": {"class_": Literal[groupname]}}, + ) + + @pytest.mark.parametrize( + ("field_type", "data"), + [ + (Dataset, Dataset(data=np.random.rand(100))), + (CastDataset, Dataset(data=np.random.rand(100))), + ( + Dict[str, Dataset], + { + "1": Dataset(data=np.random.rand(100)), + "2": Dataset(data=np.random.rand(100)), + }, + ), + ( + Dict[str, CastDataset], + { + "1": Dataset(data=np.random.rand(100)), + "2": Dataset(data=np.random.rand(100)), + }, + ), + (condataset(dtype_constraint="float64"), Dataset(data=np.random.rand(100))), + ( + condataset(dtype_constraint=("float16", "float32", "float64")), + Dataset(data=np.random.rand(100)), + ), + (Optional[Dataset], Dataset(data=np.random.rand(100))), + (Optional[Dataset], None), + ], + ) + def test_group_instantiation(self, field_type, data): + _Group = type( + f"_Group_{uuid.uuid4()}".replace("-", ""), + (GroupBase,), + {"__annotations__": {"x": field_type}}, + ) + + _Group(x=data) + + @pytest.mark.parametrize( + ("classvar_type"), + [ + ClassVar, + ClassVar[int], + ], + ) + def test_class_variable(self, classvar_type): + type( + f"_Group_{uuid.uuid4()}".replace("-", ""), + (GroupBase,), + {"__annotations__": {"x": classvar_type}}, + ) + + @pytest.mark.parametrize( + ("dataset"), + [ + Dataset(), + Dataset(data=np.random.rand(10)), + Dataset(dtype="float64", shape=(10,)), + Dataset(dtype="float64", shape=(10,), data=np.random.rand(10)), + ], + ) + def test_default_dataset(self, dataset): + _Group = type( + f"_Group_{uuid.uuid4()}".replace("-", ""), + (GroupBase,), + {"__annotations__": {"x": Dataset}, "x": dataset}, + ) + + g = _Group() + + assert ( + ( + (g.x.data == dataset.data).all() + and g.x.dtype == dataset.dtype + and g.x.shape == dataset.shape + and g.x.attrs == dataset.attrs + ) + if 
isinstance(dataset.data, np.ndarray) + else g.x == dataset + ) diff --git a/tests/test_groupregistry.py b/tests/test_groupregistry.py new file mode 100644 index 0000000..989ff4b --- /dev/null +++ b/tests/test_groupregistry.py @@ -0,0 +1,77 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# %% + +import pytest + +from oqd_dataschema import ( + CastDataset, + Dataset, + GroupBase, + GroupRegistry, + condataset, +) + + +class TestGroupRegistry: + def test_clear(self): + GroupRegistry.clear() + + GroupRegistry.groups = dict() + + def test_add_group(self): + GroupRegistry.clear() + + groups = set() + for k in "ABCDE": + groups.add( + type(f"_Group{k}", (GroupBase,), {"__annotations__": {"x": Dataset}}) + ) + + assert set(GroupRegistry.groups.values()) == groups + + def test_overwrite_group(self): + GroupRegistry.clear() + + _GroupA = type("_GroupA", (GroupBase,), {"__annotations__": {"x": Dataset}}) + + assert set(GroupRegistry.groups.values()) == {_GroupA} + + with pytest.warns(UserWarning): + _mGroupA = type( + "_GroupA", (GroupBase,), {"__annotations__": {"x": CastDataset}} + ) + + assert set(GroupRegistry.groups.values()) == {_mGroupA} + + @pytest.fixture + def group_generator(self): + def _groupgen(): + groups = [] + for k, dtype in zip( + "ABCDE", + ["str", "float64", "bytes", "bool", ("int16", "int32", "int64")], + ): + groups.append( + type( + f"_Group{k}", + (GroupBase,), + {"__annotations__": {"x": condataset(dtype_constraint=dtype)}}, + ) + ) + return groups + + return _groupgen diff --git a/tests/test_table.py b/tests/test_table.py new file mode 100644 index 0000000..ca3d3b0 --- /dev/null +++ b/tests/test_table.py @@ -0,0 +1,372 @@ +# Copyright 2024-2025 Open Quantum Design + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
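As the registry tests below exercise, redefining a group under an existing name replaces the registry entry and emits a `UserWarning`. A sketch of that behaviour:

```python
# Sketch: overwriting a registered group by name triggers a UserWarning.
import warnings

from oqd_dataschema import Dataset, GroupBase


class _DemoGroup(GroupBase):
    x: Dataset


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")

    class _DemoGroup(GroupBase):  # same name: replaces the first registration
        x: Dataset


assert any(issubclass(w.category, UserWarning) for w in caught)
```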
+ +# %% + +import numpy as np +import pytest + +from oqd_dataschema import Table +from oqd_dataschema.base import DTypes + +######################################################################################## + + +class TestTable: + def test_empty_table(self): + Table(columns=[], shape=(100,)) + + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize( + ("column"), + [ + [("c1", "bool"), ("c1", "int16")], + [ + ("c1", "str"), + ("c2", "int16"), + ("c3", "float64"), + ("c1", "complex128"), + ], + ], + ) + def test_duplicate_column(self, column): + Table(columns=column, shape=(100,)) + + +class TestTableDType: + @pytest.mark.parametrize( + ("dtype", "np_dtype"), + [ + ("bool", np.dtypes.BoolDType()), + ("int16", np.dtypes.Int16DType()), + ("int32", np.dtypes.Int32DType()), + ("int64", np.dtypes.Int64DType()), + ("uint16", np.dtypes.UInt16DType()), + ("uint32", np.dtypes.UInt32DType()), + ("uint64", np.dtypes.UInt64DType()), + ("float16", np.dtypes.Float16DType()), + ("float32", np.dtypes.Float32DType()), + ("float64", np.dtypes.Float64DType()), + ("complex64", np.dtypes.Complex64DType()), + ("complex128", np.dtypes.Complex128DType()), + ("str", np.dtypes.StrDType(16)), + ("bytes", np.dtypes.BytesDType(16)), + ], + ) + def test_dtypes(self, dtype, np_dtype): + tbl = Table(columns=[("c", dtype)], shape=(100,)) + + data = np.rec.fromarrays( + np.random.rand(1, 100), + dtype=np.dtype( + [ + ("c", np_dtype), + ] + ), + ) + tbl.data = data + + @pytest.mark.parametrize( + ("column", "np_dtype"), + [ + ( + [("c1", "bool"), ("c2", "int16")], + np.dtype( + [("c1", np.dtypes.BoolDType()), ("c2", np.dtypes.Int16DType())] + ), + ), + ( + [ + ("c1", "str"), + ("c2", "int16"), + ("c3", "float64"), + ("c4", "complex128"), + ], + np.dtype( + [ + ("c1", np.dtypes.StrDType(16)), + ("c2", np.dtypes.Int16DType()), + ("c3", np.dtypes.Float64DType()), + ("c4", np.dtypes.Complex128DType()), + ] + ), + ), + ], + ) + def test_multi_column_dtypes(self, column, np_dtype): + tbl = Table(columns=column, shape=(100,)) + + data = np.rec.fromarrays(np.random.rand(len(column), 100), dtype=np_dtype) + tbl.data = data + + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize( + "dtype", + [ + "bool", + "int16", + "int32", + "int64", + "uint16", + "uint32", + "uint64", + "float16", + "float32", + "float64", + "complex64", + "complex128", + "str", + "bytes", + ], + ) + def test_unmatched_dtype_data(self, dtype): + tbl = Table(columns=[("c", dtype)], shape=(100,)) + + data = np.rec.fromarrays( + np.random.rand(1, 100), + dtype=np.dtype( + [ + ("c", "O"), + ] + ), + ) + tbl.data = data + + @pytest.mark.parametrize( + "np_dtype", + [ + np.dtypes.BoolDType(), + np.dtypes.Int16DType(), + np.dtypes.Int32DType(), + np.dtypes.Int64DType(), + np.dtypes.UInt16DType(), + np.dtypes.UInt32DType(), + np.dtypes.UInt64DType(), + np.dtypes.Float16DType(), + np.dtypes.Float32DType(), + np.dtypes.Float64DType(), + np.dtypes.Complex64DType(), + np.dtypes.Complex128DType(), + np.dtypes.StrDType(16), + np.dtypes.BytesDType(16), + ], + ) + def test_flexible_dtype(self, np_dtype): + tbl = Table(columns=[("c", None)], shape=(100,)) + + data = np.rec.fromarrays( + np.random.rand(1, 100), + dtype=np.dtype( + [ + ("c", np_dtype), + ] + ), + ) + tbl.data = data + + assert ( + dict(tbl.columns)["c"] + == DTypes(type(tbl.data.dtype.fields["c"][0])).name.lower() + ) + + def test_dtype_mutation(self): + tbl = Table(columns=[("c", "float32")], shape=(100,)) + + tbl.columns[0] = ("c", "float64") + + data = np.rec.fromarrays( + 
np.random.rand(1, 100), + dtype=np.dtype( + [ + ("c", "float64"), + ] + ), + ) + tbl.data = data + + +# class TestDatasetShape: +# @pytest.mark.xfail(raises=ValueError) +# @pytest.mark.parametrize( +# ("shape", "data_shape"), +# [ +# ((0,), (100,)), +# ((1,), (100,)), +# ((99,), (100,)), +# ((1, 1), (100,)), +# ((100, None), (100,)), +# ((None, None), (100,)), +# ((None, 100), (100,)), +# ], +# ) +# def test_unmatched_shape_data(self, shape, data_shape): +# ds = Dataset(dtype="float64", shape=shape) + +# data = np.random.rand(*data_shape) +# ds.data = data + +# @pytest.mark.parametrize( +# ("shape", "data_shape"), +# [ +# ((None,), (0,)), +# ((None,), (1,)), +# ((None,), (100,)), +# ((None, 0), (0, 0)), +# ((None, 1), (1, 1)), +# ((None, None), (1, 1)), +# ((None, None), (10, 100)), +# ((None, None, 1), (1, 1, 1)), +# ], +# ) +# def test_flexible_shape(self, shape, data_shape): +# ds = Dataset(dtype="float64", shape=shape) + +# data = np.random.rand(*data_shape) +# ds.data = data + +# assert ds.shape == ds.data.shape + +# def test_shape_mutation(self): +# ds = Dataset(dtype="float64", shape=(1,)) + +# ds.shape = (100,) + +# data = np.random.rand(100) +# ds.data = data + + +# class TestCastDataset: +# @pytest.fixture +# def adapter(self): +# return TypeAdapter(CastDataset) + +# @pytest.mark.parametrize( +# ("data", "dtype", "shape"), +# [ +# (np.random.rand(100), "float64", (100,)), +# (np.random.rand(10).astype("str"), "str", (10,)), +# (np.random.rand(1, 10, 100).astype("bytes"), "bytes", (1, 10, 100)), +# ], +# ) +# def test_cast(self, adapter, data, shape, dtype): +# ds = adapter.validate_python(data) + +# assert ds.shape == shape and ds.dtype == dtype + + +# class TestConstrainedDataset: +# @pytest.mark.parametrize( +# ("cds", "data"), +# [ +# (condataset(dtype_constraint="float64"), np.random.rand(10)), +# (condataset(dtype_constraint="str"), np.random.rand(10).astype(str)), +# ( +# condataset(dtype_constraint=("float16", "float32", "float64")), +# np.random.rand(10), +# ), +# ( +# condataset(dtype_constraint=("float16", "float32", "float64")), +# np.random.rand(10).astype("float16"), +# ), +# ( +# condataset(dtype_constraint=("float16", "float32", "float64")), +# np.random.rand(10).astype("float32"), +# ), +# ], +# ) +# def test_constrained_dataset_dtype(self, cds, data): +# adapter = TypeAdapter(cds) + +# adapter.validate_python(data) + +# @pytest.mark.xfail(raises=ValueError) +# @pytest.mark.parametrize( +# ("cds", "data"), +# [ +# (condataset(dtype_constraint="float64"), np.random.rand(10).astype(str)), +# (condataset(dtype_constraint="str"), np.random.rand(10)), +# ( +# condataset(dtype_constraint=("float16", "float32", "float64")), +# np.random.rand(10).astype(str), +# ), +# ], +# ) +# def test_violate_dtype_constraint(self, cds, data): +# adapter = TypeAdapter(cds) + +# adapter.validate_python(data) + +# @pytest.mark.parametrize( +# ("cds", "data"), +# [ +# (condataset(min_dim=1, max_dim=1), np.random.rand(10)), +# (condataset(min_dim=0, max_dim=1), np.random.rand(10)), +# (condataset(max_dim=2), np.random.rand(10)), +# (condataset(max_dim=3), np.random.rand(10, 10, 10)), +# (condataset(min_dim=2), np.random.rand(10, 10)), +# (condataset(min_dim=2), np.random.rand(10, 10, 10, 10, 10)), +# (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10, 10, 10)), +# (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10, 10)), +# (condataset(min_dim=2, max_dim=4), np.random.rand(10, 10)), +# ], +# ) +# def test_constrained_dataset_dimension(self, cds, data): +# adapter = 
TypeAdapter(cds) + +# adapter.validate_python(data) + +# @pytest.mark.xfail(raises=ValueError) +# @pytest.mark.parametrize( +# ("cds", "data"), +# [ +# (condataset(min_dim=1, max_dim=1), np.random.rand(10, 10)), +# (condataset(min_dim=2, max_dim=3), np.random.rand(10)), +# (condataset(min_dim=2, max_dim=3), np.random.rand(10, 10, 10, 10)), +# ], +# ) +# def test_violate_dimension_constraint(self, cds, data): +# adapter = TypeAdapter(cds) + +# adapter.validate_python(data) + +# @pytest.mark.parametrize( +# ("cds", "data"), +# [ +# (condataset(shape_constraint=(None,)), np.random.rand(10)), +# (condataset(shape_constraint=(10,)), np.random.rand(10)), +# (condataset(shape_constraint=(None, None)), np.random.rand(1, 2)), +# (condataset(shape_constraint=(1, None)), np.random.rand(1, 2)), +# (condataset(shape_constraint=(1, 2)), np.random.rand(1, 2)), +# (condataset(shape_constraint=(1, None, 3)), np.random.rand(1, 10, 3)), +# ], +# ) +# def test_constrained_dataset_shape(self, cds, data): +# adapter = TypeAdapter(cds) + +# adapter.validate_python(data) + +# @pytest.mark.xfail(raises=ValueError) +# @pytest.mark.parametrize( +# ("cds", "data"), +# [ +# (condataset(shape_constraint=(1,)), np.random.rand(10)), +# (condataset(shape_constraint=(None,)), np.random.rand(10, 10)), +# (condataset(shape_constraint=(None, 1)), np.random.rand(10, 10)), +# (condataset(shape_constraint=(None, 1)), np.random.rand(1, 10)), +# ], +# ) +# def test_violate_shape_constraint(self, cds, data): +# adapter = TypeAdapter(cds) + +# adapter.validate_python(data) diff --git a/tests/test_typeadapt.py b/tests/test_typeadapt.py deleted file mode 100644 index 609f09c..0000000 --- a/tests/test_typeadapt.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2024-2025 Open Quantum Design - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
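The commented-out cases above track the existing `Dataset` shape tests; the underlying flexible-shape behaviour is already implemented for `Table` by the validators in `table.py`. A sketch of what such a test would exercise:

```python
# Sketch: Table shapes support flexible (None) axes, concretized on
# assignment, matching the Dataset behaviour tested earlier.
import numpy as np

from oqd_dataschema import Table

tbl = Table(columns=[("c", "float64")], shape=(None,))  # flexible length

tbl.data = np.rec.fromarrays(
    np.random.rand(1, 7),
    dtype=np.dtype([("c", np.float64)]),
)
assert tbl.shape == (7,)  # concretized from the assigned data
```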
- - -# %% -import pathlib - -import numpy as np - -from oqd_dataschema.base import Dataset, GroupBase -from oqd_dataschema.datastore import Datastore -from oqd_dataschema.groups import ( - SinaraRawDataGroup, -) - - -# %% -def test_adapt(): - class TestNewGroup(GroupBase): - """ """ - - array: Dataset - - filepath = pathlib.Path("test.h5") - - data = np.ones([10, 10]).astype("int64") - group1 = TestNewGroup(array=Dataset(data=data)) - - data = np.ones([10, 10]).astype("int32") - group2 = SinaraRawDataGroup(camera_images=Dataset(data=data)) - - datastore = Datastore( - groups={ - "group1": group1, - "group2": group2, - } - ) - datastore.model_dump_hdf5(filepath, mode="w") - - Datastore.model_validate_hdf5(filepath) diff --git a/uv.lock b/uv.lock index 22accdc..e015315 100644 --- a/uv.lock +++ b/uv.lock @@ -2,7 +2,8 @@ version = 1 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14'", - "python_full_version >= '3.11' and python_full_version < '3.14'", + "python_full_version >= '3.12' and python_full_version < '3.14'", + "python_full_version == '3.11.*'", "python_full_version < '3.11'", ] @@ -166,15 +167,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/eb/f4151e0c7377a6e08a38108609ba5cede57986802757848688aeedd1b9e8/beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a", size = 105113 }, ] -[[package]] -name = "bidict" -version = "0.23.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764 }, -] - [[package]] name = "bleach" version = "6.2.0" @@ -525,33 +517,33 @@ wheels = [ [[package]] name = "h5py" -version = "3.13.0" +version = "3.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/03/2e/a22d6a8bfa6f8be33e7febd985680fba531562795f0a9077ed1eb047bfb0/h5py-3.13.0.tar.gz", hash = "sha256:1870e46518720023da85d0895a1960ff2ce398c5671eac3b1a41ec696b7105c3", size = 414876 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/02/8a/bc76588ff1a254e939ce48f30655a8f79fac614ca8bd1eda1a79fa276671/h5py-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5540daee2b236d9569c950b417f13fd112d51d78b4c43012de05774908dff3f5", size = 3413286 }, - { url = "https://files.pythonhosted.org/packages/19/bd/9f249ecc6c517b2796330b0aab7d2351a108fdbd00d4bb847c0877b5533e/h5py-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:10894c55d46df502d82a7a4ed38f9c3fdbcb93efb42e25d275193e093071fade", size = 2915673 }, - { url = "https://files.pythonhosted.org/packages/72/71/0dd079208d7d3c3988cebc0776c2de58b4d51d8eeb6eab871330133dfee6/h5py-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb267ce4b83f9c42560e9ff4d30f60f7ae492eacf9c7ede849edf8c1b860e16b", size = 4283822 }, - { url = "https://files.pythonhosted.org/packages/d8/fa/0b6a59a1043c53d5d287effa02303bd248905ee82b25143c7caad8b340ad/h5py-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d2cf6a231a07c14acd504a945a6e9ec115e0007f675bde5e0de30a4dc8d86a31", size = 4548100 }, - { url = "https://files.pythonhosted.org/packages/12/42/ad555a7ff7836c943fe97009405566dc77bcd2a17816227c10bd067a3ee1/h5py-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:851ae3a8563d87a5a0dc49c2e2529c75b8842582ccaefbf84297d2cfceeacd61", size = 2950547 }, - { url = "https://files.pythonhosted.org/packages/86/2b/50b15fdefb577d073b49699e6ea6a0a77a3a1016c2b67e2149fc50124a10/h5py-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8a8e38ef4ceb969f832cc230c0cf808c613cc47e31e768fd7b1106c55afa1cb8", size = 3422922 }, - { url = "https://files.pythonhosted.org/packages/94/59/36d87a559cab9c59b59088d52e86008d27a9602ce3afc9d3b51823014bf3/h5py-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f35640e81b03c02a88b8bf99fb6a9d3023cc52f7c627694db2f379e0028f2868", size = 2921619 }, - { url = "https://files.pythonhosted.org/packages/37/ef/6f80b19682c0b0835bbee7b253bec9c16af9004f2fd6427b1dd858100273/h5py-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:337af114616f3656da0c83b68fcf53ecd9ce9989a700b0883a6e7c483c3235d4", size = 4259366 }, - { url = "https://files.pythonhosted.org/packages/03/71/c99f662d4832c8835453cf3476f95daa28372023bda4aa1fca9e97c24f09/h5py-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:782ff0ac39f455f21fd1c8ebc007328f65f43d56718a89327eec76677ebf238a", size = 4509058 }, - { url = "https://files.pythonhosted.org/packages/56/89/e3ff23e07131ff73a72a349be9639e4de84e163af89c1c218b939459a98a/h5py-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:22ffe2a25770a2d67213a1b94f58006c14dce06933a42d2aaa0318c5868d1508", size = 2966428 }, - { url = "https://files.pythonhosted.org/packages/d8/20/438f6366ba4ded80eadb38f8927f5e2cd6d2e087179552f20ae3dbcd5d5b/h5py-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:477c58307b6b9a2509c59c57811afb9f598aedede24a67da808262dfa0ee37b4", size = 3384442 }, - { url = "https://files.pythonhosted.org/packages/10/13/cc1cb7231399617d9951233eb12fddd396ff5d4f7f057ee5d2b1ca0ee7e7/h5py-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57c4c74f627c616f02b7aec608a8c706fe08cb5b0ba7c08555a4eb1dde20805a", size = 2917567 }, - { url = "https://files.pythonhosted.org/packages/9e/d9/aed99e1c858dc698489f916eeb7c07513bc864885d28ab3689d572ba0ea0/h5py-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:357e6dc20b101a805ccfd0024731fbaf6e8718c18c09baf3b5e4e9d198d13fca", size = 4669544 }, - { url = "https://files.pythonhosted.org/packages/a7/da/3c137006ff5f0433f0fb076b1ebe4a7bf7b5ee1e8811b5486af98b500dd5/h5py-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6f13f9b5ce549448c01e4dfe08ea8d1772e6078799af2c1c8d09e941230a90d", size = 4932139 }, - { url = "https://files.pythonhosted.org/packages/25/61/d897952629cae131c19d4c41b2521e7dd6382f2d7177c87615c2e6dced1a/h5py-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:21daf38171753899b5905f3d82c99b0b1ec2cbbe282a037cad431feb620e62ec", size = 2954179 }, - { url = "https://files.pythonhosted.org/packages/60/43/f276f27921919a9144074320ce4ca40882fc67b3cfee81c3f5c7df083e97/h5py-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e520ec76de00943dd017c8ea3f354fa1d2f542eac994811943a8faedf2a7d5cb", size = 3358040 }, - { url = "https://files.pythonhosted.org/packages/1b/86/ad4a4cf781b08d4572be8bbdd8f108bb97b266a14835c640dc43dafc0729/h5py-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:e79d8368cd9295045956bfb436656bea3f915beaa11d342e9f79f129f5178763", size = 2892766 }, - { url = "https://files.pythonhosted.org/packages/69/84/4c6367d6b58deaf0fa84999ec819e7578eee96cea6cbd613640d0625ed5e/h5py-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56dd172d862e850823c4af02dc4ddbc308f042b85472ffdaca67f1598dff4a57", size = 4664255 }, - { url = "https://files.pythonhosted.org/packages/fd/41/bc2df86b72965775f6d621e0ee269a5f3ac23e8f870abf519de9c7d93b4d/h5py-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be949b46b7388074c5acae017fbbe3e5ba303fd9daaa52157fdfef30bbdacadd", size = 4927580 }, - { url = "https://files.pythonhosted.org/packages/97/34/165b87ea55184770a0c1fcdb7e017199974ad2e271451fd045cfe35f3add/h5py-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:4f97ecde7ac6513b21cd95efdfc38dc6d19f96f6ca6f2a30550e94e551458e0a", size = 2940890 }, +sdist = { url = "https://files.pythonhosted.org/packages/5d/57/dfb3c5c3f1bf5f5ef2e59a22dec4ff1f3d7408b55bfcefcfb0ea69ef21c6/h5py-3.14.0.tar.gz", hash = "sha256:2372116b2e0d5d3e5e705b7f663f7c8d96fa79a4052d250484ef91d24d6a08f4", size = 424323 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/89/06cbb421e01dea2e338b3154326523c05d9698f89a01f9d9b65e1ec3fb18/h5py-3.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:24df6b2622f426857bda88683b16630014588a0e4155cba44e872eb011c4eaed", size = 3332522 }, + { url = "https://files.pythonhosted.org/packages/c3/e7/6c860b002329e408348735bfd0459e7b12f712c83d357abeef3ef404eaa9/h5py-3.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ff2389961ee5872de697054dd5a033b04284afc3fb52dc51d94561ece2c10c6", size = 2831051 }, + { url = "https://files.pythonhosted.org/packages/fa/cd/3dd38cdb7cc9266dc4d85f27f0261680cb62f553f1523167ad7454e32b11/h5py-3.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:016e89d3be4c44f8d5e115fab60548e518ecd9efe9fa5c5324505a90773e6f03", size = 4324677 }, + { url = "https://files.pythonhosted.org/packages/b1/45/e1a754dc7cd465ba35e438e28557119221ac89b20aaebef48282654e3dc7/h5py-3.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1223b902ef0b5d90bcc8a4778218d6d6cd0f5561861611eda59fa6c52b922f4d", size = 4557272 }, + { url = "https://files.pythonhosted.org/packages/5c/06/f9506c1531645829d302c420851b78bb717af808dde11212c113585fae42/h5py-3.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:852b81f71df4bb9e27d407b43071d1da330d6a7094a588efa50ef02553fa7ce4", size = 2866734 }, + { url = "https://files.pythonhosted.org/packages/61/1b/ad24a8ce846cf0519695c10491e99969d9d203b9632c4fcd5004b1641c2e/h5py-3.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f30dbc58f2a0efeec6c8836c97f6c94afd769023f44e2bb0ed7b17a16ec46088", size = 3352382 }, + { url = "https://files.pythonhosted.org/packages/36/5b/a066e459ca48b47cc73a5c668e9924d9619da9e3c500d9fb9c29c03858ec/h5py-3.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:543877d7f3d8f8a9828ed5df6a0b78ca3d8846244b9702e99ed0d53610b583a8", size = 2852492 }, + { url = "https://files.pythonhosted.org/packages/08/0c/5e6aaf221557314bc15ba0e0da92e40b24af97ab162076c8ae009320a42b/h5py-3.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c497600c0496548810047257e36360ff551df8b59156d3a4181072eed47d8ad", size = 4298002 }, + { url = 
"https://files.pythonhosted.org/packages/21/d4/d461649cafd5137088fb7f8e78fdc6621bb0c4ff2c090a389f68e8edc136/h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:723a40ee6505bd354bfd26385f2dae7bbfa87655f4e61bab175a49d72ebfc06b", size = 4516618 }, + { url = "https://files.pythonhosted.org/packages/db/0c/6c3f879a0f8e891625817637fad902da6e764e36919ed091dc77529004ac/h5py-3.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:d2744b520440a996f2dae97f901caa8a953afc055db4673a993f2d87d7f38713", size = 2874888 }, + { url = "https://files.pythonhosted.org/packages/3e/77/8f651053c1843391e38a189ccf50df7e261ef8cd8bfd8baba0cbe694f7c3/h5py-3.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e0045115d83272090b0717c555a31398c2c089b87d212ceba800d3dc5d952e23", size = 3312740 }, + { url = "https://files.pythonhosted.org/packages/ff/10/20436a6cf419b31124e59fefc78d74cb061ccb22213226a583928a65d715/h5py-3.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6da62509b7e1d71a7d110478aa25d245dd32c8d9a1daee9d2a42dba8717b047a", size = 2829207 }, + { url = "https://files.pythonhosted.org/packages/3f/19/c8bfe8543bfdd7ccfafd46d8cfd96fce53d6c33e9c7921f375530ee1d39a/h5py-3.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:554ef0ced3571366d4d383427c00c966c360e178b5fb5ee5bb31a435c424db0c", size = 4708455 }, + { url = "https://files.pythonhosted.org/packages/86/f9/f00de11c82c88bfc1ef22633557bfba9e271e0cb3189ad704183fc4a2644/h5py-3.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cbd41f4e3761f150aa5b662df991868ca533872c95467216f2bec5fcad84882", size = 4929422 }, + { url = "https://files.pythonhosted.org/packages/7a/6d/6426d5d456f593c94b96fa942a9b3988ce4d65ebaf57d7273e452a7222e8/h5py-3.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:bf4897d67e613ecf5bdfbdab39a1158a64df105827da70ea1d90243d796d367f", size = 2862845 }, + { url = "https://files.pythonhosted.org/packages/6c/c2/7efe82d09ca10afd77cd7c286e42342d520c049a8c43650194928bcc635c/h5py-3.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:aa4b7bbce683379b7bf80aaba68e17e23396100336a8d500206520052be2f812", size = 3289245 }, + { url = "https://files.pythonhosted.org/packages/4f/31/f570fab1239b0d9441024b92b6ad03bb414ffa69101a985e4c83d37608bd/h5py-3.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ef9603a501a04fcd0ba28dd8f0995303d26a77a980a1f9474b3417543d4c6174", size = 2807335 }, + { url = "https://files.pythonhosted.org/packages/0d/ce/3a21d87896bc7e3e9255e0ad5583ae31ae9e6b4b00e0bcb2a67e2b6acdbc/h5py-3.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8cbaf6910fa3983c46172666b0b8da7b7bd90d764399ca983236f2400436eeb", size = 4700675 }, + { url = "https://files.pythonhosted.org/packages/e7/ec/86f59025306dcc6deee5fda54d980d077075b8d9889aac80f158bd585f1b/h5py-3.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d90e6445ab7c146d7f7981b11895d70bc1dd91278a4f9f9028bc0c95e4a53f13", size = 4921632 }, + { url = "https://files.pythonhosted.org/packages/3f/6d/0084ed0b78d4fd3e7530c32491f2884140d9b06365dac8a08de726421d4a/h5py-3.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:ae18e3de237a7a830adb76aaa68ad438d85fe6e19e0d99944a3ce46b772c69b3", size = 2852929 }, ] [[package]] @@ -665,7 +657,8 @@ version = "9.5.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14'", - "python_full_version >= '3.11' and python_full_version < '3.14'", + "python_full_version >= '3.12' and 
python_full_version < '3.14'", + "python_full_version == '3.11.*'", ] dependencies = [ { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" }, @@ -1436,8 +1429,8 @@ name = "oqd-dataschema" version = "0.1.0" source = { editable = "." } dependencies = [ - { name = "bidict" }, { name = "h5py" }, + { name = "pandas" }, { name = "pydantic" }, ] @@ -1463,12 +1456,12 @@ dev = [ [package.metadata] requires-dist = [ - { name = "bidict", specifier = ">=0.23.1" }, - { name = "h5py", specifier = ">=3.13.0" }, + { name = "h5py", specifier = ">=3.14.0" }, { name = "mdx-truly-sane-lists", marker = "extra == 'docs'" }, { name = "mkdocs-material", marker = "extra == 'docs'" }, { name = "mkdocstrings", marker = "extra == 'docs'" }, { name = "mkdocstrings-python", marker = "extra == 'docs'" }, + { name = "pandas", specifier = ">=2.3.3" }, { name = "pydantic", specifier = ">=2.10.6" }, { name = "pymdown-extensions", marker = "extra == 'docs'" }, { name = "pytest", marker = "extra == 'tests'" }, @@ -1509,6 +1502,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/90/96/04b8e52da071d28f5e21a805b19cb9390aa17a47462ac87f5e2696b9566d/paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591", size = 13746 }, ] +[[package]] +name = "pandas" +version = "2.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/f7/f425a00df4fcc22b292c6895c6831c0c8ae1d9fac1e024d16f98a9ce8749/pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", size = 11555763 }, + { url = "https://files.pythonhosted.org/packages/13/4f/66d99628ff8ce7857aca52fed8f0066ce209f96be2fede6cef9f84e8d04f/pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", size = 10801217 }, + { url = "https://files.pythonhosted.org/packages/1d/03/3fc4a529a7710f890a239cc496fc6d50ad4a0995657dccc1d64695adb9f4/pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", size = 12148791 }, + { url = "https://files.pythonhosted.org/packages/40/a8/4dac1f8f8235e5d25b9955d02ff6f29396191d4e665d71122c3722ca83c5/pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", size = 12769373 }, + { url = "https://files.pythonhosted.org/packages/df/91/82cc5169b6b25440a7fc0ef3a694582418d875c8e3ebf796a6d6470aa578/pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", size = 13200444 }, + { url = "https://files.pythonhosted.org/packages/10/ae/89b3283800ab58f7af2952704078555fa60c807fff764395bb57ea0b0dbd/pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", size = 13858459 }, + { url = 
"https://files.pythonhosted.org/packages/85/72/530900610650f54a35a19476eca5104f38555afccda1aa11a92ee14cb21d/pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", size = 11346086 }, + { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790 }, + { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831 }, + { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267 }, + { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281 }, + { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453 }, + { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361 }, + { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702 }, + { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846 }, + { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618 }, + { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212 }, + { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693 }, + { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002 }, + { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971 }, + { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722 }, + { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671 }, + { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807 }, + { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872 }, + { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371 }, + { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333 }, + { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120 }, + { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991 }, + { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227 }, + { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056 }, + { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189 }, + { url = 
"https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912 }, + { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160 }, + { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233 }, + { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635 }, + { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079 }, + { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049 }, + { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638 }, + { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834 }, + { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925 }, + { url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071 }, + { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504 }, + { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702 }, + { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535 }, + { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582 }, + { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963 }, + { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175 }, +] + [[package]] name = "pandocfilters" version = "1.5.1" @@ -1796,6 +1850,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163 }, ] +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, +] + [[package]] name = "pywin32" version = "311" @@ -2366,6 +2429,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, ] +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, +] + [[package]] name = "uri-template" version = "1.3.0"