diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9b64c97d0a..c36428b300 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,7 +3,7 @@ TODO: * [ ] Add unit tests and/or doctests in docstrings * [ ] Add docstrings and API docs for any new/modified user-facing classes and functions -* [ ] New/modified features documented in `docs/user-guide/*.rst` +* [ ] New/modified features documented in `docs/user-guide/*.md` * [ ] Changes documented as a new file in `changes/` * [ ] GitHub Actions have all passed * [ ] Test coverage is 100% (Codecov passes) diff --git a/.github/labeler.yml b/.github/labeler.yml index ede89c9d35..7eb74211ea 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,4 +1,4 @@ needs release notes: - all: - changed-files: - - all-globs-to-all-files: '!changes/*.rst' + - all-globs-to-all-files: '!changes/*.md' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 70303060ea..e58c8f9dc9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -129,11 +129,11 @@ jobs: pip install hatch - name: Set Up Hatch Env run: | - hatch env create doctest - hatch env run -e doctest list-env + hatch env create docs + hatch env run -e docs list-env - name: Run Tests run: | - hatch env run --env doctest run + hatch env run --env docs check test-complete: name: Test complete diff --git a/.gitignore b/.gitignore index 1b2b63e651..f2f41270ca 100644 --- a/.gitignore +++ b/.gitignore @@ -49,9 +49,9 @@ coverage.xml # Django stuff: *.log -# Sphinx documentation +# Documentation +site/ docs/_build/ -docs/api docs/data data data.zip diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3a75601daa..3e2826f10f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,6 +20,7 @@ repos: rev: v6.0.0 hooks: - id: check-yaml + exclude: mkdocs.yml - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy rev: 
v1.17.1 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 6253a7196f..894778c5a4 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,12 +11,11 @@ build: then towncrier build --version Unreleased --yes; fi - -sphinx: - configuration: docs/conf.py - fail_on_warning: true - -formats: all + build: + html: + - mkdocs build --strict --site-dir $READTHEDOCS_OUTPUT/html +mkdocs: + configuration: mkdocs.yml python: install: @@ -24,3 +23,4 @@ python: path: . extra_requirements: - docs + - remote diff --git a/changes/README.md b/changes/README.md index 74ed9f94a9..889a52baa4 100644 --- a/changes/README.md +++ b/changes/README.md @@ -1,7 +1,7 @@ Writing a changelog entry ------------------------- -Please put a new file in this directory named `xxxx.<type>.rst`, where +Please put a new file in this directory named `xxxx.<type>.md`, where - `xxxx` is the pull request number associated with this entry - `<type>` is one of: diff --git a/ci/check_changelog_entries.py b/ci/check_changelog_entries.py index 9f883f0be4..da2700e32a 100644 --- a/ci/check_changelog_entries.py +++ b/ci/check_changelog_entries.py @@ -25,13 +25,13 @@ def is_int(s: str) -> bool: print(f"Found {len(entries)} entries") print() - bad_suffix = [e for e in entries if e.suffix != ".rst"] + bad_suffix = [e for e in entries if e.suffix != ".md"] bad_issue_no = [e for e in entries if not is_int(e.name.split(".")[0])] bad_type = [e for e in entries if e.name.split(".")[1] not in VALID_CHANGELOG_TYPES] if len(bad_suffix) or len(bad_issue_no) or len(bad_type): if len(bad_suffix): - print("Changelog entries without .rst suffix") + print("Changelog entries without .md suffix") print("-------------------------------------") print("\n".join([p.name for p in bad_suffix])) print() diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index f42ee840e9..0000000000 --- a/docs/Makefile +++ /dev/null @@ -1,231 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. 
-SPHINXOPTS = -W --keep-going -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from https://www.sphinx-doc.org/) -endif - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . - -.PHONY: help -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " epub3 to make an epub3" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all 
changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - @echo " dummy to check syntax errors of document sources" - -.PHONY: clean -clean: - rm -rf $(BUILDDIR)/* - rm -rf $(BUILDDIR)/../api - -.PHONY: html -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: dirhtml -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -.PHONY: singlehtml -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -.PHONY: pickle -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -.PHONY: json -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -.PHONY: htmlhelp -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." 
- -.PHONY: qthelp -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/zarr.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zarr.qhc" - -.PHONY: applehelp -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -.PHONY: devhelp -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/zarr" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/zarr" - @echo "# devhelp" - -.PHONY: epub -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -.PHONY: epub3 -epub3: - $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 - @echo - @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." - -.PHONY: latex -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -.PHONY: latexpdf -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
- -.PHONY: latexpdfja -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: text -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -.PHONY: man -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -.PHONY: texinfo -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -.PHONY: info -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -.PHONY: gettext -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -.PHONY: changes -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -.PHONY: linkcheck -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -.PHONY: doctest -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." 
- -.PHONY: coverage -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -.PHONY: xml -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -.PHONY: pseudoxml -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." - -.PHONY: dummy -dummy: - $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy - @echo - @echo "Build finished. Dummy builder generates no files." diff --git a/docs/_static/custom.css b/docs/_static/custom.css deleted file mode 100644 index 1d32606f9a..0000000000 --- a/docs/_static/custom.css +++ /dev/null @@ -1,110 +0,0 @@ -@import url('https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,400;0,700;0,900;1,400;1,700;1,900&family=Open+Sans:ital,wght@0,400;0,600;1,400;1,600&display=swap'); - -body { - font-family: 'Open Sans', sans-serif; -} - -pre, code { - font-size: 100%; - line-height: 155%; -} - -/* Style the active version button. - -- dev: orange -- stable: green -- old, PR: red - -Colors from: - -Wong, B. Points of view: Color blindness. -Nat Methods 8, 441 (2011). 
https://doi.org/10.1038/nmeth.1618 -*/ - -/* If the active version has the name "dev", style it orange */ -#version_switcher_button[data-active-version-name*="dev"] { - background-color: #E69F00; - border-color: #E69F00; - color:#000000; -} - -/* green for `stable` */ -#version_switcher_button[data-active-version-name*="stable"] { - background-color: #009E73; - border-color: #009E73; -} - -/* red for `old` */ -#version_switcher_button:not([data-active-version-name*="stable"], [data-active-version-name*="dev"], [data-active-version-name=""]) { - background-color: #980F0F; - border-color: #980F0F; -} - -/* Main page overview cards */ - -.sd-card { - background: #fff; - border-radius: 0; - padding: 30px 10px 20px 10px; - margin: 10px 0px; -} - -.sd-card .sd-card-header { - text-align: center; -} - -.sd-card .sd-card-header .sd-card-text { - margin: 0px; -} - -.sd-card .sd-card-img-top { - height: 52px; - width: 52px; - margin-left: auto; - margin-right: auto; -} - -.sd-card .sd-card-header { - border: none; - background-color: white; - font-size: var(--pst-font-size-h5); - font-weight: bold; - padding: 2.5rem 0rem 0.5rem 0rem; -} - -.sd-card .sd-card-footer { - border: none; - background-color: white; -} - -.sd-card .sd-card-footer .sd-card-text { - max-width: 220px; - margin-left: auto; - margin-right: auto; -} - -/* Dark theme tweaking */ -html[data-theme=dark] .sd-card img[src*='.svg'] { - filter: invert(0.82) brightness(0.8) contrast(1.2); -} - -/* Main index page overview cards */ -html[data-theme=dark] .sd-card { - background-color:var(--pst-color-background); -} - -html[data-theme=dark] .sd-shadow-sm { - box-shadow: 0 .1rem 1rem rgba(250, 250, 250, .6) !important -} - -html[data-theme=dark] .sd-card .sd-card-header { - background-color:var(--pst-color-background); -} - -html[data-theme=dark] .sd-card .sd-card-footer { - background-color:var(--pst-color-background); -} - -html[data-theme=dark] h1 { - color: var(--pst-color-primary); -} diff --git 
a/docs/_static/custom.js b/docs/_static/custom.js deleted file mode 100644 index 52f1cba9e0..0000000000 --- a/docs/_static/custom.js +++ /dev/null @@ -1,17 +0,0 @@ -// handle redirects -(() => { - let anchorMap = { - "installation": "installation.html", - "getting-started": "getting_started.html#getting-started", - "highlights": "getting_started.html#highlights", - "contributing": "contributing.html", - "projects-using-zarr": "getting_started.html#projects-using-zarr", - "contents": "getting_started.html#contents", - "indices-and-tables": "api.html#indices-and-tables" - } - - let hash = window.location.hash.substring(1); - if (hash && hash in anchorMap) { - window.location.replace(anchorMap[hash]); - } -})(); diff --git a/docs/_static/index_api.svg b/docs/_static/index_api.svg deleted file mode 100644 index 69f7ba1d2d..0000000000 --- a/docs/_static/index_api.svg +++ /dev/null @@ -1,97 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - diff --git a/docs/_static/index_contribute.svg b/docs/_static/index_contribute.svg deleted file mode 100644 index de3d902379..0000000000 --- a/docs/_static/index_contribute.svg +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - diff --git a/docs/_static/index_getting_started.svg b/docs/_static/index_getting_started.svg deleted file mode 100644 index 2d36622cb7..0000000000 --- a/docs/_static/index_getting_started.svg +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - diff --git a/docs/_static/index_user_guide.svg b/docs/_static/index_user_guide.svg deleted file mode 100644 index bd17053517..0000000000 --- a/docs/_static/index_user_guide.svg +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - diff --git a/docs/about.rst b/docs/about.rst deleted file mode 100644 index d2844dc864..0000000000 --- a/docs/about.rst +++ /dev/null @@ -1,24 +0,0 @@ -About -===== - -Zarr is a format for the storage of chunked, compressed, 
N-dimensional arrays -inspired by `HDF5 `_, `h5py -`_ and `bcolz `_. - -These documents describe the Zarr-Python implementation. More information -about the Zarr format can be found on the `main website `_. - -Projects using Zarr -------------------- - -If you are using Zarr-Python, we would `love to hear about it -`_. - -Funding -------- -The project is fiscally sponsored by `NumFOCUS `_, a US -501(c)(3) public charity, and development is supported by the -`MRC Centre for Genomics and Global Health `_ -and the `Chan Zuckerberg Initiative `_. - -.. _NumCodecs: https://numcodecs.readthedocs.io/ diff --git a/docs/api/abc/buffer.md b/docs/api/abc/buffer.md new file mode 100644 index 0000000000..ac814d20b6 --- /dev/null +++ b/docs/api/abc/buffer.md @@ -0,0 +1,5 @@ +--- +title: buffer +--- + +::: zarr.abc.buffer diff --git a/docs/api/abc/codec.md b/docs/api/abc/codec.md new file mode 100644 index 0000000000..d4eaecabe9 --- /dev/null +++ b/docs/api/abc/codec.md @@ -0,0 +1,5 @@ +--- +title: codec +--- + +::: zarr.abc.codec diff --git a/docs/api/abc/metadata.md b/docs/api/abc/metadata.md new file mode 100644 index 0000000000..7cc1e00662 --- /dev/null +++ b/docs/api/abc/metadata.md @@ -0,0 +1,5 @@ +--- +title: metadata +--- + +::: zarr.abc.metadata diff --git a/docs/api/abc/store.md b/docs/api/abc/store.md new file mode 100644 index 0000000000..f711448541 --- /dev/null +++ b/docs/api/abc/store.md @@ -0,0 +1,5 @@ +--- +title: store +--- + +::: zarr.abc.store diff --git a/docs/api/api_async.md b/docs/api/api_async.md new file mode 100644 index 0000000000..f5df894134 --- /dev/null +++ b/docs/api/api_async.md @@ -0,0 +1,5 @@ +--- +title: asynchronous +--- + +::: zarr.api.asynchronous \ No newline at end of file diff --git a/docs/api/api_sync.md b/docs/api/api_sync.md new file mode 100644 index 0000000000..83ff118db5 --- /dev/null +++ b/docs/api/api_sync.md @@ -0,0 +1,5 @@ +--- +title: synchronous +--- + +::: zarr.api.synchronous \ No newline at end of file diff --git 
a/docs/api/array.md b/docs/api/array.md new file mode 100644 index 0000000000..ff61cb1fe2 --- /dev/null +++ b/docs/api/array.md @@ -0,0 +1,2 @@ +::: zarr.Array +::: zarr.AsyncArray diff --git a/docs/api/buffer.md b/docs/api/buffer.md new file mode 100644 index 0000000000..aa089957cf --- /dev/null +++ b/docs/api/buffer.md @@ -0,0 +1,3 @@ +::: zarr.buffer +::: zarr.buffer.cpu +::: zarr.buffer.gpu diff --git a/docs/api/codecs.md b/docs/api/codecs.md new file mode 100644 index 0000000000..5cf66b304e --- /dev/null +++ b/docs/api/codecs.md @@ -0,0 +1,5 @@ +--- +title: codecs +--- + +::: zarr.codecs \ No newline at end of file diff --git a/docs/api/config.md b/docs/api/config.md new file mode 100644 index 0000000000..30803918f5 --- /dev/null +++ b/docs/api/config.md @@ -0,0 +1,5 @@ +--- +title: config +--- + +::: zarr.config diff --git a/docs/api/convenience.md b/docs/api/convenience.md new file mode 100644 index 0000000000..f2614e3724 --- /dev/null +++ b/docs/api/convenience.md @@ -0,0 +1,10 @@ +--- +title: convenience +--- + +::: zarr.consolidate_metadata +::: zarr.copy +::: zarr.copy_all +::: zarr.copy_store +::: zarr.print_debug_info +::: zarr.tree diff --git a/docs/api/create.md b/docs/api/create.md new file mode 100644 index 0000000000..971e9c293c --- /dev/null +++ b/docs/api/create.md @@ -0,0 +1,19 @@ +--- +title: create +--- + +::: zarr.array +::: zarr.create +::: zarr.create_array +::: zarr.create_group +::: zarr.create_hierarchy +::: zarr.empty +::: zarr.empty_like +::: zarr.full +::: zarr.full_like +::: zarr.from_array +::: zarr.group +::: zarr.ones +::: zarr.ones_like +::: zarr.zeros +::: zarr.zeros_like diff --git a/docs/api/deprecated/convenience.md b/docs/api/deprecated/convenience.md new file mode 100644 index 0000000000..91bcb15f71 --- /dev/null +++ b/docs/api/deprecated/convenience.md @@ -0,0 +1 @@ +::: zarr.convenience \ No newline at end of file diff --git a/docs/api/deprecated/creation.md b/docs/api/deprecated/creation.md new file mode 100644 index 
0000000000..5d18a06a4a --- /dev/null +++ b/docs/api/deprecated/creation.md @@ -0,0 +1 @@ +::: zarr.creation diff --git a/docs/api/dtype.md b/docs/api/dtype.md new file mode 100644 index 0000000000..c08910b97f --- /dev/null +++ b/docs/api/dtype.md @@ -0,0 +1,5 @@ +--- +title: dtype +--- + +::: zarr.dtype diff --git a/docs/api/errors.md b/docs/api/errors.md new file mode 100644 index 0000000000..2ba2213071 --- /dev/null +++ b/docs/api/errors.md @@ -0,0 +1,5 @@ +--- +title: errors +--- + +::: zarr.errors \ No newline at end of file diff --git a/docs/api/group.md b/docs/api/group.md new file mode 100644 index 0000000000..0cf9372de2 --- /dev/null +++ b/docs/api/group.md @@ -0,0 +1,2 @@ +::: zarr.Group +::: zarr.AsyncGroup diff --git a/docs/api/index.md b/docs/api/index.md new file mode 100644 index 0000000000..8e6be1058e --- /dev/null +++ b/docs/api/index.md @@ -0,0 +1,64 @@ +# API Reference + +Complete reference documentation for the Zarr-Python API. + +## Core API + +### Essential Classes and Functions + +- **[Array](array.md)** - The main Zarr array class for N-dimensional data +- **[Group](group.md)** - Hierarchical organization of arrays and subgroups +- **[Create](create.md)** - Functions for creating new arrays and groups +- **[Open](open.md)** - Opening existing Zarr stores and arrays + +### Data Operations + +- **[Load](load.md)** - Loading data from Zarr stores +- **[Save](save.md)** - Saving data to Zarr format +- **[Convenience](convenience.md)** - High-level convenience functions + +### Data Types and Configuration + +- **[Data Types](dtype.md)** - Supported NumPy data types and type handling +- **[Configuration](config.md)** - Runtime configuration and settings + +## Storage and Compression + +- **[Codecs](codecs.md)** - Compression and filtering codecs +- **[Storage](storage.md)** - Storage backend implementations and interfaces +- **[Registry](registry.md)** - Codec and storage backend registry + +## API Variants + +Zarr-Python provides both synchronous 
and asynchronous APIs: + +- **[Async API](api_async.md)** - Asynchronous operations for concurrent access +- **[Sync API](api_sync.md)** - Synchronous operations for simple usage + +## Abstract Base Classes + +The ABC module defines interfaces for extending Zarr: + +- **[Codec ABC](abc/codec.md)** - Interface for custom compression codecs +- **[Metadata ABC](abc/metadata.md)** - Interface for metadata handling +- **[Store ABC](abc/store.md)** - Interface for custom storage backends + +## Utilities + +- **[Errors](errors.md)** - Exception classes and error handling +- **[Testing](testing.md)** - Utilities for testing Zarr-based code + + +## Migration and Compatibility + +- **[Deprecated Functions](deprecated/convenience.md)** - Legacy convenience functions +- **[Deprecated Creation](deprecated/creation.md)** - Legacy array creation functions + +These deprecated modules are maintained for backward compatibility but should be avoided in new code. + +## Getting Help + +- Check the [User Guide](../user-guide/index.md) for tutorials and examples +- Browse function signatures and docstrings in the API reference +- Report issues on [GitHub](https://github.com/zarr-developers/zarr-python) +- Join discussions on the [Zarr community forum](https://github.com/zarr-developers/community) diff --git a/docs/api/load.md b/docs/api/load.md new file mode 100644 index 0000000000..d6463ca976 --- /dev/null +++ b/docs/api/load.md @@ -0,0 +1,5 @@ +--- +title: load +--- + +::: zarr.load diff --git a/docs/api/open.md b/docs/api/open.md new file mode 100644 index 0000000000..c59f896129 --- /dev/null +++ b/docs/api/open.md @@ -0,0 +1,9 @@ +--- +title: open +--- + +::: zarr.open +::: zarr.open_array +::: zarr.open_consolidated +::: zarr.open_group +::: zarr.open_like diff --git a/docs/api/registry.md b/docs/api/registry.md new file mode 100644 index 0000000000..d2c3769596 --- /dev/null +++ b/docs/api/registry.md @@ -0,0 +1,5 @@ +--- +title: registry +--- + +::: zarr.registry \ No newline at 
end of file diff --git a/docs/api/save.md b/docs/api/save.md new file mode 100644 index 0000000000..c611d10a4c --- /dev/null +++ b/docs/api/save.md @@ -0,0 +1,7 @@ +--- +title: save +--- + +::: zarr.save +::: zarr.save_array +::: zarr.save_group diff --git a/docs/api/storage.md b/docs/api/storage.md new file mode 100644 index 0000000000..33580d1d8a --- /dev/null +++ b/docs/api/storage.md @@ -0,0 +1,11 @@ +--- +title: storage +--- + +## Attributes + +::: zarr.storage.StoreLike + +## Classes + +::: zarr.storage diff --git a/docs/api/testing.md b/docs/api/testing.md new file mode 100644 index 0000000000..1412950ee3 --- /dev/null +++ b/docs/api/testing.md @@ -0,0 +1,23 @@ +--- +title: testing +--- + +## Buffer + +::: zarr.testing.buffer + +## Stateful + +::: zarr.testing.stateful + +## Store + +::: zarr.testing.store + +## Strategies + +::: zarr.testing.strategies + +## Utils + +::: zarr.testing.utils diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 803d2c4255..0000000000 --- a/docs/conf.py +++ /dev/null @@ -1,387 +0,0 @@ -#!/usr/bin/env python3 -# -# zarr documentation build configuration file, created by -# sphinx-quickstart on Mon May 2 21:40:09 2016. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - - -import os -import sys -from importlib.metadata import version as get_version -from typing import Any - -import sphinx -import sphinx.application - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-sys.path.append(os.path.abspath("..")) - - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "sphinx.ext.intersphinx", - 'autoapi.extension', - "numpydoc", - "sphinx_issues", - "sphinx_copybutton", - "sphinx_design", - 'sphinx_reredirects', - "sphinx.ext.viewcode", -] - -issues_github_path = "zarr-developers/zarr-python" - -autoapi_dirs = ['../src/zarr'] -autoapi_add_toctree_entry = False -autoapi_generate_api_docs = True -autoapi_member_order = "groupwise" -autoapi_root = "api" -autoapi_keep_files = True -autoapi_options = [ 'members', 'undoc-members', 'show-inheritance', 'show-module-summary', 'imported-members', 'inherited-members'] - -def skip_submodules( - app: sphinx.application.Sphinx, - what: str, - name: str, - obj: object, - skip: bool, - options: dict[str, Any] - ) -> bool: - # Skip documenting zarr.codecs submodules - # codecs are documented in the main zarr.codecs namespace - if what == "module" and name.startswith("zarr.codecs.") or name.startswith("zarr.core"): - skip = True - return skip - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The main toctree document. -main_doc = "index" - -# General information about the project. 
-project = "zarr" -copyright = "2025, Zarr Developers" -author = "Zarr Developers" - -version = get_version("zarr") -release = get_version("zarr") - -redirects = { - "spec": "https://zarr-specs.readthedocs.io", - "spec/v1": 'https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html', - "spec/v2": "https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html", - "spec/v3": "https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html", - "license": "https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt", - "tutorial": "user-guide", - "getting-started": "quickstart", - "roadmap": "developers/roadmap.html", - "installation": "user-guide/installation.html", - "api": "api/zarr/index", - "release": "release-notes.html", -} - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = "en" - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "talks"] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. 
-# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = "pydata_sphinx_theme" - -html_favicon = "_static/logo1.png" - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -html_theme_options = { - "github_url": "https://github.com/zarr-developers/zarr-python", - "icon_links": [ - { - "name": "Bluesky", - "url": "https://bsky.app/profile/zarr.dev", - "icon": "fa-brands fa-bluesky", - }, - { - "name": "Mastodon", - "url": "https://fosstodon.org/@zarr", - "icon": "fa-brands fa-mastodon", - }, - { - "name": "Zarr Dev", - "url": "https://zarr.dev/", - "icon": "_static/logo1.png", - "type": "local", - }, - ], - "collapse_navigation": True, - "navigation_with_keys": False, -} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. -# " v documentation" by default. -# html_title = 'zarr v@@' - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. 
-html_logo = "_static/logo_horizontal.svg" - - -def setup(app: sphinx.application.Sphinx) -> None: - app.add_css_file("custom.css") - app.connect("autoapi-skip-member", skip_submodules) - - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] -html_js_files = [ - "custom.js", -] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -# html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -html_sidebars = {"tutorial": []} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -# html_domain_indices = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. 
Default is True. -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -# html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = "zarrdoc" - -maximum_signature_line_length = 80 - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - #'preamble': '', - # Latex figure (float) alignment - #'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (main_doc, "zarr.tex", "Zarr-Python", author, "manual"), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. 
-# latex_use_parts = False - -# If true, show page references after internal links. -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [(main_doc, "zarr", "Zarr-Python", [author], 1)] - -# If true, show URL addresses after external links. -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - main_doc, - "zarr", - "Zarr-Python", - author, - "zarr", - "One line description of project.", - "Miscellaneous", - ), -] - -# Documents to append as an appendix to all manuals. -# texinfo_appendices = [] - -# If false, no module index is generated. -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -# texinfo_no_detailmenu = False - - -# Example configuration for intersphinx: refer to the Python standard library. -# use in refs e.g: -# :ref:`comparison manual ` -intersphinx_mapping = { - "python": ("https://docs.python.org/3/", None), - "numpy": ("https://numpy.org/doc/stable/", None), - "numcodecs": ("https://numcodecs.readthedocs.io/en/stable/", None), - "obstore": ("https://developmentseed.org/obstore/latest/", None), -} - - -# sphinx-copybutton configuration -copybutton_prompt_text = r">>> |\.\.\. 
|\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " -copybutton_line_continuation_character = "\\" -copybutton_prompt_is_regexp = True diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000000..7bfa6f6a18 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,233 @@ +# Contributing + +Zarr is a community maintained project. We welcome contributions in the form of bug reports, bug fixes, documentation, enhancement proposals and more. This page provides information on how best to contribute. + +## Asking for help + +If you have a question about how to use Zarr, please post your question on StackOverflow using the ["zarr" tag](https://stackoverflow.com/questions/tagged/zarr). If you don't get a response within a day or two, feel free to raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) including a link to your StackOverflow question. We will try to respond to questions as quickly as possible, but please bear in mind that there may be periods where we have limited time to answer questions due to other commitments. + +## Bug reports + +If you find a bug, please raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new). Please include the following items in a bug report: + +1. A minimal, self-contained snippet of Python code reproducing the problem. You can format the code nicely using markdown, e.g.: + +```python +import zarr +g = zarr.group() +# etc. +``` + +2. An explanation of why the current behaviour is wrong/not desired, and what you expect instead. + +3. Information about the version of Zarr, along with versions of dependencies and the Python interpreter, and installation information. The version of Zarr can be obtained from the `zarr.__version__` property. Please also state how Zarr was installed, e.g., "installed via pip into a virtual environment", or "installed using conda". 
Information about other packages installed can be obtained by executing `pip freeze` (if using pip to install packages) or `conda env export` (if using conda to install packages) from the operating system command prompt. The version of the Python interpreter can be obtained by running a Python interactive session, e.g.: + +```console +python +``` + +```ansi +Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin +``` + +## Enhancement proposals + +If you have an idea about a new feature or some other improvement to Zarr, please raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) first to discuss. + +We very much welcome ideas and suggestions for how to improve Zarr, but please bear in mind that we are likely to be conservative in accepting proposals for new features. The reasons for this are that we would like to keep the Zarr code base lean and focused on a core set of functionalities, and available time for development, review and maintenance of new features is limited. But if you have a great idea, please don't let that stop you from posting it on GitHub, just please don't be offended if we respond cautiously. + +## Contributing code and/or documentation + +### Forking the repository + +The Zarr source code is hosted on GitHub at the following location: + +* [https://github.com/zarr-developers/zarr-python](https://github.com/zarr-developers/zarr-python) + +You will need your own fork to work on the code. Go to the link above and hit the ["Fork"](https://github.com/zarr-developers/zarr-python/fork) button. 
Then clone your fork to your local machine: + +```bash +git clone git@github.com:your-user-name/zarr-python.git +cd zarr-python +git remote add upstream git@github.com:zarr-developers/zarr-python.git +``` + +### Creating a development environment + +To work with the Zarr source code, it is recommended to use [hatch](https://hatch.pypa.io/latest/index.html) to create and manage development environments. Hatch will automatically install all Zarr dependencies using the same versions as are used by the core developers and continuous integration services. Assuming you have a Python 3 interpreter already installed, and you have cloned the Zarr source code and your current working directory is the root of the repository, you can do something like the following: + +```bash +pip install hatch +hatch env show # list all available environments +``` + +To verify that your development environment is working, you can run the unit tests for one of the test environments, e.g.: + +```bash +hatch env run --env test.py3.12-2.2-optional run-pytest +``` + +### Creating a branch + +Before you do any new work or submit a pull request, please open an issue on GitHub to report the bug or propose the feature you'd like to add. + +It's best to synchronize your fork with the upstream repository, then create a new, separate branch for each piece of work you want to do. E.g.: + +```bash +git checkout main +git fetch upstream +git checkout -b shiny-new-feature upstream/main +git push -u origin shiny-new-feature +``` + +This changes your working directory to the 'shiny-new-feature' branch. Keep any changes in this branch specific to one bug or feature so it is clear what the branch brings to Zarr. + +To update this branch with latest code from Zarr, you can retrieve the changes from the main branch and perform a rebase: + +```bash +git fetch upstream +git rebase upstream/main +``` + +This will replay your commits on top of the latest Zarr git main. 
If this leads to merge conflicts, these need to be resolved before submitting a pull request. Alternatively, you can merge the changes in from upstream/main instead of rebasing, which can be simpler: + +```bash +git pull upstream main +``` + +Again, any conflicts need to be resolved before submitting a pull request. + +### Running the test suite + +Zarr includes a suite of unit tests. The simplest way to run the unit tests is to activate your development environment (see [creating a development environment](#creating-a-development-environment) above) and invoke: + +```bash +hatch env run --env test.py3.12-2.2-optional run-pytest +``` + +All tests are automatically run via GitHub Actions for every pull request and must pass before code can be accepted. Test coverage is also collected automatically via the Codecov service. + +> **Note:** Previous versions of Zarr-Python made extensive use of doctests. These tests were not maintained during the 3.0 refactor but may be brought back in the future. See issue #2614 for more details. + +### Code standards - using pre-commit + +All code must conform to the PEP8 standard. Regarding line length, lines up to 100 characters are allowed, although please try to keep under 90 wherever possible. + +`Zarr` uses a set of `pre-commit` hooks and the `pre-commit` bot to format, type-check, and prettify the codebase. `pre-commit` can be installed locally by running: + +```bash +python -m pip install pre-commit +``` + +The hooks can be installed locally by running: + +```bash +pre-commit install +``` + +This would run the checks every time a commit is created locally. These checks will also run on every commit pushed to an open PR, resulting in some automatic styling fixes by the `pre-commit` bot. 
The checks will by default only run on the files modified by a commit, but the checks can be triggered for all the files by running: + +```bash +pre-commit run --all-files +``` + +If you would like to skip the failing checks and push the code for further discussion, use the `--no-verify` option with `git commit`. + +### Test coverage + +> **Note:** Test coverage for Zarr-Python 3 is currently not at 100%. This is a known issue and help is welcome to bring test coverage back to 100%. See issue #2613 for more details. + +Zarr strives to maintain 100% test coverage under the latest Python stable release. Both unit tests and docstring doctests are included when computing coverage. Running: + +```bash +hatch env run --env test.py3.12-2.2-optional run-coverage +``` + +will automatically run the test suite with coverage and produce an XML coverage report. This should be 100% before code can be accepted into the main code base. + +You can also generate an HTML coverage report by running: + +```bash +hatch env run --env test.py3.12-2.2-optional run-coverage-html +``` + +When submitting a pull request, coverage will also be collected across all supported Python versions via the Codecov service, and will be reported back within the pull request. Codecov coverage must also be 100% before code can be accepted. + +### Documentation + +Docstrings for user-facing classes and functions should follow the [numpydoc](https://numpydoc.readthedocs.io/en/stable/format.html#docstring-standard) standard, including sections for Parameters and Examples. All examples should run and pass as doctests under Python 3.11. + +Zarr uses mkdocs for documentation, hosted on readthedocs.org. Documentation is written in the Markdown markup language (.md files) in the `docs` folder. The documentation consists of both prose and API documentation.
All user-facing classes and functions are included in the API documentation, under the `docs/api` folder using the [mkdocstrings](https://mkdocstrings.github.io/) extension. Add any new public functions or classes to the relevant markdown file in `docs/api/*.md`. Any new features or important usage information should be included in the user-guide (`docs/user-guide`). Any changes should also be included as a new file in the `changes` directory. + +The documentation can be built locally by running: + +```bash +hatch --env docs run build +``` + +The resulting built documentation will be available in the `docs/_build/html` folder. + +Hatch can also be used to serve a continuously updating version of the documentation during development at [http://0.0.0.0:8000/](http://0.0.0.0:8000/). This can be done by running: + +```bash +hatch --env docs run serve +``` + +### Changelog + +zarr-python uses [towncrier](https://towncrier.readthedocs.io/en/stable/tutorial.html) to manage release notes. Most pull requests should include at least one news fragment describing the changes. To add a release note, you'll need the GitHub issue or pull request number and the type of your change (`feature`, `bugfix`, `doc`, `removal`, `misc`). With that, run `towncrier create` with your development environment, which will prompt you for the issue number, change type, and the news text: + +```bash +towncrier create +``` + +Alternatively, you can manually create the files in the `changes` directory using the naming convention `{issue-number}.{change-type}.md`. + +See the [towncrier](https://towncrier.readthedocs.io/en/stable/tutorial.html) docs for more. + +## Merging pull requests + +Pull requests submitted by an external contributor should be reviewed and approved by at least one core developer before being merged. Ideally, pull requests submitted by a core developer should be reviewed and approved by at least one other core developer before being merged.
+ +Pull requests should not be merged until all CI checks have passed (GitHub Actions, Codecov) against code that has had the latest main merged in. + +Before merging, the milestone must be set to decide whether a PR will be in the next patch, minor, or major release. The next section explains which types of changes go in each release. + +## Compatibility and versioning policies + +### Versioning + +Versions of this library are identified by a triplet of integers with the form `<major>.<minor>.<patch>`, for example `3.0.4`. A release of `zarr-python` is associated with a new version identifier. That new identifier is generated by incrementing exactly one of the components of the previous version identifier by 1. When incrementing the `major` component of the version identifier, the `minor` and `patch` components are reset to 0. When incrementing the minor component, the patch component is reset to 0. + +Releases are classified by the library changes contained in that release. This classification determines which component of the version identifier is incremented on release. + +* **major** releases (for example, `2.18.0` -> `3.0.0`) are for changes that will require extensive adaptation efforts from many users and downstream projects. For example, breaking changes to widely-used user-facing APIs should only be applied in a major release. + + Users and downstream projects should carefully consider the impact of a major release before adopting it. In advance of a major release, developers should communicate the scope of the upcoming changes, and help users prepare for them. + +* **minor** releases (for example, `3.0.0` -> `3.1.0`) are for changes that do not require significant effort from most users or downstream projects to respond to. API changes are possible in minor releases if the burden on users imposed by those changes is sufficiently small.
+ + For example, a recently released API may need fixes or refinements that are breaking, but low impact due to the recency of the feature. Such API changes are permitted in a minor release. + + Minor releases are safe for most users and downstream projects to adopt. + +* **patch** releases (for example, `3.1.0` -> `3.1.1`) are for changes that contain no breaking or behaviour changes for downstream projects or users. Examples of changes suitable for a patch release are bugfixes and documentation improvements. + + Users should always feel safe upgrading to the latest patch release. + +Note that this versioning scheme is not consistent with [Semantic Versioning](https://semver.org/). Contrary to SemVer, the Zarr library may release breaking changes in `minor` releases, or even `patch` releases under exceptional circumstances. But we should strive to avoid doing so. + +A better model for our versioning scheme is [Intended Effort Versioning](https://jacobtomlinson.dev/effver/), or "EffVer". The guiding principle of EffVer is to categorize releases based on the *expected effort required to upgrade to that release*. + +Zarr developers should make changes as smooth as possible for users. This means making backwards-compatible changes wherever possible. When a backwards-incompatible change is necessary, users should be notified well in advance, e.g. via informative deprecation warnings. + +### Data format compatibility + +The Zarr library is an implementation of a file format standard defined externally -- see the [Zarr specifications website](https://zarr-specs.readthedocs.io) for the list of Zarr file format specifications. + +If an existing Zarr format version changes, or a new version of the Zarr format is released, then the Zarr library will generally require changes.
It is very likely that a new Zarr format will require extensive breaking changes to the Zarr library, and so support for a new Zarr format in the Zarr library will almost certainly come in a new `major` release. When the Zarr library adds support for a new Zarr format, there may be a period of accelerated changes as developers refine newly added APIs and deprecate old APIs. In such a transitional phase, breaking changes may be more frequent than usual. + +## Release procedure + +Open an issue on GitHub announcing the release using the release checklist template: +[https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md](https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md). The release checklist includes all steps necessary for the release. \ No newline at end of file diff --git a/docs/developers/contributing.rst b/docs/developers/contributing.rst deleted file mode 100644 index 50bf52730b..0000000000 --- a/docs/developers/contributing.rst +++ /dev/null @@ -1,347 +0,0 @@ -.. _dev-guide-contributing: - -Contributing to Zarr -==================== - -Zarr is a community maintained project. We welcome contributions in the form of bug -reports, bug fixes, documentation, enhancement proposals and more. This page provides -information on how best to contribute. - -Asking for help ---------------- - -If you have a question about how to use Zarr, please post your question on -StackOverflow using the `"zarr" tag `_. -If you don't get a response within a day or two, feel free to raise a `GitHub issue -`_ including a link to your StackOverflow -question. We will try to respond to questions as quickly as possible, but please bear -in mind that there may be periods where we have limited time to answer questions -due to other commitments. - -Bug reports ------------ - -If you find a bug, please raise a `GitHub issue -`_. Please include the following items in -a bug report: - -1.
A minimal, self-contained snippet of Python code reproducing the problem. You can - format the code nicely using markdown, e.g.:: - - - ```python - import zarr - g = zarr.group() - # etc. - ``` - -2. An explanation of why the current behaviour is wrong/not desired, and what you - expect instead. - -3. Information about the version of Zarr, along with versions of dependencies and the - Python interpreter, and installation information. The version of Zarr can be obtained - from the ``zarr.__version__`` property. Please also state how Zarr was installed, - e.g., "installed via pip into a virtual environment", or "installed using conda". - Information about other packages installed can be obtained by executing ``pip freeze`` - (if using pip to install packages) or ``conda env export`` (if using conda to install - packages) from the operating system command prompt. The version of the Python - interpreter can be obtained by running a Python interactive session, e.g.:: - - $ python - Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin - -Enhancement proposals ---------------------- - -If you have an idea about a new feature or some other improvement to Zarr, please raise a -`GitHub issue `_ first to discuss. - -We very much welcome ideas and suggestions for how to improve Zarr, but please bear in -mind that we are likely to be conservative in accepting proposals for new features. The -reasons for this are that we would like to keep the Zarr code base lean and focused on -a core set of functionalities, and available time for development, review and maintenance -of new features is limited. But if you have a great idea, please don't let that stop -you from posting it on GitHub, just please don't be offended if we respond cautiously. 
- -Contributing code and/or documentation --------------------------------------- - -Forking the repository -~~~~~~~~~~~~~~~~~~~~~~ - -The Zarr source code is hosted on GitHub at the following location: - -* `https://github.com/zarr-developers/zarr-python `_ - -You will need your own fork to work on the code. Go to the link above and hit -the `"Fork" `_ button. -Then clone your fork to your local machine:: - - $ git clone git@github.com:your-user-name/zarr-python.git - $ cd zarr-python - $ git remote add upstream git@github.com:zarr-developers/zarr-python.git - -Creating a development environment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To work with the Zarr source code, it is recommended to use -`hatch `_ to create and manage development -environments. Hatch will automatically install all Zarr dependencies using the same -versions as are used by the core developers and continuous integration services. -Assuming you have a Python 3 interpreter already installed, and you have cloned the -Zarr source code and your current working directory is the root of the repository, -you can do something like the following:: - - $ pip install hatch - $ hatch env show # list all available environments - -To verify that your development environment is working, you can run the unit tests -for one of the test environments, e.g.:: - - $ hatch env run --env test.py3.12-2.2-optional run-pytest - -Creating a branch -~~~~~~~~~~~~~~~~~ - -Before you do any new work or submit a pull request, please open an issue on GitHub to -report the bug or propose the feature you'd like to add. - -It's best to synchronize your fork with the upstream repository, then create a -new, separate branch for each piece of work you want to do. E.g.:: - - git checkout main - git fetch upstream - git checkout -b shiny-new-feature upstream/main - git push -u origin shiny-new-feature - -This changes your working directory to the 'shiny-new-feature' branch. 
Keep any changes in -this branch specific to one bug or feature so it is clear what the branch brings to -Zarr. - -To update this branch with latest code from Zarr, you can retrieve the changes from -the main branch and perform a rebase:: - - git fetch upstream - git rebase upstream/main - -This will replay your commits on top of the latest Zarr git main. If this leads to -merge conflicts, these need to be resolved before submitting a pull request. -Alternatively, you can merge the changes in from upstream/main instead of rebasing, -which can be simpler:: - - git pull upstream main - -Again, any conflicts need to be resolved before submitting a pull request. - -Running the test suite -~~~~~~~~~~~~~~~~~~~~~~ - -Zarr includes a suite of unit tests. The simplest way to run the unit tests -is to activate your development environment -(see `creating a development environment`_ above) and invoke:: - - $ hatch env run --env test.py3.12-2.2-optional run-pytest - -All tests are automatically run via GitHub Actions for every pull -request and must pass before code can be accepted. Test coverage is -also collected automatically via the Codecov service. - -.. note:: - Previous versions of Zarr-Python made extensive use of doctests. These tests were - not maintained during the 3.0 refactor but may be brought back in the future. - See :issue:`2614` for more details. - -Code standards - using pre-commit -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -All code must conform to the PEP8 standard. Regarding line length, lines up to 100 -characters are allowed, although please try to keep under 90 wherever possible. - -``Zarr`` uses a set of ``pre-commit`` hooks and the ``pre-commit`` bot to format, -type-check, and prettify the codebase. ``pre-commit`` can be installed locally by -running:: - - $ python -m pip install pre-commit - -The hooks can be installed locally by running:: - - $ pre-commit install - -This would run the checks every time a commit is created locally. 
These checks will also run -on every commit pushed to an open PR, resulting in some automatic styling fixes by the -``pre-commit`` bot. The checks will by default only run on the files modified by a commit, -but the checks can be triggered for all the files by running:: - - $ pre-commit run --all-files - -If you would like to skip the failing checks and push the code for further discussion, use -the ``--no-verify`` option with ``git commit``. - - -Test coverage -~~~~~~~~~~~~~ - -.. note:: - Test coverage for Zarr-Python 3 is currently not at 100%. This is a known issue and help - is welcome to bring test coverage back to 100%. See :issue:`2613` for more details. - -Zarr strives to maintain 100% test coverage under the latest Python stable release -Both unit tests and docstring doctests are included when computing coverage. Running:: - - $ hatch env run --env test.py3.12-2.2-optional run-coverage - -will automatically run the test suite with coverage and produce a XML coverage report. -This should be 100% before code can be accepted into the main code base. - -You can also generate an HTML coverage report by running:: - - $ hatch env run --env test.py3.12-2.2-optional run-coverage-html - -When submitting a pull request, coverage will also be collected across all supported -Python versions via the Codecov service, and will be reported back within the pull -request. Codecov coverage must also be 100% before code can be accepted. - -Documentation -~~~~~~~~~~~~~ - -Docstrings for user-facing classes and functions should follow the -`numpydoc -`_ -standard, including sections for Parameters and Examples. All examples -should run and pass as doctests under Python 3.11. - -Zarr uses Sphinx for documentation, hosted on readthedocs.org. Documentation is -written in the RestructuredText markup language (.rst files) in the ``docs`` folder. -The documentation consists both of prose and API documentation. 
All user-facing classes -and functions are included in the API documentation, under the ``docs/api`` folder -using the `autodoc `_ -extension to sphinx. Any new features or important usage information should be included in the -user-guide (``docs/user-guide``). Any changes should also be included as a new file in the -:file:`changes` directory. - -The documentation can be built locally by running:: - - $ hatch --env docs run build - -The resulting built documentation will be available in the ``docs/_build/html`` folder. - -Hatch can also be used to serve continuously updating version of the documentation -during development at `http://0.0.0.0:8000/ `_. This can be done by running:: - - $ hatch --env docs run serve - -.. _changelog: - -Changelog -~~~~~~~~~ - -zarr-python uses `towncrier`_ to manage release notes. Most pull requests should -include at least one news fragment describing the changes. To add a release -note, you'll need the GitHub issue or pull request number and the type of your -change (``feature``, ``bugfix``, ``doc``, ``removal``, ``misc``). With that, run -```towncrier create``` with your development environment, which will prompt you -for the issue number, change type, and the news text:: - - towncrier create - -Alternatively, you can manually create the files in the ``changes`` directory -using the naming convention ``{issue-number}.{change-type}.rst``. - -See the `towncrier`_ docs for more. - -.. _towncrier: https://towncrier.readthedocs.io/en/stable/tutorial.html - -The following information is mainly for core developers, but may also be of interest to -contributors. - -Merging pull requests ---------------------- - -Pull requests submitted by an external contributor should be reviewed and approved by at least -one core developer before being merged. Ideally, pull requests submitted by a core developer -should be reviewed and approved by at least one other core developer before being merged. 
- -Pull requests should not be merged until all CI checks have passed (GitHub Actions, -Codecov) against code that has had the latest main merged in. - -Before merging, the milestone must be set to indicate whether a PR will be in the next -patch, minor, or major release. The next section explains which types of changes go in each release. - -Compatibility and versioning policies -------------------------------------- - -Versioning -~~~~~~~~~~ -Versions of this library are identified by a triplet of integers with the form -``<major>.<minor>.<patch>``, for example ``3.0.4``. A release of ``zarr-python`` is associated with a new -version identifier. That new identifier is generated by incrementing exactly one of the components of -the previous version identifier by 1. When incrementing the ``major`` component of the version identifier, -the ``minor`` and ``patch`` components are reset to 0. When incrementing the minor component, -the patch component is reset to 0. - -Releases are classified by the library changes contained in that release. This classification -determines which component of the version identifier is incremented on release. - -* ``major`` releases (for example, ``2.18.0`` -> ``3.0.0``) are for changes that will - require extensive adaptation efforts from many users and downstream projects. - For example, breaking changes to widely-used user-facing APIs should only be applied in a major release. - - - Users and downstream projects should carefully consider the impact of a major release before - adopting it. - In advance of a major release, developers should communicate the scope of the upcoming changes, - and help users prepare for them. - -* ``minor`` releases (for example, ``3.0.0`` -> ``3.1.0``) are for changes that do not require - significant effort from most users or downstream projects to respond to. API changes - are possible in minor releases if the burden on users imposed by those changes is sufficiently small.
- - For example, a recently released API may need fixes or refinements that are breaking, but low impact - due to the recency of the feature. Such API changes are permitted in a minor release. - - - Minor releases are safe for most users and downstream projects to adopt. - - -* ``patch`` releases (for example, ``3.1.0`` -> ``3.1.1``) are for changes that contain no breaking - or behaviour changes for downstream projects or users. Examples of changes suitable for a patch release are - bugfixes and documentation improvements. - - - Users should always feel safe upgrading to the latest patch release. - -Note that this versioning scheme is not consistent with `Semantic Versioning `_. -Contrary to SemVer, the Zarr library may release breaking changes in ``minor`` releases, or even -``patch`` releases under exceptional circumstances. But we should strive to avoid doing so. - -A better model for our versioning scheme is `Intended Effort Versioning `_, -or "EffVer". The guiding principle of EffVer is to categorize releases based on the *expected effort -required to upgrade to that release*. - -Zarr developers should make changes as smooth as possible for users. This means making -backwards-compatible changes wherever possible. When a backwards-incompatible change is necessary, -users should be notified well in advance, e.g. via informative deprecation warnings. - -Data format compatibility -""""""""""""""""""""""""" - -The Zarr library is an implementation of a file format standard defined externally -- -see the `Zarr specifications website `_ for the list of -Zarr file format specifications. - - -If an existing Zarr format version changes, or a new version of the Zarr format is released, then -the Zarr library will generally require changes. It is very likely that a new Zarr format will -require extensive breaking changes to the Zarr library, and so support for a new Zarr format in the -Zarr library will almost certainly come in a new ``major`` release.
-When the Zarr library adds support for a new Zarr format, there may be a period of accelerated -changes as developers refine newly added APIs and deprecate old APIs. In such a transitional phase -breaking changes may be more frequent than usual. - - -Release procedure ------------------ - -Open an issue on GitHub announcing the release using the release checklist template: -`https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md `_. -The release checklist includes all steps necessary for the release. diff --git a/docs/developers/index.rst b/docs/developers/index.rst deleted file mode 100644 index 4bccb3a469..0000000000 --- a/docs/developers/index.rst +++ /dev/null @@ -1,9 +0,0 @@ - -Developer's Guide ------------------ - -.. toctree:: - :maxdepth: 1 - - contributing - roadmap diff --git a/docs/developers/roadmap.rst b/docs/developers/roadmap.rst deleted file mode 100644 index 716599bab0..0000000000 --- a/docs/developers/roadmap.rst +++ /dev/null @@ -1,696 +0,0 @@ -Roadmap -======= - -- Status: active -- Author: Joe Hamman -- Created On: October 31, 2023 -- Input from: - - - Davis Bennett / @d-v-b - - Norman Rzepka / @normanrz - - Deepak Cherian @dcherian - - Brian Davis / @monodeldiablo - - Oliver McCormack / @olimcc - - Ryan Abernathey / @rabernat - - Jack Kelly / @JackKelly - - Martin Durrant / @martindurant - -.. note:: - - This document was written in the early stages of the 3.0 refactor. Some - aspects of the design have changed since this was originally written. - Questions and discussion about the contents of this document should be directed to - `this GitHub Discussion `__. - -Introduction ------------- - -This document lays out a design proposal for version 3.0 of the -`Zarr-Python `__ package. A -specific focus of the design is to bring Zarr-Python’s API up to date -with the `Zarr V3 -specification `__, -with the hope of enabling the development of the many features and -extensions that motivated the V3 Spec. 
The ideas presented here are -expected to result in a major release of Zarr-Python (version 3.0) -including a significant number of breaking API changes. For clarity, -“V3” will be used to describe the version of the Zarr specification and -“3.0” will be used to describe the release tag of the Zarr-Python -project. - -Current status of V3 in Zarr-Python -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -During the development of the V3 Specification, a `prototype -implementation `__ -was added to the Zarr-Python library. Since that implementation, the V3 -spec evolved in significant ways and as a result, the Zarr-Python -library is now out of sync with the approved spec. Downstream libraries -(e.g. `Xarray `__) have added support -for this implementation and will need to migrate to the accepted spec -when it's available in Zarr-Python. - -Goals ------ - -- Provide a complete implementation of Zarr V3 through the Zarr-Python - API -- Clear the way for exciting extensions / ZEPs - (e.g. `sharding `__, - `variable chunking `__, - etc.) -- Provide a developer API that can be used to implement and register V3 - extensions -- Improve the performance of Zarr-Python by streamlining the interface - between the Store layer and higher level APIs (e.g. Groups and - Arrays) -- Clean up the internal and user facing APIs -- Improve code quality and robustness (e.g. achieve 100% type hint - coverage) -- Align the Zarr-Python array API with the `array API - Standard `__ - -Examples of what 3.0 will enable? ---------------------------------- - -1. Reading and writing V3 spec-compliant groups and arrays -2. V3 extensions including sharding and variable chunking. -3. Improved performance by leveraging concurrency when - creating/reading/writing to stores (imagine a - ``create_hierarchy(zarr_objects)`` function). -4. User-developed extensions (e.g.
storage-transformers) can be - registered with Zarr-Python at runtime - -Non-goals (of this document) ----------------------------- - -- Implementation of any unaccepted Zarr V3 extensions -- Major revisions to the Zarr V3 spec - -Requirements ------------- - -1. Read and write spec compliant V2 and V3 data -2. Limit unnecessary traffic to/from the store -3. Cleanly define the Array/Group/Store abstractions -4. Cleanly define how V2 will be supported going forward -5. Provide a clear roadmap to help users upgrade to 3.0 -6. Developer tools / hooks for registering extensions - -Design ------- - -Async API -~~~~~~~~~ - -Zarr-Python is an IO library. As such, supporting concurrent action -against the storage layer is critical to achieving acceptable -performance. The Zarr-Python 2 was not designed with asynchronous -computation in mind and as a result has struggled to effectively -leverage the benefits of concurrency. At one point, ``getitems`` and -``setitems`` support was added to the Zarr store model but that is only -used for operating on a set of chunks in a single variable. - -With Zarr-Python 3.0, we have the opportunity to revisit this design. -The proposal here is as follows: - -1. The ``Store`` interface will be entirely async. -2. On top of the async ``Store`` interface, we will provide an - ``AsyncArray`` and ``AsyncGroup`` interface. -3. Finally, the primary user facing API will be synchronous ``Array`` - and ``Group`` classes that wrap the async equivalents. - -**Examples** - -- **Store** - - .. code:: python - - class Store: - ... - async def get(self, key: str) -> bytes: - ... - async def get_partial_values(self, key_ranges: List[Tuple[str, Tuple[int, Optional[int]]]]) -> bytes: - ... - # (no sync interface here) - -- **Array** - - .. code:: python - - class AsyncArray: - ... 
- - async def getitem(self, selection: Selection) -> np.ndarray: - # the core logic for getitem goes here - - class Array: - _async_array: AsyncArray - - def __getitem__(self, selection: Selection) -> np.ndarray: - return sync(self._async_array.getitem(selection)) - -- **Group** - - .. code:: python - - class AsyncGroup: - ... - - async def create_group(self, path: str, **kwargs) -> AsyncGroup: - # the core logic for create_group goes here - - class Group: - _async_group: AsyncGroup - - def create_group(self, path: str, **kwargs) -> Group: - return sync(self._async_group.create_group(path, **kwargs)) - - **Internal Synchronization API** - -With the ``Store`` and core ``AsyncArray``/ ``AsyncGroup`` classes being -predominantly async, Zarr-Python will need an internal API to provide a -synchronous API. The proposal here is to use the approach in -`fsspec `__ -to provide a high-level ``sync`` function that takes an ``awaitable`` -and runs it in its managed IO Loop / thread. - -| **FAQ** 1. Why two levels of Arrays/groups? a. First, this is an - intentional decision and departure from the current Zarrita - implementation b. The idea is that users rarely want to mix - interfaces. Either they are working within an async context (currently - quite rare) or they are in a typical synchronous context. c. Splitting - the two will allow us to clearly define behavior on the ``AsyncObj`` - and simply wrap it in the ``SyncObj``. 2. What if a store is only has - a synchronous backend? a. First off, this is expected to be a fairly - rare occurrence. Most storage backends have async interfaces. b. But - in the event a storage backend doesn’t have a async interface, there - is nothing wrong with putting synchronous code in ``async`` methods. - There are approaches to enabling concurrent action through wrappers - like AsyncIO’s ``loop.run_in_executor`` (`ref - 1 `__, - `ref 2 `__, `ref - 3 `__, - `ref - 4 `__. -| 3. 
Will Zarr help manage the async contexts encouraged by some - libraries - (e.g. `AioBotoCore `__)? - a. Many async IO libraries require entering an async context before - interacting with the API. We expect some experimentation to be needed - here but the initial design will follow something close to what fsspec - does (`example in - s3fs `__). - 4. Why not provide a synchronous Store interface? a. We could but this - design is simpler. It would mean supporting it in the ``AsyncGroup`` - and ``AsyncArray`` classes, which may be more trouble than it's worth. - Storage backends that do not have an async API will be encouraged to - wrap blocking calls in an async wrapper - (e.g. ``loop.run_in_executor``). - -Store API -~~~~~~~~~ - -The ``Store`` API is specified directly in the V3 specification. All V3 -stores should implement this abstract API, omitting Write and List -support as needed. As described above, all stores will be expected to -expose the required methods as async methods. - -**Example** - -.. code:: python - - class ReadWriteStore: - ... - async def get(self, key: str) -> bytes: - ... - - async def get_partial_values(self, key_ranges: List[Tuple[str, int, int]]) -> bytes: - ... - - async def set(self, key: str, value: Union[bytes, bytearray, memoryview]) -> None: - ... # required for writable stores - - async def set_partial_values(self, key_start_values: List[Tuple[str, int, Union[bytes, bytearray, memoryview]]]) -> None: - ... # required for writable stores - - async def list(self) -> List[str]: - ... # required for listable stores - - async def list_prefix(self, prefix: str) -> List[str]: - ... # required for listable stores - - async def list_dir(self, prefix: str) -> List[str]: - ... # required for listable stores - - # additional (optional) methods - async def getsize(self, prefix: str) -> int: - ... - - async def rename(self, src: str, dest: str) -> None: - ...
- - -Recognizing that there are many Zarr applications today that rely on the -``MutableMapping`` interface supported by Zarr-Python 2, a wrapper store -will be developed to allow existing stores to plug directly into this -API. - -Array API -~~~~~~~~~ - -The user facing array interface will implement a subset of the `Array -API Standard `__. Most of the -computational parts of the Array API Standard don’t fit into Zarr right -now. That’s okay. What matters most is that we ensure we can give -downstream applications a compliant API. - -*Note, Zarr already does most of this so this is more about formalizing -the relationship than a substantial change in API.* - -+------------------------+------------------------+-------------------------+-------------------------+ -| | Included | Not Included | Unknown / Maybe Possible| -+========================+========================+=========================+=========================+ -| **Attributes** | ``dtype`` | ``mT`` | ``device`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``ndim`` | ``T`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``shape`` | | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``size`` | | | -+------------------------+------------------------+-------------------------+-------------------------+ -| **Methods** | ``__getitem__`` | ``__array_namespace__`` | ``to_device`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``__setitem__`` | ``__abs__`` | ``__bool__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``__eq__`` | ``__add__`` | ``__complex__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``__bool__`` | 
``__and__`` | ``__dlpack__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__floordiv__`` | ``__dlpack_device__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__ge__`` | ``__float__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__gt__`` | ``__index__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__invert__`` | ``__int__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__le__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__lshift__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__lt__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__matmul__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__mod__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__mul__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__ne__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__neg__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__or__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__pos__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ 
-| | | ``__pow__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__rshift__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__sub__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__truediv__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__xor__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| **Creation functions** | ``zeros`` | | ``arange`` | -| (``zarr.creation``) | | | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``zeros_like`` | | ``asarray`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``ones`` | | ``eye`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``ones_like`` | | ``from_dlpack`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``full`` | | ``linspace`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``full_like`` | | ``meshgrid`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``empty`` | | ``tril`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``empty_like`` | | ``triu`` | -+------------------------+------------------------+-------------------------+-------------------------+ - -In addition to the core array API defined above, the Array class should -have the following Zarr specific properties: - -- ``.metadata`` (see Metadata Interface below) -- 
``.attrs`` - (pulled from metadata object) -- ``.info`` - (replicated from existing property †) - -*† In Zarr-Python 2, the info property listed the store to identify -initialized chunks. By default this will be turned off in 3.0 but will -be configurable.* - -**Indexing** - -Zarr-Python currently supports ``__getitem__`` style indexing and the -special ``oindex`` and ``vindex`` indexers. These are not part of the -current Array API standard (see -`data-apis/array-api#669 `__) -but they have been `proposed as a -NEP `__. -Zarr-Python will maintain these in 3.0. - -We are also exploring a new high-level indexing API that will enable -optimized batch/concurrent loading of many chunks. We expect this to be -important to enable performant loading of data in the context of -sharding. See `this -discussion `__ -for more detail. - -Concurrent indexing across multiple arrays will be possible using the -AsyncArray API. - -**Async and Sync Array APIs** - -Most of the logic to support Zarr Arrays will live in the ``AsyncArray`` -class. There are a few notable differences that should be called out. - -=============== ============ -Sync Method Async Method -=============== ============ -``__getitem__`` ``getitem`` -``__setitem__`` ``setitem`` -``__eq__`` ``equals`` -=============== ============ - -**Metadata interface** - -Zarr-Python 2.\* closely mirrors the V2 spec metadata schema in the -Array and Group classes. In 3.0, we plan to move the underlying metadata -representation to a separate interface (e.g. ``Array.metadata``). This -interface will return either a ``V2ArrayMetadata`` or -``V3ArrayMetadata`` object (both will inherit from a parent -``ArrayMetadataABC`` class). The ``V2ArrayMetadata`` and -``V3ArrayMetadata`` classes will be responsible for producing valid JSON -representations of their metadata, and yielding a consistent view to the -``Array`` or ``Group`` class.
- -Group API -~~~~~~~~~ - -The main question is how closely we should follow the existing -Zarr-Python implementation / ``MutableMapping`` interface. The table -below shows the primary ``Group`` methods in Zarr-Python 2 and attempts -to identify if and how they would be implemented in 3.0. - -+---------------------+------------------+------------------+-----------------------+ -| V2 Group Methods | ``AsyncGroup`` | ``Group`` | ``h5py_compat.Group`` | -+=====================+==================+==================+=======================+ -| ``__len__`` | ``length`` | ``__len__`` | ``__len__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__iter__`` | ``__aiter__`` | ``__iter__`` | ``__iter__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__contains__`` | ``contains`` | ``__contains__`` | ``__contains__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__getitem__`` | ``getitem`` | ``__getitem__`` | ``__getitem__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__enter__`` | N/A | N/A | ``__enter__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__exit__`` | N/A | N/A | ``__exit__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``group_keys`` | ``group_keys`` | ``group_keys`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``groups`` | ``groups`` | ``groups`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``array_keys`` | ``array_key`` | ``array_keys`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``arrays`` | ``arrays`` | ``arrays`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``visit`` 
| ? | ? | ``visit`` | -+---------------------+------------------+------------------+-----------------------+ -| ``visitkeys`` | ? | ? | ? | -+---------------------+------------------+------------------+-----------------------+ -| ``visitvalues`` | ? | ? | ? | -+---------------------+------------------+------------------+-----------------------+ -| ``visititems`` | ? | ? | ``visititems`` | -+---------------------+------------------+------------------+-----------------------+ -| ``tree`` | ``tree`` | ``tree`` | ``Both`` | -+---------------------+------------------+------------------+-----------------------+ -| ``create_group`` | ``create_group`` | ``create_group`` | ``create_group`` | -+---------------------+------------------+------------------+-----------------------+ -| ``require_group`` | N/A | N/A | ``require_group`` | -+---------------------+------------------+------------------+-----------------------+ -| ``create_groups`` | ? | ? | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``require_groups`` | ? | ? | ? 
| -+---------------------+------------------+------------------+-----------------------+ -| ``create_dataset`` | N/A | N/A | ``create_dataset`` | -+---------------------+------------------+------------------+-----------------------+ -| ``require_dataset`` | N/A | N/A | ``require_dataset`` | -+---------------------+------------------+------------------+-----------------------+ -| ``create`` | ``create_array`` | ``create_array`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``empty`` | ``empty`` | ``empty`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``zeros`` | ``zeros`` | ``zeros`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``ones`` | ``ones`` | ``ones`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``full`` | ``full`` | ``full`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``array`` | ``create_array`` | ``create_array`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``empty_like`` | ``empty_like`` | ``empty_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``zeros_like`` | ``zeros_like`` | ``zeros_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``ones_like`` | ``ones_like`` | ``ones_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``full_like`` | ``full_like`` | ``full_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``move`` | ``move`` | ``move`` | ``move`` | -+---------------------+------------------+------------------+-----------------------+ - -**``zarr.h5compat.Group``** --- -Zarr-Python 2.\* made an attempt to align its API 
with that of -`h5py `__. With 3.0, we will -relax this alignment in favor of providing an explicit compatibility -module (``zarr.h5py_compat``). This module will expose the ``Group`` and -``Dataset`` APIs that map to Zarr-Python’s ``Group`` and ``Array`` -objects. - -Creation API -~~~~~~~~~~~~ - -Zarr-Python 2.\* bundles together the creation and serialization of Zarr -objects. Zarr-Python 3.\* will make it possible to create objects in -memory separate from serializing them. This will specifically enable -writing hierarchies of Zarr objects in a single batch step. For example: - -.. code:: python - - - arr1 = Array(shape=(10, 10), path="foo/bar", dtype="i4", store=store) - arr2 = Array(shape=(10, 10), path="foo/spam", dtype="f8", store=store) - - arr1.save() - arr2.save() - - # or equivalently - - zarr.save_many([arr1 ,arr2]) - -*Note: this batch creation API likely needs additional design effort -prior to implementation.* - -Plugin API -~~~~~~~~~~ - -Zarr V3 was designed to be extensible at multiple layers. Zarr-Python -will support these extensions through a combination of `Abstract Base -Classes `__ (ABCs) and -`Entrypoints `__. - -**ABCs** - -Zarr V3 will expose Abstract base classes for the following objects: - -- ``Store``, ``ReadStore``, ``ReadWriteStore``, ``ReadListStore``, and - ``ReadWriteListStore`` -- ``BaseArray``, ``SynchronousArray``, and ``AsynchronousArray`` -- ``BaseGroup``, ``SynchronousGroup``, and ``AsynchronousGroup`` -- ``Codec``, ``ArrayArrayCodec``, ``ArrayBytesCodec``, - ``BytesBytesCodec`` - -**Entrypoints** - -Lots more thinking here but the idea here is to provide entrypoints for -``data type``, ``chunk grid``, ``chunk key encoding``, ``codecs``, -``storage_transformers`` and ``stores``. 
These might look something -like: - -:: - - entry_points=""" - [zarr.codecs] - blosc_codec=codec_plugin:make_blosc_codec - zlib_codec=codec_plugin:make_zlib_codec - """ - -Python type hints and static analysis -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Target 100% Mypy coverage in 3.0 source. - -Observability -~~~~~~~~~~~~~ - -A persistent problem in Zarr-Python is diagnosing problems that span -many parts of the stack. To address this in 3.0, we will add a basic -logging framework that can be used to debug behavior at various levels -of the stack. We propose to add the separate loggers for the following -namespaces: - -- ``array`` -- ``group`` -- ``store`` -- ``codec`` - -These should be documented such that users know how to activate them and -developers know how to use them when developing extensions. - -Dependencies -~~~~~~~~~~~~ - -Today, Zarr-Python has the following required dependencies: - -.. code:: python - - dependencies = [ - 'asciitree', - 'numpy>=1.20,!=1.21.0', - 'fasteners', - 'numcodecs>=0.10.0', - ] - -What other dependencies should be considered? - -1. Attrs - Zarrita makes extensive use of the Attrs library -2. Fsspec - Zarrita has a hard dependency on Fsspec. This could be - easily relaxed though. - -Breaking changes relative to Zarr-Python 2.\* ---------------------------------------------- - -1. H5py compat moved to a stand alone module? -2. ``Group.__getitem__`` support moved to ``Group.members.__getitem__``? -3. Others? - -Open questions --------------- - -1. How to treat V2 - - a. Note: Zarrita currently implements a separate ``V2Array`` and - ``V3Array`` classes. This feels less than ideal. - b. We could easily convert metadata from v2 to the V3 Array, but what - about writing? - c. Ideally, we don’t have completely separate code paths. But if its - too complicated to support both within one interface, its probably - better. - -2. How and when to remove the current implementation of V3. - - a. 
It’s hidden behind a hard-to-use feature flag so we probably don’t - need to do anything. - -3. How to model runtime configuration? -4. Which extensions belong in Zarr-Python and which belong in separate - packages? - - a. We don’t need to take a strong position on this here. It’s likely - that someone will want to put Sharding in. That will be useful to - develop in parallel because it will give us a good test case for - the plugin interface. - -Testing -------- - -Zarr-python 3.0 adds a major new dimension to Zarr: Async support. This -also comes with a compatibility risk, we will need to thoroughly test -support in key execution environments. Testing plan: - Reuse the -existing test suite for testing the ``v3`` API. - ``xfail`` tests that -expose breaking changes with ``3.0 - breaking change`` description. This -will help identify additional and/or unintentional breaking changes - -Rework tests that were only testing internal APIs. - Add a set of -functional / integration tests targeting real-world workflows in various -contexts (e.g. w/ Dask) - -Development process -------------------- - -Zarr-Python 3.0 will introduce a number of new APIs and breaking changes -to existing APIs. In order to facilitate ongoing support for Zarr-Python -2.*, we will take on the following development process: - -- Create a ``v3`` branch that can be use for developing the core - functionality apart from the ``main`` branch. This will allow us to - support ongoing work and bug fixes on the ``main`` branch. -- Put the ``3.0`` APIs inside a ``zarr.v3`` module. Imports from this - namespace will all be new APIs that users can develop and test - against once the ``v3`` branch is merged to ``main``. -- Kickstart the process by pulling in the current state of ``zarrita`` - - which has many of the features described in this design. 
-- Release a series of 2.\* releases with the ``v3`` namespace -- When ``v3`` is complete, move contents of ``v3`` to the package root - -**Milestones** - -Below are a set of specific milestones leading toward the completion of -this process. As work begins, we expect this list to grow in -specificity. - -1. Port current version of Zarrita to Zarr-Python -2. Formalize Async interface by splitting ``Array`` and ``Group`` - objects into Sync and Async versions -3. Implement “fancy” indexing operations on the ``AsyncArray`` -4. Implement an abstract base class for the ``Store`` interface and a - wrapper ``Store`` to make use of existing ``MutableMapping`` stores. -5. Rework the existing unit test suite to use the ``v3`` namespace. -6. Develop a plugin interface for extensions -7. Develop a set of functional and integration tests -8. Work with downstream libraries (Xarray, Dask, etc.) to test new APIs - -TODOs ------ - -The following subjects are not covered in detail above but perhaps -should be. Including them here so they are not forgotten. - -1. [Store] Should Zarr provide an API for caching objects after first - read/list/etc. Read only stores? -2. [Array] buffer protocol support -3. [Array] ``meta_array`` support -4. [Extensions] Define how Zarr-Python will consume the various plugin - types -5. [Misc] H5py compatibility requires a bit more work and a champion to - drive it forward. -6. [Misc] Define ``chunk_store`` API in 3.0 -7. [Misc] Define ``synchronizer`` API in 3.0 - -References ----------- - -1. `Zarr-Python - repository `__ -2. `Zarr core specification (version 3.0) — Zarr specs - documentation `__ -3. `Zarrita repository `__ -4. `Async-Zarr `__ -5. 
`Zarr-Python Discussion - Topic `__ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000000..8508d8e0cf --- /dev/null +++ b/docs/index.md @@ -0,0 +1,88 @@ +# Zarr-Python + +**Useful links**: +[Source Repository](https://github.com/zarr-developers/zarr-python) | +[Issue Tracker](https://github.com/zarr-developers/zarr-python/issues) | +[Developer Chat](https://ossci.zulipchat.com/) | +[Zarr specifications](https://zarr-specs.readthedocs.io) + + +Zarr is a powerful library for storage of n-dimensional arrays, supporting chunking, +compression, and various backends, making it a versatile choice for scientific and +large-scale data. + +Zarr-Python is a Python library for reading and writing Zarr groups and arrays. Highlights include: + +* Specification support for both Zarr format 2 and 3. +* Create and read from N-dimensional arrays using NumPy-like semantics. +* Flexible storage enables reading and writing from local, cloud and in-memory stores. +* High performance: Enables fast I/O with support for asynchronous I/O and multi-threading. +* Extensible: Customizable with user-defined codecs and stores. + +## Installation + +Zarr requires Python 3.11 or higher. You can install it via `pip`: + +```bash +pip install zarr +``` + +or `conda`: + +```bash +conda install --channel conda-forge zarr +``` + +## Navigating the documentation + +
+ +- [:material-clock-fast:{ .lg .middle } __Quick start__](quick-start.md) + + --- + + New to Zarr? Check out the quick start guide. It contains a brief + introduction to Zarr's main concepts and links to additional tutorials. + + +- [:material-book-open:{ .lg .middle } __User guide__](user-guide/installation.md) + + --- + + A detailed guide for how to use Zarr-Python. + + +- [:material-api:{ .lg .middle } __API Reference__](api/open.md) + + --- + + The reference guide contains a detailed description of the functions, modules, + and objects included in Zarr. The reference describes how the methods work and + which parameters can be used. It assumes that you have an understanding of the + key concepts. + + +- [:material-account-group:{ .lg .middle } __Contributor's Guide__](contributing.md) + + --- + + Want to contribute to Zarr? We welcome contributions in the form of bug reports, + bug fixes, documentation, enhancement proposals and more. The contributing guidelines + will guide you through the process of improving Zarr. + +
+ + +## Project Status + +More information about the Zarr format can be found on the [main website](https://zarr.dev). + +If you are using Zarr-Python, we would [love to hear about it](https://github.com/zarr-developers/community/issues/19). + +### Funding and Support +The project is fiscally sponsored by [NumFOCUS](https://numfocus.org/), a US +501(c)(3) public charity, and development has been supported by the +[MRC Centre for Genomics and Global Health](https://github.com/cggh/) +and the [Chan Zuckerberg Initiative](https://chanzuckerberg.com/). + +[Donate to Zarr](https://numfocus.org/donate-to-zarr) to support the project! diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 83d427e290..0000000000 --- a/docs/index.rst +++ /dev/null @@ -1,113 +0,0 @@ -.. _zarr_docs_mainpage: - -*********** -Zarr-Python -*********** - -.. toctree:: - :maxdepth: 1 - :hidden: - - quickstart - user-guide/index - API reference - release-notes - developers/index - about - -**Version**: |version| - -**Useful links**: -`Source Repository `_ | -`Issue Tracker `_ | -`Developer Chat `_ | -`Zarr specifications `_ - -Zarr-Python is a Python library for reading and writing Zarr groups and arrays. Highlights include: - -* Specification support for both Zarr format 2 and 3. -* Create and read from N-dimensional arrays using NumPy-like semantics. -* Flexible storage enables reading and writing from local, cloud and in-memory stores. -* High performance: Enables fast I/O with support for asynchronous I/O and multi-threading. -* Extensible: Customizable with user-defined codecs and stores. - -.. grid:: 2 - - .. grid-item-card:: - :img-top: _static/index_getting_started.svg - - Quick Start - ^^^^^^^^^^^ - - New to Zarr? Check out the quick start guide. It contains a brief - introduction to Zarr's main concepts and links to additional tutorials. - - +++ - - .. button-ref:: quickstart - :expand: - :color: dark - :click-parent: - - To the Quick Start - - .. 
grid-item-card:: - :img-top: _static/index_user_guide.svg - - Guide - ^^^^^ - - A detailed guide for how to use Zarr-Python. - - +++ - - .. button-ref:: user-guide/index - :expand: - :color: dark - :click-parent: - - To the user guide - - .. grid-item-card:: - :img-top: _static/index_api.svg - - API Reference - ^^^^^^^^^^^^^ - - The reference guide contains a detailed description of the functions, - modules, and objects included in Zarr. The reference describes how the - methods work and which parameters can be used. It assumes that you have an - understanding of the key concepts. - - +++ - - .. button-ref:: api/zarr/index - :expand: - :color: dark - :click-parent: - - To the API reference - - .. grid-item-card:: - :img-top: _static/index_contribute.svg - - Contributor's Guide - ^^^^^^^^^^^^^^^^^^^ - - Want to contribute to Zarr? We welcome contributions in the form of bug reports, - bug fixes, documentation, enhancement proposals and more. The contributing guidelines - will guide you through the process of improving Zarr. - - +++ - - .. button-ref:: developers/contributing - :expand: - :color: dark - :click-parent: - - To the contributor's guide - - -**Download documentation**: `PDF/Zipped HTML `_ - -.. _NumCodecs: https://numcodecs.readthedocs.io diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 0000000000..d61a1f54dc --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,9 @@ + +{% extends "base.html" %} + +{% block outdated %} + You're not viewing the latest version. + + Click here to go to latest. 
+ +{% endblock %} diff --git a/docs/overrides/stylesheets/extra.css b/docs/overrides/stylesheets/extra.css new file mode 100644 index 0000000000..3e2ef3d330 --- /dev/null +++ b/docs/overrides/stylesheets/extra.css @@ -0,0 +1,52 @@ +:root, +[data-md-color-scheme="default"] { + /* --md-primary-fg-color: #cf3f02; + --md-default-fg-color: #443f3f; */ + --boxShadowD: 0px 12px 24px 0px rgba(68, 63, 63, 0.08), + 0px 0px 4px 0px rgba(68, 63, 63, 0.08); +} +body { + margin: 0; + padding: 0; + /* font-size: 16px; */ +} +h1, +h2, +h3, +h4, +h5, +h6 { + font-family: var(--md-heading-font); + font-weight: bold; +} +.md-typeset h1, +.md-typeset h2 { + font-weight: normal; + color: var(--md-default-fg-color); +} +.md-typeset h3, +.md-typeset h4 { + font-weight: bold; + color: var(--md-default-fg-color); +} +.md-button, +.md-typeset .md-button { + font-family: var(--md-heading-font); +} +.md-content .supheading { + font-family: var(--md-heading-font); + text-transform: uppercase; + color: var(--md-primary-fg-color); + font-size: 0.75rem; + font-weight: bold; +} + +.md-header__button.md-logo img, +.md-header__button.md-logo svg { + height: 2rem; + width: auto; +} + +.md-header { + padding: 0.2rem 0; +} diff --git a/docs/quick-start.md b/docs/quick-start.md new file mode 100644 index 0000000000..42ac95d169 --- /dev/null +++ b/docs/quick-start.md @@ -0,0 +1,176 @@ +This section will help you get up and running with +the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. 
+ +### Creating an Array + +To get started, you can create a simple Zarr array: + +```python exec="true" session="quickstart" +import shutil +shutil.rmtree('data', ignore_errors=True) +import numpy as np +from pprint import pprint +import io +import warnings + +warnings.filterwarnings( + "ignore", + message="Numcodecs codecs are not in the Zarr version 3 specification*", + category=UserWarning +) +np.random.seed(0) +``` + +```python exec="true" session="quickstart" source="above" result="ansi" +import zarr +import numpy as np + +# Create a 2D Zarr array +z = zarr.create_array( + store="data/example-1.zarr", + shape=(100, 100), + chunks=(10, 10), + dtype="f4" +) + +# Assign data to the array +z[:, :] = np.random.random((100, 100)) +print(z.info) +``` + +Here, we created a 2D array of shape `(100, 100)`, chunked into blocks of +`(10, 10)`, and filled it with random floating-point data. This array was +written to a `LocalStore` in the `data/example-1.zarr` directory. + +#### Compression and Filters + +Zarr supports data compression and filters. For example, to use Blosc compression: + + +```python exec="true" session="quickstart" source="above" result="code" + +# Create a 2D Zarr array with Blosc compression +z = zarr.create_array( + store="data/example-2.zarr", + shape=(100, 100), + chunks=(10, 10), + dtype="f4", + compressors=zarr.codecs.BloscCodec( + cname="zstd", + clevel=3, + shuffle=zarr.codecs.BloscShuffle.shuffle + ) +) + +# Assign data to the array +z[:, :] = np.random.random((100, 100)) +print(z.info) +``` + +This compresses the data using the Blosc codec with shuffle enabled for better compression. 
+ + +### Hierarchical Groups + +Zarr allows you to create hierarchical groups, similar to directories: + +```python exec="true" session="quickstart" source="above" result="ansi" + +# Create nested groups and add arrays +root = zarr.group("data/example-3.zarr") +foo = root.create_group(name="foo") +bar = root.create_array( + name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" +) +spam = foo.create_array(name="spam", shape=(10,), dtype="i4") + +# Assign values +bar[:, :] = np.random.random((100, 10)) +spam[:] = np.arange(10) + +# print the hierarchy +print(root.tree()) +``` + +This creates a root group with two members: a subgroup `foo` (which contains the array `spam`) and an array `bar`. + +#### Batch Hierarchy Creation + +Zarr provides tools for creating a collection of arrays and groups with a single function call. +Suppose we want to copy existing groups and arrays into a new storage backend: + +```python exec="true" session="quickstart" source="above" result="html" + +# Create nested groups and add arrays +root = zarr.group("data/example-4.zarr", attributes={'name': 'root'}) +foo = root.create_group(name="foo") +bar = root.create_array( + name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" +) +nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} +# Report nodes +output = io.StringIO() +pprint(nodes, stream=output, width=60, depth=3) +result = output.getvalue() +print(result) +# Create new hierarchy from nodes +new_nodes = dict(zarr.create_hierarchy(store=zarr.storage.MemoryStore(), nodes=nodes)) +new_root = new_nodes[''] +assert new_root.attrs == root.attrs +``` + +Note that [`zarr.create_hierarchy`][] will only initialize arrays and groups -- copying array data must +be done in a separate step. + +### Persistent Storage + +Zarr supports persistent storage to disk or cloud-compatible backends. While examples above +utilized a [`zarr.storage.LocalStore`][], a number of other storage options are available. 
+ +Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage +using external libraries like [s3fs](https://s3fs.readthedocs.io) or +[gcsfs](https://gcsfs.readthedocs.io): + +```python + +import s3fs + +z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") +z[:, :] = np.random.random((100, 100)) +``` + +A single-file store can also be created using the [`zarr.storage.ZipStore`][]: + +```python exec="true" session="quickstart" source="above" + +# Store the array in a ZIP file +store = zarr.storage.ZipStore("data/example-5.zip", mode="w") + +z = zarr.create_array( + store=store, + shape=(100, 100), + chunks=(10, 10), + dtype="f4" +) + +# write to the array +z[:, :] = np.random.random((100, 100)) + +# the ZipStore must be explicitly closed +store.close() +``` + +To open an existing array from a ZIP file: + +```python exec="true" session="quickstart" source="above" result="code" + +# Open the ZipStore in read-only mode +store = zarr.storage.ZipStore("data/example-5.zip", read_only=True) + +z = zarr.open_array(store, mode='r') + +# read the data as a NumPy Array +print(z[:]) +``` + +Read more about Zarr's storage options in the [User Guide](user-guide/index.md). diff --git a/docs/quickstart.rst b/docs/quickstart.rst deleted file mode 100644 index 66bdae2a2e..0000000000 --- a/docs/quickstart.rst +++ /dev/null @@ -1,209 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - >>> - >>> import numpy as np - >>> np.random.seed(0) - -Quickstart -========== - -Welcome to the Zarr-Python Quickstart guide! This page will help you get up and running with -the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. - -Zarr is a powerful library for storage of n-dimensional arrays, supporting chunking, -compression, and various backends, making it a versatile choice for scientific and -large-scale data. 
- -Installation ------------- - -Zarr requires Python 3.11 or higher. You can install it via `pip`: - -.. code-block:: bash - - pip install zarr - -or `conda`: - -.. code-block:: bash - - conda install --channel conda-forge zarr - -Creating an Array ------------------ - -To get started, you can create a simple Zarr array:: - - >>> import zarr - >>> import numpy as np - >>> - >>> # Create a 2D Zarr array - >>> z = zarr.create_array( - ... store="data/example-1.zarr", - ... shape=(100, 100), - ... chunks=(10, 10), - ... dtype="f4" - ... ) - >>> - >>> # Assign data to the array - >>> z[:, :] = np.random.random((100, 100)) - >>> z.info - Type : Array - Zarr format : 3 - Data type : DataType.float32 - Shape : (100, 100) - Chunk shape : (10, 10) - Order : C - Read-only : False - Store type : LocalStore - Codecs : [{'endian': }, {'level': 0, 'checksum': False}] - No. bytes : 40000 (39.1K) - -Here, we created a 2D array of shape ``(100, 100)``, chunked into blocks of -``(10, 10)``, and filled it with random floating-point data. This array was -written to a ``LocalStore`` in the ``data/example-1.zarr`` directory. - -Compression and Filters -~~~~~~~~~~~~~~~~~~~~~~~ - -Zarr supports data compression and filters. For example, to use Blosc compression:: - - >>> z = zarr.create_array( - ... "data/example-3.zarr", - ... mode="w", shape=(100, 100), - ... chunks=(10, 10), dtype="f4", - ... compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) - ... ) - >>> z[:, :] = np.random.random((100, 100)) - >>> - >>> z.info - Type : Array - Zarr format : 3 - Data type : DataType.float32 - Shape : (100, 100) - Chunk shape : (10, 10) - Order : C - Read-only : False - Store type : LocalStore - Codecs : [{'endian': }, {'level': 0, 'checksum': False}] - No. bytes : 40000 (39.1K) - -This compresses the data using the Zstandard codec with shuffle enabled for better compression. 
- -Hierarchical Groups -------------------- - -Zarr allows you to create hierarchical groups, similar to directories:: - - >>> # Create nested groups and add arrays - >>> root = zarr.group("data/example-2.zarr") - >>> foo = root.create_group(name="foo") - >>> bar = root.create_array( - ... name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" - ... ) - >>> spam = foo.create_array(name="spam", shape=(10,), dtype="i4") - >>> - >>> # Assign values - >>> bar[:, :] = np.random.random((100, 10)) - >>> spam[:] = np.arange(10) - >>> - >>> # print the hierarchy - >>> root.tree() - / - ├── bar (100, 10) float32 - └── foo - └── spam (10,) int32 - - -This creates a group with two datasets: ``foo`` and ``bar``. - -Batch Hierarchy Creation -~~~~~~~~~~~~~~~~~~~~~~~~ - -Zarr provides tools for creating a collection of arrays and groups with a single function call. -Suppose we want to copy existing groups and arrays into a new storage backend: - - >>> # Create nested groups and add arrays - >>> root = zarr.group("data/example-3.zarr", attributes={'name': 'root'}) - >>> foo = root.create_group(name="foo") - >>> bar = root.create_array( - ... name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" - ... ) - >>> nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} - >>> print(nodes) - >>> from zarr.storage import MemoryStore - >>> new_nodes = dict(zarr.create_hierarchy(store=MemoryStore(), nodes=nodes)) - >>> new_root = new_nodes[''] - >>> assert new_root.attrs == root.attrs - -Note that :func:`zarr.create_hierarchy` will only initialize arrays and groups -- copying array data must -be done in a separate step. - -Persistent Storage ------------------- - -Zarr supports persistent storage to disk or cloud-compatible backends. While examples above -utilized a :class:`zarr.storage.LocalStore`, a number of other storage options are available. 
- -Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage -using external libraries like `s3fs `_ or -`gcsfs `_:: - - >>> import s3fs # doctest: +SKIP - >>> - >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") # doctest: +SKIP - >>> z[:, :] = np.random.random((100, 100)) # doctest: +SKIP - -A single-file store can also be created using the the :class:`zarr.storage.ZipStore`:: - - >>> # Store the array in a ZIP file - >>> store = zarr.storage.ZipStore("data/example-3.zip", mode='w') - >>> - >>> z = zarr.create_array( - ... store=store, - ... mode="w", - ... shape=(100, 100), - ... chunks=(10, 10), - ... dtype="f4" - ... ) - >>> - >>> # write to the array - >>> z[:, :] = np.random.random((100, 100)) - >>> - >>> # the ZipStore must be explicitly closed - >>> store.close() - -To open an existing array from a ZIP file:: - - >>> # Open the ZipStore in read-only mode - >>> store = zarr.storage.ZipStore("data/example-3.zip", read_only=True) - >>> - >>> z = zarr.open_array(store, mode='r') - >>> - >>> # read the data as a NumPy Array - >>> z[:] - array([[0.66734236, 0.15667458, 0.98720884, ..., 0.36229587, 0.67443246, - 0.34315267], - [0.65787303, 0.9544212 , 0.4830079 , ..., 0.33097172, 0.60423803, - 0.45621237], - [0.27632037, 0.9947008 , 0.42434934, ..., 0.94860053, 0.6226942 , - 0.6386924 ], - ..., - [0.12854576, 0.934397 , 0.19524333, ..., 0.11838563, 0.4967675 , - 0.43074256], - [0.82029045, 0.4671437 , 0.8090906 , ..., 0.7814118 , 0.42650765, - 0.95929915], - [0.4335856 , 0.7565437 , 0.7828931 , ..., 0.48119593, 0.66220033, - 0.6652362 ]], shape=(100, 100), dtype=float32) - -Read more about Zarr's storage options in the :ref:`User Guide `. 
- -Next Steps ----------- - -Now that you're familiar with the basics, explore the following resources: - -- `User Guide `_ -- `API Reference `_ diff --git a/docs/release-notes.md b/docs/release-notes.md new file mode 100644 index 0000000000..2fa947d5eb --- /dev/null +++ b/docs/release-notes.md @@ -0,0 +1,465 @@ +# Release notes + + + +# zarr 3.1.3 (2025-09-18) + +## Features + +- Add a command-line interface to migrate v2 Zarr metadata to v3. Corresponding functions are also provided under zarr.metadata. ([#1798](https://github.com/zarr-developers/zarr-python/issues/1798)) +- Add obstore implementation of delete_dir. ([#3310](https://github.com/zarr-developers/zarr-python/issues/3310)) +- Adds a registry for chunk key encodings for extensibility. This allows users to implement a custom `ChunkKeyEncoding`, which can be registered via `register_chunk_key_encoding` or as an entry point under `zarr.chunk_key_encoding`. ([#3436](https://github.com/zarr-developers/zarr-python/issues/3436)) +- Trying to open a group at a path where an array already exists now raises a helpful error. ([#3444](https://github.com/zarr-developers/zarr-python/issues/3444)) + +## Bugfixes + +- Prevents creation of groups (.create_group) or arrays (.create_array) as children of an existing array. ([#2582](https://github.com/zarr-developers/zarr-python/issues/2582)) +- Fix a bug preventing `ones_like`, `full_like`, `empty_like`, `zeros_like` and `open_like` functions from accepting an explicit specification of array attributes like shape, dtype, chunks etc. The functions `full_like`, `empty_like`, and `open_like` now also more consistently infer a `fill_value` parameter from the provided array. ([#2992](https://github.com/zarr-developers/zarr-python/issues/2992)) +- LocalStore now uses atomic writes, which should prevent some cases of corrupted data. 
([#3411](https://github.com/zarr-developers/zarr-python/issues/3411)) +- Fix a potential race condition when using `zarr.create_array` with the `data` parameter set to a NumPy array. Previously Zarr was iterating over the newly created array with a granularity that was too low. Now Zarr chooses a granularity that matches the size of the stored objects for that array. ([#3422](https://github.com/zarr-developers/zarr-python/issues/3422)) +- Fix ChunkGrid definition (broken in 3.1.2) ([#3425](https://github.com/zarr-developers/zarr-python/issues/3425)) +- Ensure syntax like `root['/subgroup']` works equivalently to `root['subgroup']` when using consolidated metadata. ([#3428](https://github.com/zarr-developers/zarr-python/issues/3428)) +- Creating a new group with `zarr.group` no longer errors. This fixes a regression introduced in version 3.1.2. ([#3431](https://github.com/zarr-developers/zarr-python/issues/3431)) +- Setting `fill_value` to a float like `0.0` when the data type of the array is an integer is a common mistake. This change lets Zarr Python read arrays with this erroneous metadata, although Zarr Python will not create such arrays. ([#3448](https://github.com/zarr-developers/zarr-python/issues/3448)) + +## Deprecations and Removals + +- The `Store.set_partial_writes` method, which was not used by Zarr-Python, has been removed. `store.supports_partial_writes` is now always `False`. ([#2859](https://github.com/zarr-developers/zarr-python/issues/2859)) + +## Misc + +- [#3376](https://github.com/zarr-developers/zarr-python/issues/3376), [#3390](https://github.com/zarr-developers/zarr-python/issues/3390), [#3403](https://github.com/zarr-developers/zarr-python/issues/3403), [#3449](https://github.com/zarr-developers/zarr-python/issues/3449) + +## 3.1.2 (2025-08-25) + +### Features + +- Added support for async vectorized and orthogonal indexing. 
([#3083](https://github.com/zarr-developers/zarr-python/issues/3083)) +- Make config param optional in init_array ([#3391](https://github.com/zarr-developers/zarr-python/issues/3391)) + +### Bugfixes + +- Ensure that -0.0 is not considered equal to 0.0 when checking if all the values in a chunk are equal to an array's fill value. ([#3144](https://github.com/zarr-developers/zarr-python/issues/3144)) +- Fix a bug in `create_array` caused by iterating over chunk-aligned regions instead of shard-aligned regions when writing data. Additionally, the behavior of `nchunks_initialized` has been adjusted. This function consistently reports the number of chunks present in stored objects, even when the array uses the sharding codec. ([#3299](https://github.com/zarr-developers/zarr-python/issues/3299)) +- Opening an array or group with `mode="r+"` will no longer create new arrays or groups. ([#3307](https://github.com/zarr-developers/zarr-python/issues/3307)) +- Added `zarr.errors.ArrayNotFoundError`, which is raised when attempting to open a zarr array that does not exist, and `zarr.errors.NodeNotFoundError`, which is raised when failing to open an array or a group in a context where either an array or a group was expected. ([#3367](https://github.com/zarr-developers/zarr-python/issues/3367)) +- Ensure passing `config` is handled properly when `open`ing an existing array. ([#3378](https://github.com/zarr-developers/zarr-python/issues/3378)) +- Raise a Zarr-specific error class when a codec can't be found by name when deserializing the given codecs. This avoids hiding this error behind a "not part of a zarr hierarchy" warning. 
([#3395](https://github.com/zarr-developers/zarr-python/issues/3395)) + +### Misc + +- [#3098](https://github.com/zarr-developers/zarr-python/issues/3098), [#3288](https://github.com/zarr-developers/zarr-python/issues/3288), [#3318](https://github.com/zarr-developers/zarr-python/issues/3318), [#3368](https://github.com/zarr-developers/zarr-python/issues/3368), [#3371](https://github.com/zarr-developers/zarr-python/issues/3371), [#3372](https://github.com/zarr-developers/zarr-python/issues/3372), [#3374](https://github.com/zarr-developers/zarr-python/issues/3374) + +## 3.1.1 (2025-07-28) + +### Features + +- Add lightweight implementations of `.getsize()` and `.getsize_prefix()` for ObjectStore. ([#3227](https://github.com/zarr-developers/zarr-python/issues/3227)) + +### Bugfixes + +- Creating a Zarr format 2 array with the `order` keyword argument no longer raises a warning. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- Fixed the error message when passing both `config` and `write_empty_chunks` arguments to reflect the current behaviour (`write_empty_chunks` takes precedence). ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- Creating a Zarr format 3 array with the `order` argument now consistently ignores this argument and raises a warning. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- When using [`from_array`][zarr.api.asynchronous.from_array] to copy a Zarr format 2 array to a Zarr format 3 array, if the memory order of the input array is `"F"` a warning is raised and the order ignored. This is because Zarr format 3 arrays are always stored in "C" order. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- The `config` argument to [`zarr.create`][zarr.create] (and functions that create arrays) is now used - previously it had no effect. 
([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- Ensure that all abstract methods of [`ZDType`][zarr.core.dtype.ZDType] raise a `NotImplementedError` when invoked. ([#3251](https://github.com/zarr-developers/zarr-python/issues/3251)) +- Register 'gpu' marker with pytest for downstream StoreTests. ([#3258](https://github.com/zarr-developers/zarr-python/issues/3258)) +- Expand the range of types accepted by `parse_data_type` to include strings and Sequences. +- Move the functionality of `zarr.core.dtype.parse_data_type` to a new function called `zarr.dtype.parse_dtype`. This change ensures that nomenclature is consistent across the codebase. `zarr.core.dtype.parse_data_type` remains, so this change is not breaking. ([#3264](https://github.com/zarr-developers/zarr-python/issues/3264)) +- Fix a regression introduced in 3.1.0 that prevented `inf`, `-inf`, and `nan` values from being stored in `attributes`. ([#3280](https://github.com/zarr-developers/zarr-python/issues/3280)) +- Fixes [`Group.nmembers()`][zarr.Group.nmembers] ignoring depth when using consolidated metadata. ([#3287](https://github.com/zarr-developers/zarr-python/issues/3287)) + +### Improved Documentation + +- Expand the data type docs to include a demonstration of the `parse_data_type` function. Expand the docstring for the `parse_data_type` function. ([#3249](https://github.com/zarr-developers/zarr-python/issues/3249)) +- Add a section on codecs to the migration guide. ([#3273](https://github.com/zarr-developers/zarr-python/issues/3273)) + +### Misc + +- Remove warnings about vlen-utf8 and vlen-bytes codecs ([#3268](https://github.com/zarr-developers/zarr-python/issues/3268)) + +## 3.1.0 (2025-07-14) + +### Features + +- Ensure that invocations of `create_array` use consistent keyword arguments, with consistent defaults. 
+ + [`zarr.api.synchronous.create_array`][] now takes a `write_data` keyword argument. + The `Group.create_array` method takes `data` and `write_data` keyword arguments. + The functions [`zarr.api.asynchronous.create`][], [`zarr.api.asynchronous.create_array`][] + and the methods `Group.create_array`, `Group.array`, had the default + `fill_value` changed from `0` to the `DEFAULT_FILL_VALUE` value, which instructs Zarr to + use the default scalar value associated with the array's data type as the fill value. These are + all functions or methods for array creation that mirror, wrap or are wrapped by, another function + that already has a default `fill_value` set to `DEFAULT_FILL_VALUE`. This change is necessary + to make these functions consistent across the entire codebase, but as this changes default values, + new data might have a different fill value than expected after this change. + + For data types where 0 is meaningful, like integers or floats, the default scalar is 0, so this + change should not be noticeable. For data types where 0 is ambiguous, like fixed-length unicode + strings, the default fill value might be different after this change. Users who were relying on how + Zarr interpreted `0` as a non-numeric scalar value should set their desired fill value explicitly + after this change. + +- Added public API for Buffer ABCs and implementations. + + Use `zarr.buffer` to access buffer implementations, and + `zarr.abc.buffer` for the interface to implement new buffer types. + + Users previously importing buffer from `zarr.core.buffer` should update their + imports to use `zarr.buffer`. As a reminder, all of `zarr.core` is + considered a private API that's not covered by zarr-python's versioning policy. ([#2871](https://github.com/zarr-developers/zarr-python/issues/2871)) + +- Adds zarr-specific data type classes. + + This change adds a `ZDType` base class for Zarr V2 and Zarr V3 data types. Child classes are + defined for each NumPy data type. 
Each child class defines routines for `JSON` serialization. + New data types can be created and registered dynamically. + + Prior to this change, Zarr Python had two streams for handling data types. For Zarr V2 arrays, + we used NumPy data type identifiers. For Zarr V3 arrays, we used a fixed set of string enums. Both + of these systems proved hard to extend. + + This change is largely internal, but it does change the type of the `dtype` and `data_type` + fields on the `ArrayV2Metadata` and `ArrayV3Metadata` classes. Previously, `ArrayV2Metadata.dtype` + was a NumPy `dtype` object, and `ArrayV3Metadata.data_type` was an internally-defined `enum`. + After this change, both `ArrayV2Metadata.dtype` and `ArrayV3Metadata.data_type` are instances of + `ZDType`. A NumPy data type can be generated from a `ZDType` via the `ZDType.to_native_dtype()` + method. The internally-defined Zarr V3 `enum` class is gone entirely, but the `ZDType.to_json(zarr_format=3)` + method can be used to generate either a string, or dictionary that has a string `name` field, that + represents the string value previously associated with that `enum`. + + For more on this new feature, see the [documentation](user-guide/data_types.md) ([#2874](https://github.com/zarr-developers/zarr-python/issues/2874)) + +- Added `NDBuffer.empty` method for faster ndbuffer initialization. ([#3191](https://github.com/zarr-developers/zarr-python/issues/3191)) + +- The minimum version of NumPy has increased to 1.26. ([#3226](https://github.com/zarr-developers/zarr-python/issues/3226)) + +- Add an alternate `from_array_metadata_and_store` constructor to `CodecPipeline`. ([#3233](https://github.com/zarr-developers/zarr-python/issues/3233)) + +### Bugfixes + +- Fixes a variety of issues related to string data types. 
+ + - Brings the `VariableLengthUTF8` data type Zarr V3 identifier in alignment with Zarr Python 3.0.8 + - Disallows creation of 0-length fixed-length data types + - Adds a regression test for the `VariableLengthUTF8` data type that checks against version 3.0.8 + - Allows users to request the `VariableLengthUTF8` data type with `str`, `"str"`, or `"string"`. ([#3170](https://github.com/zarr-developers/zarr-python/issues/3170)) + +- Add human readable size for No. bytes stored to `info_complete` ([#3190](https://github.com/zarr-developers/zarr-python/issues/3190)) + +- Restores the ability to create a Zarr V2 array with a `null` fill value by introducing a new + class `DefaultFillValue`, and setting the default value of the `fill_value` parameter in array + creation routines to an instance of `DefaultFillValue`. For Zarr V3 arrays, `None` will act as an + alias for a `DefaultFillValue` instance, thus preserving compatibility with existing code. ([#3198](https://github.com/zarr-developers/zarr-python/issues/3198)) + +- Fix the type of `ArrayV2Metadata.codec` to constrain it to `numcodecs.abc.Codec | None`. + Previously the type was more permissive, allowing objects that can be parsed into Codecs (e.g., the codec name). + The constructor of `ArrayV2Metadata` still allows the permissive input when creating new objects. ([#3232](https://github.com/zarr-developers/zarr-python/issues/3232)) + +### Improved Documentation + +- Add a self-contained example of data type extension to the `examples` directory, and expanded + the documentation for data types. ([#3157](https://github.com/zarr-developers/zarr-python/issues/3157)) + +- Add a description on how to create a RemoteStore of a specific filesystem to the `Remote Store` section in `docs/user-guide/storage.md`. + State in the docstring of `FsspecStore.from_url` that the filesystem type is inferred from the URL scheme. + + It should help a user handling the case when the type of FsspecStore doesn't match the URL scheme. 
([#3212](https://github.com/zarr-developers/zarr-python/issues/3212)) + +### Deprecations and Removals + +- Removes default chunk encoding settings (filters, serializer, compressors) from the global + configuration object. + + This removal is justified on the basis that storing chunk encoding settings in the config required + a brittle, confusing, and inaccurate categorization of array data types, which was particularly + unsuitable after the recent addition of new data types that didn't fit naturally into the + pre-existing categories. + + The default chunk encoding is the same (Zstandard compression, and the required object codecs for + variable length data types), but the chunk encoding is now generated by functions that cannot be + reconfigured at runtime. Users who relied on setting the default chunk encoding via the global configuration object should + instead specify the desired chunk encoding explicitly when creating an array. + + This change also adds an extra validation step to the creation of Zarr V2 arrays, which ensures that + arrays with a `VariableLengthUTF8` or `VariableLengthBytes` data type cannot be created without the + correct "object codec". ([#3228](https://github.com/zarr-developers/zarr-python/issues/3228)) + +- Removes support for passing keyword-only arguments positionally to the following functions and methods: + `save_array`, `open`, `group`, `open_group`, `create`, `get_basic_selection`, `set_basic_selection`, + `get_orthogonal_selection`, `set_orthogonal_selection`, `get_mask_selection`, `set_mask_selection`, + `get_coordinate_selection`, `set_coordinate_selection`, `get_block_selection`, `set_block_selection`, + `Group.create_array`, `Group.empty`, `Group.zeroes`, `Group.ones`, `Group.empty_like`, `Group.full`, + `Group.zeros_like`, `Group.ones_like`, `Group.full_like`, `Group.array`. Prior to this change, + passing a keyword-only argument positionally to one of these functions or methods would raise a + deprecation warning. 
That warning is now gone. Passing keyword-only arguments to these functions + and methods positionally is now an error. + +## 3.0.10 (2025-07-03) + +### Bugfixes + +- Removed an unnecessary check from `_fsspec._make_async` that would raise an exception when + creating a read-only store backed by a local file system with `auto_mkdir` set to `False`. ([#3193](https://github.com/zarr-developers/zarr-python/issues/3193)) + +- Add missing import for AsyncFileSystemWrapper for _make_async in _fsspec.py ([#3195](https://github.com/zarr-developers/zarr-python/issues/3195)) + +## 3.0.9 (2025-06-30) + +### Features + +- Add `zarr.storage.FsspecStore.from_mapper()` so that `zarr.open()` supports stores of type `fsspec.mapping.FSMap`. ([#2774](https://github.com/zarr-developers/zarr-python/issues/2774)) + +- Implemented `move` for `LocalStore` and `ZipStore`. This allows users to move the store to a different root path. ([#3021](https://github.com/zarr-developers/zarr-python/issues/3021)) + +- Added `zarr.errors.GroupNotFoundError`, which is raised when attempting to open a group that does not exist. ([#3066](https://github.com/zarr-developers/zarr-python/issues/3066)) + +- Adds `fill_value` to the list of attributes displayed in the output of the `AsyncArray.info()` method. ([#3081](https://github.com/zarr-developers/zarr-python/issues/3081)) + +- Use `numpy.zeros` instead of `np.full` for a performance speedup when creating a `zarr.core.buffer.NDBuffer` with `fill_value=0`. ([#3082](https://github.com/zarr-developers/zarr-python/issues/3082)) + +- Port more stateful testing actions from [Icechunk](https://icechunk.io). ([#3130](https://github.com/zarr-developers/zarr-python/issues/3130)) + +- Adds a `with_read_only` convenience method to the `Store` abstract base class (raises `NotImplementedError`) and implementations to the `MemoryStore`, `ObjectStore`, `LocalStore`, and `FsspecStore` classes. 
([#3138](https://github.com/zarr-developers/zarr-python/issues/3138)) + +### Bugfixes + +- Ignore stale child metadata when reconsolidating metadata. ([#2921](https://github.com/zarr-developers/zarr-python/issues/2921)) + +- For Zarr format 2, allow fixed-length string arrays to be created without automatically inserting a + `Vlen-UTF8` codec in the array of filters. Fixed-length string arrays do not need this codec. This + change fixes a regression where fixed-length string arrays created with Zarr Python 3 could not be read with Zarr Python 2.18. ([#3100](https://github.com/zarr-developers/zarr-python/issues/3100)) + +- When creating arrays without explicitly specifying a chunk size using `zarr.create` and other + array creation routines, the chunk size will now be set automatically instead of defaulting to the data shape. + For large arrays this will result in smaller default chunk sizes. + To retain previous behaviour, explicitly set the chunk shape to the data shape. + + This fix matches the existing chunking behaviour of + `zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`. ([#3103](https://github.com/zarr-developers/zarr-python/issues/3103)) + +- When `zarr.save` has an argument `path=some/path/` and multiple arrays in `args`, the path resulted in `some/path/some/path` due to using the `path` + argument twice while building the array path. This is now fixed. ([#3127](https://github.com/zarr-developers/zarr-python/issues/3127)) + +- Fix `zarr.open` default for argument `mode` when `store` is `read_only` ([#3128](https://github.com/zarr-developers/zarr-python/issues/3128)) + +- Suppress `FileNotFoundError` when deleting non-existent keys in the `obstore` adapter. + + When writing empty chunks (i.e. chunks where all values are equal to the array's fill value) to a zarr array, zarr + will delete those chunks from the underlying store.
For zarr arrays backed by the `obstore` adapter, this will potentially + raise a `FileNotFoundError` if the chunk doesn't already exist. + Since whether or not a delete of a non-existing object raises an error depends on the behavior of the underlying store, + suppressing the error in all cases results in consistent behavior across stores, and is also what `zarr` seems to expect + from the store. ([#3140](https://github.com/zarr-developers/zarr-python/issues/3140)) + +- Trying to open a StorePath/Array with `mode='r'` when the store is not read-only creates a read-only copy of the store. ([#3156](https://github.com/zarr-developers/zarr-python/issues/3156)) + +## 3.0.8 (2025-05-19) + +!!! warning + + In versions 3.0.0 to 3.0.7 opening arrays or groups with `mode='a'` (the default for many builtin functions) would cause any existing paths in the store to be deleted. This is fixed in 3.0.8, and we recommend all users upgrade to avoid this bug that could cause unintentional data loss. + +### Features + +- Added a `print_debug_info` function for bug reports. ([#2913](https://github.com/zarr-developers/zarr-python/issues/2913)) + +### Bugfixes + +- Fix a bug that prevented the number of initialized chunks being counted properly. ([#2862](https://github.com/zarr-developers/zarr-python/issues/2862)) +- Fixed sharding with GPU buffers. ([#2978](https://github.com/zarr-developers/zarr-python/issues/2978)) +- Fix structured `dtype` fill value serialization for consolidated metadata ([#2998](https://github.com/zarr-developers/zarr-python/issues/2998)) +- It is now possible to specify no compressor when creating a zarr format 2 array. + This can be done by passing `compressor=None` to the various array creation routines. + + The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given. + To reproduce the behaviour in previous zarr-python versions when `compressor=None` was passed, pass `compressor='auto'` instead. 
([#3039](https://github.com/zarr-developers/zarr-python/issues/3039)) +- Fixed the typing of `dimension_names` arguments throughout so that it now accepts iterables that contain `None` alongside `str`. ([#3045](https://github.com/zarr-developers/zarr-python/issues/3045)) +- Using various functions to open data with `mode='a'` no longer deletes existing data in the store. ([#3062](https://github.com/zarr-developers/zarr-python/issues/3062)) +- Internally use `typesize` constructor parameter for `numcodecs.blosc.Blosc` to improve compression ratios back to the v2-package levels. ([#2962](https://github.com/zarr-developers/zarr-python/issues/2962)) +- Specifying the memory order of Zarr format 2 arrays using the `order` keyword argument has been fixed. ([#2950](https://github.com/zarr-developers/zarr-python/issues/2950)) + +### Misc + +- [#2972](https://github.com/zarr-developers/zarr-python/issues/2972), [#3027](https://github.com/zarr-developers/zarr-python/issues/3027), [#3049](https://github.com/zarr-developers/zarr-python/issues/3049) + +## 3.0.7 (2025-04-22) + +### Features + +- Add experimental ObjectStore storage class based on obstore. ([#1661](https://github.com/zarr-developers/zarr-python/issues/1661)) +- Add `zarr.from_array` using concurrent streaming of source data ([#2622](https://github.com/zarr-developers/zarr-python/issues/2622)) + +### Bugfixes + +- 0-dimensional arrays are now returning a scalar. Therefore, the return type of `__getitem__` changed + to NDArrayLikeOrScalar. This change is to make the behavior of 0-dimensional arrays consistent with + `numpy` scalars. 
([#2718](https://github.com/zarr-developers/zarr-python/issues/2718)) +- Fix `fill_value` serialization for `NaN` in `ArrayV2Metadata` and add property-based testing of round-trip serialization ([#2802](https://github.com/zarr-developers/zarr-python/issues/2802)) +- Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be + consistent with the behavior of `ArrayMetadata`. ([#2996](https://github.com/zarr-developers/zarr-python/issues/2996)) + +### Improved Documentation + +- Updated the 3.0 migration guide to include the removal of "." syntax for getting group members. ([#2991](https://github.com/zarr-developers/zarr-python/issues/2991), [#2997](https://github.com/zarr-developers/zarr-python/issues/2997)) + +### Misc + +- Define a new versioning policy based on Effective Effort Versioning. This replaces the old Semantic + Versioning-based policy. ([#2924](https://github.com/zarr-developers/zarr-python/issues/2924), [#2910](https://github.com/zarr-developers/zarr-python/issues/2910)) +- Make warning filters in the tests more specific, so warnings emitted by tests added in the future + are more likely to be caught instead of ignored. ([#2714](https://github.com/zarr-developers/zarr-python/issues/2714)) +- Avoid an unnecessary memory copy when writing Zarr to a local file ([#2944](https://github.com/zarr-developers/zarr-python/issues/2944)) + +## 3.0.6 (2025-03-20) + +### Bugfixes + +- Restore functionality of `del z.attrs['key']` to actually delete the key. ([#2908](https://github.com/zarr-developers/zarr-python/issues/2908)) + +## 3.0.5 (2025-03-07) + +### Bugfixes + +- Fixed a bug where `StorePath` creation would not apply standard path normalization to the `path` parameter, + which led to the creation of arrays and groups with invalid keys. 
([#2850](https://github.com/zarr-developers/zarr-python/issues/2850)) +- Prevent update_attributes calls from deleting old attributes ([#2870](https://github.com/zarr-developers/zarr-python/issues/2870)) + +### Misc + +- [#2796](https://github.com/zarr-developers/zarr-python/issues/2796) + +## 3.0.4 (2025-02-23) + +### Features + +- Adds functions for concurrently creating multiple arrays and groups. ([#2665](https://github.com/zarr-developers/zarr-python/issues/2665)) + +### Bugfixes + +- Fixed a bug where `ArrayV2Metadata` could save `filters` as an empty array. ([#2847](https://github.com/zarr-developers/zarr-python/issues/2847)) +- Fix a bug when setting values of a smaller last chunk. ([#2851](https://github.com/zarr-developers/zarr-python/issues/2851)) + +### Misc + +- [#2828](https://github.com/zarr-developers/zarr-python/issues/2828) + +## 3.0.3 (2025-02-14) + +### Features + +- Improves performance of FsspecStore.delete_dir for remote filesystems supporting concurrent/batched deletes, e.g., s3fs. ([#2661](https://github.com/zarr-developers/zarr-python/issues/2661)) +- Added `zarr.config.enable_gpu` to update Zarr's configuration to use GPUs. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) +- Avoid reading chunks during writes where possible. [#757](https://github.com/zarr-developers/zarr-python/issues/757) ([#2784](https://github.com/zarr-developers/zarr-python/issues/2784)) +- `LocalStore` learned to `delete_dir`. This makes array and group deletes more efficient. ([#2804](https://github.com/zarr-developers/zarr-python/issues/2804)) +- Add `zarr.testing.strategies.array_metadata` to generate ArrayV2Metadata and ArrayV3Metadata instances. ([#2813](https://github.com/zarr-developers/zarr-python/issues/2813)) +- Add arbitrary `shards` to Hypothesis strategy for generating arrays. 
([#2822](https://github.com/zarr-developers/zarr-python/issues/2822)) + +### Bugfixes + +- Fixed bug with Zarr using device memory, instead of host memory, for storing metadata when using GPUs. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) +- The array returned by `zarr.empty` and an empty `zarr.core.buffer.cpu.NDBuffer` will now be filled with the + specified fill value, or with zeros if no fill value is provided. + This fixes a bug where Zarr format 2 data with no fill value was written with un-predictable chunk sizes. ([#2755](https://github.com/zarr-developers/zarr-python/issues/2755)) +- Fix zip-store path checking for stores with directories listed as files. ([#2758](https://github.com/zarr-developers/zarr-python/issues/2758)) +- Use removeprefix rather than replace when removing filename prefixes in `FsspecStore.list` ([#2778](https://github.com/zarr-developers/zarr-python/issues/2778)) +- Enable automatic removal of `needs release notes` with labeler action ([#2781](https://github.com/zarr-developers/zarr-python/issues/2781)) +- Use the proper label config ([#2785](https://github.com/zarr-developers/zarr-python/issues/2785)) +- Alters the behavior of `create_array` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object. 
([#2795](https://github.com/zarr-developers/zarr-python/issues/2795)) +- Initialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types ([#2799](https://github.com/zarr-developers/zarr-python/issues/2799)) +- Ensure utf8 compliant strings are used to construct numpy arrays in property-based tests ([#2801](https://github.com/zarr-developers/zarr-python/issues/2801)) +- Fix pickling for ZipStore ([#2807](https://github.com/zarr-developers/zarr-python/issues/2807)) +- Update numcodecs to not overwrite codec configuration ever. Closes [#2800](https://github.com/zarr-developers/zarr-python/issues/2800). ([#2811](https://github.com/zarr-developers/zarr-python/issues/2811)) +- Fix fancy indexing (e.g. arr[5, [0, 1]]) with the sharding codec ([#2817](https://github.com/zarr-developers/zarr-python/issues/2817)) + +### Improved Documentation + +- Added new user guide on GPU. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) + +## 3.0.2 (2025-01-31) + +### Features + +- Test `getsize()` and `getsize_prefix()` in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Test that a `ValueError` is raised for invalid byte range syntax in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Separate instantiating and opening a store in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Add a test for using Stores as context managers in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Implemented `LoggingStore.open()`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- `LoggingStore` is now a generic class.
([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Change StoreTest's `test_store_repr`, `test_store_supports_writes`, + `test_store_supports_partial_writes`, and `test_store_supports_listing` + to be implemented using `@abstractmethod`, rather than raising `NotImplementedError`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Test the error raised for invalid buffer arguments in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Test that data can be written to a store that's not yet open using the store.set method in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Adds a new function `init_array` for initializing an array in storage, and refactors `create_array` + to use `init_array`. `create_array` takes two new parameters: `data`, an optional array-like object, and `write_data`, a bool which defaults to `True`. + If `data` is given to `create_array`, then the `dtype` and `shape` attributes of `data` are used to define the + corresponding attributes of the resulting Zarr array. Additionally, if `data` is given and `write_data` is `True`, + then the values in `data` will be written to the newly created array. ([#2761](https://github.com/zarr-developers/zarr-python/issues/2761)) + +### Bugfixes + +- Wrap sync fsspec filesystems with `AsyncFileSystemWrapper`. ([#2533](https://github.com/zarr-developers/zarr-python/issues/2533)) +- Added backwards compatibility for Zarr format 2 structured arrays. ([#2681](https://github.com/zarr-developers/zarr-python/issues/2681)) +- Update equality for `LoggingStore` and `WrapperStore` such that 'other' must also be a `LoggingStore` or `WrapperStore` respectively, rather than only checking the types of the stores they wrap. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Ensure that `ZipStore` is open before getting or setting any values.
([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Use stdout rather than stderr as the default stream for `LoggingStore`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Match the errors raised by read only stores in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Fixed `ZipStore` to make sure the correct attributes are saved when instances are pickled. + This fixes a previous bug that prevented using `ZipStore` with a `ProcessPoolExecutor`. ([#2762](https://github.com/zarr-developers/zarr-python/issues/2762)) +- Updated the optional test dependencies to include `botocore` and `fsspec`. ([#2768](https://github.com/zarr-developers/zarr-python/issues/2768)) +- Fixed the fsspec tests to skip if `botocore` is not installed. + Previously they would have failed with an import error. ([#2768](https://github.com/zarr-developers/zarr-python/issues/2768)) +- Optimize full chunk writes. ([#2782](https://github.com/zarr-developers/zarr-python/issues/2782)) + +### Improved Documentation + +- Changed the machinery for creating changelog entries. + Now individual entries should be added as files to the `changes` directory in the `zarr-python` repository, instead of directly to the changelog file. ([#2736](https://github.com/zarr-developers/zarr-python/issues/2736)) + +### Other + +- Created a type alias `ChunkKeyEncodingLike` to model the union of `ChunkKeyEncoding` instances and the dict form of the + parameters of those instances. `ChunkKeyEncodingLike` should be used by high-level functions to provide a convenient + way for creating `ChunkKeyEncoding` objects. ([#2763](https://github.com/zarr-developers/zarr-python/issues/2763)) + +## 3.0.1 (Jan. 17, 2025) + +* Implement `zarr.from_array` using concurrent streaming ([#2622](https://github.com/zarr-developers/zarr-python/issues/2622)).
+ +### Bug fixes + +* Fixes `order` argument for Zarr format 2 arrays ([#2679](https://github.com/zarr-developers/zarr-python/issues/2679)). +* Fixes a bug that prevented reading Zarr format 2 data with consolidated + metadata written using `zarr-python` version 2 ([#2694](https://github.com/zarr-developers/zarr-python/issues/2694)). +* Ensure that compressor=None results in no compression when writing Zarr + format 2 data ([#2708](https://github.com/zarr-developers/zarr-python/issues/2708)). +* Fix for empty consolidated metadata dataset: backwards compatibility with + Zarr-Python 2 ([#2695](https://github.com/zarr-developers/zarr-python/issues/2695)). + +### Documentation + +* Add v3.0.0 release announcement banner ([#2677](https://github.com/zarr-developers/zarr-python/issues/2677)). +* Quickstart guide alignment with V3 API ([#2697](https://github.com/zarr-developers/zarr-python/issues/2697)). +* Fix doctest failures related to numcodecs 0.15 ([#2727](https://github.com/zarr-developers/zarr-python/issues/2727)). + +### Other + +* Removed some unnecessary files from the source distribution + to reduce its size. ([#2686](https://github.com/zarr-developers/zarr-python/issues/2686)). +* Enable codecov in GitHub actions ([#2682](https://github.com/zarr-developers/zarr-python/issues/2682)). +* Speed up hypothesis tests ([#2650](https://github.com/zarr-developers/zarr-python/issues/2650)). +* Remove multiple imports for an import name ([#2723](https://github.com/zarr-developers/zarr-python/issues/2723)). + +## 3.0.0 (Jan. 9, 2025) + +3.0.0 is a new major release of Zarr-Python, with many breaking changes. +See the [v3 migration guide](user-guide/v3_migration.md) for a listing of what's changed. + +Normal release note service will resume with further releases in the 3.0.0 +series. 
+ +Release notes for the zarr-python 2.x and 1.x releases can be found here: +https://zarr.readthedocs.io/en/support-v2/release.html diff --git a/docs/release-notes.rst b/docs/release-notes.rst deleted file mode 100644 index 8a6061b40e..0000000000 --- a/docs/release-notes.rst +++ /dev/null @@ -1,561 +0,0 @@ -Release notes -============= - -.. towncrier release notes start - -zarr 3.1.3 (2025-09-18) ------------------------ - -Features -~~~~~~~~ - -- Add a command-line interface to migrate v2 Zarr metadata to v3. Corresponding functions are also - provided under zarr.metadata. (:issue:`1798`) -- Add obstore implementation of delete_dir. (:issue:`3310`) -- Adds a registry for chunk key encodings for extensibility. - This allows users to implement a custom `ChunkKeyEncoding`, which can be registered via `register_chunk_key_encoding` or as an entry point under `zarr.chunk_key_encoding`. (:issue:`3436`) -- Trying to open a group at a path were a array already exists now raises a helpful error. (:issue:`3444`) - - -Bugfixes -~~~~~~~~ - -- Prevents creation of groups (.create_group) or arrays (.create_array) as children - of an existing array. (:issue:`2582`) -- Fix a bug preventing ``ones_like``, ``full_like``, ``empty_like``, ``zeros_like`` and ``open_like`` functions from accepting - an explicit specification of array attributes like shape, dtype, chunks etc. The functions ``full_like``, - ``empty_like``, and ``open_like`` now also more consistently infer a ``fill_value`` parameter from the provided array. (:issue:`2992`) -- LocalStore now uses atomic writes, which should prevent some cases of corrupted data. (:issue:`3411`) -- Fix a potential race condition when using :func:`zarr.create_array` with the ``data`` parameter - set to a NumPy array. Previously Zarr was iterating over the newly created array with a granularity - that was too low. Now Zarr chooses a granularity that matches the size of the stored objects for - that array. 
(:issue:`3422`) -- Fix ChunkGrid definition (broken in 3.1.2) (:issue:`3425`) -- Ensure syntax like ``root['/subgroup']`` works equivalently to ``root['subgroup']`` when using consolidated metadata. (:issue:`3428`) -- Creating a new group with `zarr.group` no longer errors. - This fixes a regression introduced in version 3.1.2. (:issue:`3431`) -- Setting ``fill_value`` to a float like ``0.0`` when the data type of the array is an integer is a common - mistake. This change lets Zarr Python read arrays with this erroneous metadata, although Zarr Python - will not create such arrays. (:issue:`3448`) - - -Deprecations and Removals -~~~~~~~~~~~~~~~~~~~~~~~~~ - -- The ``Store.set_partial_writes`` method, which was not used by Zarr-Python, has been removed. - ``store.supports_partial_writes`` is now always ``False``. (:issue:`2859`) - - -Misc -~~~~ - -- :issue:`3376`, :issue:`3390`, :issue:`3403`, :issue:`3449` - - -3.1.2 (2025-08-25) ------------------- - -Features -~~~~~~~~ - -- Added support for async vectorized and orthogonal indexing. (:issue:`3083`) -- Make config param optional in init_array (:issue:`3391`) - - -Bugfixes -~~~~~~~~ - -- Ensure that -0.0 is not considered equal to 0.0 when checking if all the values in a chunk are equal to an array's fill value.``` (:issue:`3144`) -- Fix a bug in ``create_array`` caused by iterating over chunk-aligned regions instead of - shard-aligned regions when writing data. Additionally, the behavior of ``nchunks_initialized`` - has been adjusted. This function consistently reports the number of chunks present in stored objects, - even when the array uses the sharding codec. (:issue:`3299`) -- Opening an array or group with ``mode="r+"`` will no longer create new arrays or groups. 
(:issue:`3307`) -- Added `zarr.errors.ArrayNotFoundError`, which is raised when attempting to open a zarr array that does not exist, and `zarr.errors.NodeNotFoundError`, which is raised when failing to open an array or a group in a context where either an array or a group was expected. (:issue:`3367`) -- Ensure passing `config` is handled properly when `open`ing an existing - array. (:issue:`3378`) -- Raise a Zarr-specific error class when a codec can't be found by name when deserializing the given codecs. This avoids hiding this error behind a "not part of a zarr hierarchy" warning. (:issue:`3395`) - - -Misc -~~~~ - -- :issue:`3098`, :issue:`3288`, :issue:`3318`, :issue:`3368`, :issue:`3371`, :issue:`3372`, :issue:`3374` - - -3.1.1 (2025-07-28) ------------------- - -Features -~~~~~~~~ - -- Add lightweight implementations of .getsize() and .getsize_prefix() for ObjectStore. (:issue:`3227`) - - -Bugfixes -~~~~~~~~ - -- Creating a Zarr format 2 array with the ``order`` keyword argument no longer raises a warning. (:issue:`3112`) -- Fixed the error message when passing both ``config`` and ``write_empty_chunks`` arguments to reflect the current behaviour (``write_empty_chunks`` takes precedence). (:issue:`3112`) -- Creating a Zarr format 3 array with the ``order`` argument now conistently ignores this argument and raises a warning. (:issue:`3112`) -- When using ``from_array`` to copy a Zarr format 2 array to a Zarr format 3 array, if the memory order of the input array is ``"F"`` a warning is raised and the order ignored. - This is because Zarr format 3 arrays are always stored in "C" order. (:issue:`3112`) -- The ``config`` argument to `zarr.create` (and functions that create arrays) is now used - previously it had no effect. (:issue:`3112`) -- Ensure that all abstract methods of ``ZDType`` raise a ``NotImplementedError`` when invoked. (:issue:`3251`) -- Register 'gpu' marker with pytest for downstream StoreTests. 
(:issue:`3258`) -- Expand the range of types accepted by ``parse_data_type`` to include strings and Sequences. -- Move the functionality of ``parse_data_type`` to a new function called ``parse_dtype``. This change - ensures that nomenclature is consistent across the codebase. ``parse_data_type`` remains, so this - change is not breaking. (:issue:`3264`) -- Fix a regression introduced in 3.1.0 that prevented ``inf``, ``-inf``, and ``nan`` values - from being stored in ``attributes``. (:issue:`3280`) -- Fixes Group.nmembers() ignoring depth when using consolidated metadata. (:issue:`3287`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Expand the data type docs to include a demonstration of the ``parse_data_type`` function. - Expand the docstring for the ``parse_data_type`` function. (:issue:`3249`) -- Add a section on codecs to the migration guide. (:issue:`3273`) - - -Misc -~~~~ - -- :issue:`3268` - - -3.1.0 (2025-07-14) ------------------- - -Features -~~~~~~~~ -- Ensure that invocations of ``create_array`` use consistent keyword arguments, with consistent defaults. - - ``zarr.api.synchronous.create_array`` now takes a ``write_data`` keyword argument - The ``Group.create_array`` method takes ``data`` and ``write_data`` keyword arguments. - The functions ``api.asynchronous.create``, ``api.asynchronous.create_array`` - and the methods ``Group.create_array``, ``Group.array``, had the default - ``fill_value`` changed from ``0`` to the ``DEFAULT_FILL_VALUE`` value, which instructs Zarr to - use the default scalar value associated with the array's data type as the fill value. These are - all functions or methods for array creation that mirror, wrap or are wrapped by, another function - that already has a default ``fill_value`` set to ``DEFAULT_FILL_VALUE``. This change is necessary - to make these functions consistent across the entire codebase, but as this changes default values, - new data might have a different fill value than expected after this change. 
- - For data types where 0 is meaningful, like integers or floats, the default scalar is 0, so this - change should not be noticeable. For data types where 0 is ambiguous, like fixed-length unicode - strings, the default fill value might be different after this change. Users who were relying on how - Zarr interpreted ``0`` as a non-numeric scalar value should set their desired fill value explicitly - after this change. -- Added public API for Buffer ABCs and implementations. - - Use :mod:`zarr.buffer` to access buffer implementations, and - :mod:`zarr.abc.buffer` for the interface to implement new buffer types. - - Users previously importing buffer from ``zarr.core.buffer`` should update their - imports to use :mod:`zarr.buffer`. As a reminder, all of ``zarr.core`` is - considered a private API that's not covered by zarr-python's versioning policy. (:issue:`2871`) -- Adds zarr-specific data type classes. - - This change adds a ``ZDType`` base class for Zarr V2 and Zarr V3 data types. Child classes are - defined for each NumPy data type. Each child class defines routines for ``JSON`` serialization. - New data types can be created and registered dynamically. - - Prior to this change, Zarr Python had two streams for handling data types. For Zarr V2 arrays, - we used NumPy data type identifiers. For Zarr V3 arrays, we used a fixed set of string enums. Both - of these systems proved hard to extend. - - This change is largely internal, but it does change the type of the ``dtype`` and ``data_type`` - fields on the ``ArrayV2Metadata`` and ``ArrayV3Metadata`` classes. Previously, ``ArrayV2Metadata.dtype`` - was a NumPy ``dtype`` object, and ``ArrayV3Metadata.data_type`` was an internally-defined ``enum``. - After this change, both ``ArrayV2Metadata.dtype`` and ``ArrayV3Metadata.data_type`` are instances of - ``ZDType``. A NumPy data type can be generated from a ``ZDType`` via the ``ZDType.to_native_dtype()`` - method. 
The internally-defined Zarr V3 ``enum`` class is gone entirely, but the ``ZDType.to_json(zarr_format=3)`` - method can be used to generate either a string, or dictionary that has a string ``name`` field, that - represents the string value previously associated with that ``enum``. - - For more on this new feature, see the `documentation `_ (:issue:`2874`) -- Added `NDBuffer.empty` method for faster ndbuffer initialization. (:issue:`3191`) -- The minimum version of NumPy has increased to 1.26. (:issue:`3226`) -- Add an alternate `from_array_metadata_and_store` constructor to `CodecPipeline`. (:issue:`3233`) - - -Bugfixes -~~~~~~~~ - -- Fixes a variety of issues related to string data types. - - - Brings the ``VariableLengthUTF8`` data type Zarr V3 identifier in alignment with Zarr Python 3.0.8 - - Disallows creation of 0-length fixed-length data types - - Adds a regression test for the ``VariableLengthUTF8`` data type that checks against version 3.0.8 - - Allows users to request the ``VariableLengthUTF8`` data type with ``str``, ``"str"``, or ``"string"``. (:issue:`3170`) -- Add human readable size for No. bytes stored to `info_complete` (:issue:`3190`) -- Restores the ability to create a Zarr V2 array with a ``null`` fill value by introducing a new - class ``DefaultFillValue``, and setting the default value of the ``fill_value`` parameter in array - creation routines to an instance of ``DefaultFillValue``. For Zarr V3 arrays, ``None`` will act as an - alias for a ``DefaultFillValue`` instance, thus preserving compatibility with existing code. (:issue:`3198`) -- Fix the type of ``ArrayV2Metadata.codec`` to constrain it to ``numcodecs.abc.Codec | None``. - Previously the type was more permissive, allowing objects that can be parsed into Codecs (e.g., the codec name). - The constructor of ``ArrayV2Metadata`` still allows the permissive input when creating new objects. 
(:issue:`3232`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Add a self-contained example of data type extension to the ``examples`` directory, and expanded - the documentation for data types. (:issue:`3157`) -- - Add a description on how to create a RemoteStore of a specific filesystem to the `Remote Store` section in `docs\user-guide\storage.rst`. - - State in the docstring of `FsspecStore.from_url` that the filesystem type is inferred from the URL scheme. - - It should help a user handling the case when the type of FsspecStore doesn't match the URL scheme. (:issue:`3212`) - - -Deprecations and Removals -~~~~~~~~~~~~~~~~~~~~~~~~~ - -- Removes default chunk encoding settings (filters, serializer, compressors) from the global - configuration object. - - This removal is justified on the basis that storing chunk encoding settings in the config required - a brittle, confusing, and inaccurate categorization of array data types, which was particularly - unsuitable after the recent addition of new data types that didn't fit naturally into the - pre-existing categories. - - The default chunk encoding is the same (Zstandard compression, and the required object codecs for - variable length data types), but the chunk encoding is now generated by functions that cannot be - reconfigured at runtime. Users who relied on setting the default chunk encoding via the global configuration object should - instead specify the desired chunk encoding explicitly when creating an array. - - This change also adds an extra validation step to the creation of Zarr V2 arrays, which ensures that - arrays with a ``VariableLengthUTF8`` or ``VariableLengthBytes`` data type cannot be created without the - correct "object codec". 
(:issue:`3228`) -- Removes support for passing keyword-only arguments positionally to the following functions and methods: - ``save_array``, ``open``, ``group``, ``open_group``, ``create``, ``get_basic_selection``, ``set_basic_selection``, - ``get_orthogonal_selection``, ``set_orthogonal_selection``, ``get_mask_selection``, ``set_mask_selection``, - ``get_coordinate_selection``, ``set_coordinate_selection``, ``get_block_selection``, ``set_block_selection``, - ``Group.create_array``, ``Group.empty``, ``Group.zeroes``, ``Group.ones``, ``Group.empty_like``, ``Group.full``, - ``Group.zeros_like``, ``Group.ones_like``, ``Group.full_like``, ``Group.array``. Prior to this change, - passing a keyword-only argument positionally to one of these functions or methods would raise a - deprecation warning. That warning is now gone. Passing keyword-only arguments to these functions - and methods positionally is now an error. - -3.0.10 (2025-07-03) -------------------- - -Bugfixes -~~~~~~~~ - -- Removed an unnecessary check from ``_fsspec._make_async`` that would raise an exception when - creating a read-only store backed by a local file system with ``auto_mkdir`` set to ``False``. (:issue:`3193`) -- Add missing import for AsyncFileSystemWrapper for _make_async in _fsspec.py (:issue:`3195`) - - -3.0.9 (2025-06-30) ------------------- - -Features -~~~~~~~~ - -- Add `zarr.storage.FsspecStore.from_mapper()` so that `zarr.open()` supports stores of type `fsspec.mapping.FSMap`. (:issue:`2774`) -- Implemented ``move`` for ``LocalStore`` and ``ZipStore``. This allows users to move the store to a different root path. (:issue:`3021`) -- Added `~zarr.errors.GroupNotFoundError`, which is raised when attempting to open a group that does not exist. (:issue:`3066`) -- Adds ``fill_value`` to the list of attributes displayed in the output of the ``AsyncArray.info()`` method. 
(:issue:`3081`) -- Use :py:func:`numpy.zeros` instead of :py:func:`np.full` for a performance speedup when creating a `zarr.core.buffer.NDBuffer` with `fill_value=0`. (:issue:`3082`) -- Port more stateful testing actions from `Icechunk `_. (:issue:`3130`) -- Adds a `with_read_only` convenience method to the `Store` abstract base class (raises `NotImplementedError`) and implementations to the `MemoryStore`, `ObjectStore`, `LocalStore`, and `FsspecStore` classes. (:issue:`3138`) - - -Bugfixes -~~~~~~~~ - -- Ignore stale child metadata when reconsolidating metadata. (:issue:`2921`) -- For Zarr format 2, allow fixed-length string arrays to be created without automatically inserting a - ``Vlen-UT8`` codec in the array of filters. Fixed-length string arrays do not need this codec. This - change fixes a regression where fixed-length string arrays created with Zarr Python 3 could not be read with Zarr Python 2.18. (:issue:`3100`) -- When creating arrays without explicitly specifying a chunk size using `zarr.create` and other - array creation routines, the chunk size will now set automatically instead of defaulting to the data shape. - For large arrays this will result in smaller default chunk sizes. - To retain previous behaviour, explicitly set the chunk shape to the data shape. - - This fix matches the existing chunking behaviour of - `zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`. (:issue:`3103`) -- When `zarr.save` has an argument `path=some/path/` and multiple arrays in `args`, the path resulted in `some/path/some/path` due to using the `path` - argument twice while building the array path. This is now fixed. (:issue:`3127`) -- Fix `zarr.open` default for argument `mode` when `store` is `read_only` (:issue:`3128`) -- Suppress `FileNotFoundError` when deleting non-existent keys in the `obstore` adapter. - - When writing empty chunks (i.e. 
chunks where all values are equal to the array's fill value) to a zarr array, zarr - will delete those chunks from the underlying store. For zarr arrays backed by the `obstore` adapter, this will potentially - raise a `FileNotFoundError` if the chunk doesn't already exist. - Since whether or not a delete of a non-existing object raises an error depends on the behavior of the underlying store, - suppressing the error in all cases results in consistent behavior across stores, and is also what `zarr` seems to expect - from the store. (:issue:`3140`) -- Trying to open a StorePath/Array with ``mode='r'`` when the store is not read-only creates a read-only copy of the store. (:issue:`3156`) - - -3.0.8 (2025-05-19) ------------------- - -.. warning:: - - In versions 3.0.0 to 3.0.7 opening arrays or groups with ``mode='a'`` (the default for many builtin functions) - would cause any existing paths in the store to be deleted. This is fixed in 3.0.8, and - we recommend all users upgrade to avoid this bug that could cause unintentional data loss. - -Features -~~~~~~~~ - -- Added a `print_debug_info` function for bug reports. (:issue:`2913`) - - -Bugfixes -~~~~~~~~ - -- Fix a bug that prevented the number of initialized chunks being counted properly. (:issue:`2862`) -- Fixed sharding with GPU buffers. (:issue:`2978`) -- Fix structured `dtype` fill value serialization for consolidated metadata (:issue:`2998`) -- It is now possible to specify no compressor when creating a zarr format 2 array. - This can be done by passing ``compressor=None`` to the various array creation routines. - - The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given. - To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead. 
(:issue:`3039`) -- Fixed the typing of ``dimension_names`` arguments throughout so that it now accepts iterables that contain `None` alongside `str`. (:issue:`3045`) -- Using various functions to open data with ``mode='a'`` no longer deletes existing data in the store. (:issue:`3062`) -- Internally use `typesize` constructor parameter for :class:`numcodecs.blosc.Blosc` to improve compression ratios back to the v2-package levels. (:issue:`2962`) -- Specifying the memory order of Zarr format 2 arrays using the ``order`` keyword argument has been fixed. (:issue:`2950`) - - -Misc -~~~~ - -- :issue:`2972`, :issue:`3027`, :issue:`3049` - - -3.0.7 (2025-04-22) ------------------- - -Features -~~~~~~~~ - -- Add experimental ObjectStore storage class based on obstore. (:issue:`1661`) -- Add ``zarr.from_array`` using concurrent streaming of source data (:issue:`2622`) - - -Bugfixes -~~~~~~~~ - -- 0-dimensional arrays are now returning a scalar. Therefore, the return type of ``__getitem__`` changed - to NDArrayLikeOrScalar. This change is to make the behavior of 0-dimensional arrays consistent with - ``numpy`` scalars. (:issue:`2718`) -- Fix `fill_value` serialization for `NaN` in `ArrayV2Metadata` and add property-based testing of round-trip serialization (:issue:`2802`) -- Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be - consistent with the behavior of `ArrayMetadata`. (:issue:`2996`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Updated the 3.0 migration guide to include the removal of "." syntax for getting group members. (:issue:`2991`, :issue:`2997`) - - -Misc -~~~~ -- Define a new versioning policy based on Effective Effort Versioning. This replaces the old Semantic - Versioning-based policy. (:issue:`2924`, :issue:`2910`) -- Make warning filters in the tests more specific, so warnings emitted by tests added in the future - are more likely to be caught instead of ignored. 
(:issue:`2714`) -- Avoid an unnecessary memory copy when writing Zarr to a local file (:issue:`2944`) - - -3.0.6 (2025-03-20) ------------------- - -Bugfixes -~~~~~~~~ - -- Restore functionality of `del z.attrs['key']` to actually delete the key. (:issue:`2908`) - - -3.0.5 (2025-03-07) ------------------- - -Bugfixes -~~~~~~~~ - -- Fixed a bug where ``StorePath`` creation would not apply standard path normalization to the ``path`` parameter, - which led to the creation of arrays and groups with invalid keys. (:issue:`2850`) -- Prevent update_attributes calls from deleting old attributes (:issue:`2870`) - - -Misc -~~~~ - -- :issue:`2796` - -3.0.4 (2025-02-23) ------------------- - -Features -~~~~~~~~ - -- Adds functions for concurrently creating multiple arrays and groups. (:issue:`2665`) - -Bugfixes -~~~~~~~~ - -- Fixed a bug where ``ArrayV2Metadata`` could save ``filters`` as an empty array. (:issue:`2847`) -- Fix a bug when setting values of a smaller last chunk. (:issue:`2851`) - -Misc -~~~~ - -- :issue:`2828` - - -3.0.3 (2025-02-14) ------------------- - -Features -~~~~~~~~ - -- Improves performance of FsspecStore.delete_dir for remote filesystems supporting concurrent/batched deletes, e.g., s3fs. (:issue:`2661`) -- Added :meth:`zarr.config.enable_gpu` to update Zarr's configuration to use GPUs. (:issue:`2751`) -- Avoid reading chunks during writes where possible. :issue:`757` (:issue:`2784`) -- :py:class:`LocalStore` learned to ``delete_dir``. This makes array and group deletes more efficient. (:issue:`2804`) -- Add `zarr.testing.strategies.array_metadata` to generate ArrayV2Metadata and ArrayV3Metadata instances. (:issue:`2813`) -- Add arbitrary `shards` to Hypothesis strategy for generating arrays. (:issue:`2822`) - - -Bugfixes -~~~~~~~~ - -- Fixed bug with Zarr using device memory, instead of host memory, for storing metadata when using GPUs. 
(:issue:`2751`) -- The array returned by ``zarr.empty`` and an empty ``zarr.core.buffer.cpu.NDBuffer`` will now be filled with the - specified fill value, or with zeros if no fill value is provided. - This fixes a bug where Zarr format 2 data with no fill value was written with un-predictable chunk sizes. (:issue:`2755`) -- Fix zip-store path checking for stores with directories listed as files. (:issue:`2758`) -- Use removeprefix rather than replace when removing filename prefixes in `FsspecStore.list` (:issue:`2778`) -- Enable automatic removal of `needs release notes` with labeler action (:issue:`2781`) -- Use the proper label config (:issue:`2785`) -- Alters the behavior of ``create_array`` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object. (:issue:`2795`) -- Enitialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types (:issue:`2799`) -- Ensure utf8 compliant strings are used to construct numpy arrays in property-based tests (:issue:`2801`) -- Fix pickling for ZipStore (:issue:`2807`) -- Update numcodecs to not overwrite codec configuration ever. Closes :issue:`2800`. (:issue:`2811`) -- Fix fancy indexing (e.g. arr[5, [0, 1]]) with the sharding codec (:issue:`2817`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Added new user guide on :ref:`user-guide-gpu`. (:issue:`2751`) - - -3.0.2 (2025-01-31) ------------------- - -Features -~~~~~~~~ - -- Test ``getsize()`` and ``getsize_prefix()`` in ``StoreTests``. (:issue:`2693`) -- Test that a ``ValueError`` is raised for invalid byte range syntax in ``StoreTests``. (:issue:`2693`) -- Separate instantiating and opening a store in ``StoreTests``. (:issue:`2693`) -- Add a test for using Stores as a context managers in ``StoreTests``. 
(:issue:`2693`) -- Implemented ``LogingStore.open()``. (:issue:`2693`) -- ``LoggingStore`` is now a generic class. (:issue:`2693`) -- Change StoreTest's ``test_store_repr``, ``test_store_supports_writes``, - ``test_store_supports_partial_writes``, and ``test_store_supports_listing`` - to to be implemented using ``@abstractmethod``, rather raising ``NotImplementedError``. (:issue:`2693`) -- Test the error raised for invalid buffer arguments in ``StoreTests``. (:issue:`2693`) -- Test that data can be written to a store that's not yet open using the store.set method in ``StoreTests``. (:issue:`2693`) -- Adds a new function ``init_array`` for initializing an array in storage, and refactors ``create_array`` - to use ``init_array``. ``create_array`` takes two new parameters: ``data``, an optional array-like object, and ``write_data``, a bool which defaults to ``True``. - If ``data`` is given to ``create_array``, then the ``dtype`` and ``shape`` attributes of ``data`` are used to define the - corresponding attributes of the resulting Zarr array. Additionally, if ``data`` given and ``write_data`` is ``True``, - then the values in ``data`` will be written to the newly created array. (:issue:`2761`) - - -Bugfixes -~~~~~~~~ - -- Wrap sync fsspec filesystems with ``AsyncFileSystemWrapper``. (:issue:`2533`) -- Added backwards compatibility for Zarr format 2 structured arrays. (:issue:`2681`) -- Update equality for ``LoggingStore`` and ``WrapperStore`` such that 'other' must also be a ``LoggingStore`` or ``WrapperStore`` respectively, rather than only checking the types of the stores they wrap. (:issue:`2693`) -- Ensure that ``ZipStore`` is open before getting or setting any values. (:issue:`2693`) -- Use stdout rather than stderr as the default stream for ``LoggingStore``. (:issue:`2693`) -- Match the errors raised by read only stores in ``StoreTests``. (:issue:`2693`) -- Fixed ``ZipStore`` to make sure the correct attributes are saved when instances are pickled. 
- This fixes a previous bug that prevent using ``ZipStore`` with a ``ProcessPoolExecutor``. (:issue:`2762`) -- Updated the optional test dependencies to include ``botocore`` and ``fsspec``. (:issue:`2768`) -- Fixed the fsspec tests to skip if ``botocore`` is not installed. - Previously they would have failed with an import error. (:issue:`2768`) -- Optimize full chunk writes. (:issue:`2782`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Changed the machinery for creating changelog entries. - Now individual entries should be added as files to the `changes` directory in the `zarr-python` repository, instead of directly to the changelog file. (:issue:`2736`) - -Other -~~~~~ - -- Created a type alias ``ChunkKeyEncodingLike`` to model the union of ``ChunkKeyEncoding`` instances and the dict form of the - parameters of those instances. ``ChunkKeyEncodingLike`` should be used by high-level functions to provide a convenient - way for creating ``ChunkKeyEncoding`` objects. (:issue:`2763`) - - -3.0.1 (Jan. 17, 2025) ---------------------- - -* Implement ``zarr.from_array`` using concurrent streaming (:issue:`2622`). - -Bug fixes -~~~~~~~~~ -* Fixes ``order`` argument for Zarr format 2 arrays (:issue:`2679`). - -* Fixes a bug that prevented reading Zarr format 2 data with consolidated - metadata written using ``zarr-python`` version 2 (:issue:`2694`). - -* Ensure that compressor=None results in no compression when writing Zarr - format 2 data (:issue:`2708`). - -* Fix for empty consolidated metadata dataset: backwards compatibility with - Zarr-Python 2 (:issue:`2695`). - -Documentation -~~~~~~~~~~~~~ -* Add v3.0.0 release announcement banner (:issue:`2677`). - -* Quickstart guide alignment with V3 API (:issue:`2697`). - -* Fix doctest failures related to numcodecs 0.15 (:issue:`2727`). - -Other -~~~~~ -* Removed some unnecessary files from the source distribution - to reduce its size. (:issue:`2686`). - -* Enable codecov in GitHub actions (:issue:`2682`). 
- -* Speed up hypothesis tests (:issue:`2650`). - -* Remove multiple imports for an import name (:issue:`2723`). - - -.. _release_3.0.0: - -3.0.0 (Jan. 9, 2025) --------------------- - -3.0.0 is a new major release of Zarr-Python, with many breaking changes. -See the :ref:`v3 migration guide` for a listing of what's changed. - -Normal release note service will resume with further releases in the 3.0.0 -series. - -Release notes for the zarr-python 2.x and 1.x releases can be found here: -https://zarr.readthedocs.io/en/support-v2/release.html diff --git a/docs/talks/scipy2019/submission.rst b/docs/talks/scipy2019/submission.rst deleted file mode 100644 index 57fd925b1f..0000000000 --- a/docs/talks/scipy2019/submission.rst +++ /dev/null @@ -1,144 +0,0 @@ -Zarr - scalable storage of tensor data for use in parallel and distributed computing -==================================================================================== - -SciPy 2019 submission. - - -Short summary -------------- - -Many scientific problems involve computing over large N-dimensional -typed arrays of data, and reading or writing data is often the major -bottleneck limiting speed or scalability. The Zarr project is -developing a simple, scalable approach to storage of such data in a -way that is compatible with a range of approaches to distributed and -parallel computing. We describe the Zarr protocol and data storage -format, and the current state of implementations for various -programming languages including Python. We also describe current uses -of Zarr in malaria genomics, the Human Cell Atlas, and the Pangeo -project. - - -Abstract --------- - -Background -~~~~~~~~~~ - -Across a broad range of scientific disciplines, data are naturally -represented and stored as N-dimensional typed arrays, also known as -tensors. 
The volume of data being generated is outstripping our -ability to analyse it, and scientific communities are looking for ways -to leverage modern multi-core CPUs and distributed computing -platforms, including cloud computing. Retrieval and storage of data is -often the major bottleneck, and new approaches to data storage are -needed to accelerate distributed computations and enable them to scale -on a variety of platforms. - -Methods -~~~~~~~ - -We have designed a new storage format and protocol for tensor data -[1_], and have released an open source Python implementation [2_, -3_]. Our approach builds on data storage concepts from HDF5 [4_], -particularly chunking and compression, and hierarchical organisation -of datasets. Key design goals include: a simple protocol and format -that can be implemented in other programming languages; support for -multiple concurrent readers or writers; support for a variety of -parallel computing environments, from multi-threaded execution on a -single CPU to multi-process execution across a multi-node cluster; -pluggable storage subsystem with support for file systems, key-value -databases and cloud object stores; pluggable encoding subsystem with -support for a variety of modern compressors. - -Results -~~~~~~~ - -We illustrate the use of Zarr with examples from several scientific -domains. Zarr is being used within the Pangeo project [5_], which is -building a community platform for big data geoscience. The Pangeo -community have converted a number of existing climate modelling and -satellite observation datasets to Zarr [6_], and have demonstrated -their use in computations using HPC and cloud computing -environments. 
Within the MalariaGEN project [7_], Zarr is used to -store genome variation data from next-generation sequencing of natural -populations of malaria parasites and mosquitoes [8_] and these data -are used as input to analyses of the evolution of these organisms in -response to selective pressure from anti-malarial drugs and -insecticides. Zarr is being used within the Human Cell Atlas (HCA) -project [9_], which is building a reference atlas of healthy human -cell types. This project hopes to leverage this information to better -understand the dysregulation of cellular states that underly human -disease. The Human Cell Atlas uses Zarr as the output data format -because it enables the project to easily generate matrices containing -user-selected subsets of cells. - -Conclusions -~~~~~~~~~~~ - -Zarr is generating interest across a range of scientific domains, and -work is ongoing to establish a community process to support further -development of the specifications and implementations in other -programming languages [10_, 11_, 12_] and building interoperability -with a similar project called N5 [13_]. Other packages within the -PyData ecosystem, notably Dask [14_], Xarray [15_] and Intake [16_], -have added capability to read and write Zarr, and together these -packages provide a compelling solution for large scale data science -using Python [17_]. Zarr has recently been presented in several -venues, including a webinar for the ESIP Federation tech dive series -[18_], and a talk at the AGU Fall Meeting 2018 [19_]. - - -References -~~~~~~~~~~ - -.. _1: https://zarr.readthedocs.io/en/stable/spec/v2.html -.. _2: https://github.com/zarr-developers/zarr-python -.. _3: https://github.com/zarr-developers/numcodecs -.. _4: https://www.hdfgroup.org/solutions/hdf5/ -.. _5: https://pangeo.io/ -.. _6: https://pangeo.io/catalog.html -.. _7: https://www.malariagen.net/ -.. _8: http://alimanfoo.github.io/2016/09/21/genotype-compression-benchmark.html -.. 
_9: https://www.humancellatlas.org/ -.. _10: https://github.com/constantinpape/z5 -.. _11: https://github.com/lasersonlab/ndarray.scala -.. _12: https://github.com/meggart/ZarrNative.jl -.. _13: https://github.com/saalfeldlab/n5 -.. _14: http://docs.dask.org/en/latest/array-creation.html -.. _15: http://xarray.pydata.org/en/stable/io.html -.. _16: https://github.com/ContinuumIO/intake-xarray -.. _17: http://matthewrocklin.com/blog/work/2018/01/22/pangeo-2 -.. _18: http://wiki.esipfed.org/index.php/Interoperability_and_Technology/Tech_Dive_Webinar_Series#8_March.2C_2018:_.22Zarr:_A_simple.2C_open.2C_scalable_solution_for_big_NetCDF.2FHDF_data_on_the_Cloud.22:_Alistair_Miles.2C_University_of_Oxford. -.. _19: https://agu.confex.com/agu/fm18/meetingapp.cgi/Paper/390015 - - -Authors -------- - -Project contributors are listed in alphabetical order by surname. - -* `Ryan Abernathey `_, Columbia University -* `Stephan Balmer `_, Meteotest -* `Ambrose Carr `_, Chan Zuckerberg Initiative -* `Tim Crone `_, Columbia University -* `Martin Durant `_, Anaconda, inc. 
-* `Jan Funke `_, HHMI Janelia -* `Darren Gallagher `_, Satavia -* `Fabian Gans `_, Max Planck Institute for Biogeochemistry -* `Shikhar Goenka `_, Satavia -* `Joe Hamman `_, NCAR -* `Stephan Hoyer `_, Google -* `Jerome Kelleher `_, University of Oxford -* `John Kirkham `_, HHMI Janelia -* `Alistair Miles `_, University of Oxford -* `Josh Moore `_, University of Dundee -* `Charles Noyes `_, University of Southern California -* `Tarik Onalan `_ -* `Constantin Pape `_, University of Heidelberg -* `Zain Patel `_, University of Cambridge -* `Matthew Rocklin `_, NVIDIA -* `Stephan Saafeld `_, HHMI Janelia -* `Vincent Schut `_, Satelligence -* `Justin Swaney `_, MIT -* `Ryan Williams `_, Chan Zuckerberg Initiative diff --git a/docs/user-guide/arrays.md b/docs/user-guide/arrays.md new file mode 100644 index 0000000000..25a1347fe3 --- /dev/null +++ b/docs/user-guide/arrays.md @@ -0,0 +1,575 @@ +# Working with arrays + +## Creating an array + +Zarr has several functions for creating arrays. For example: + +```python exec="true" session="arrays" +import shutil +shutil.rmtree('data', ignore_errors=True) +import numpy as np + +np.random.seed(0) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +import zarr +store = zarr.storage.MemoryStore() +z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +print(z) +``` + +The code above creates a 2-dimensional array of 32-bit integers with 10000 rows +and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000 +columns (and so there will be 100 chunks in total). The data is written to a +[`zarr.storage.MemoryStore`][] (e.g. an in-memory dict). See +[Persistent arrays](#persistent-arrays) for details on storing arrays in other stores, +and see [Data types](data_types.md) for an in-depth look at the data types supported +by Zarr. + +See the [creation API documentation](../api/create.md) for more detailed information about +creating arrays. 
+ +## Reading and writing data + +Zarr arrays support a similar interface to [NumPy](https://numpy.org/doc/stable/) +arrays for reading and writing data. For example, the entire array can be filled +with a scalar value: + +```python exec="true" session="arrays" source="above" +z[:] = 42 +``` + +Regions of the array can also be written to, e.g.: + +```python exec="true" session="arrays" source="above" +import numpy as np + +z[0, :] = np.arange(10000) +z[:, 0] = np.arange(10000) +``` + +The contents of the array can be retrieved by slicing, which will load the +requested region into memory as a NumPy array, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[0, 0]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[-1, -1]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[0, :]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[:, 0]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[:]) +``` + +More information about NumPy-style indexing can be found in the +[NumPy documentation](https://numpy.org/doc/stable/user/basics.indexing.html). + +## Persistent arrays + +In the examples above, compressed data for each chunk of the array was stored in +main memory. Zarr arrays can also be stored on a file system, enabling +persistence of data between sessions. To do this, we can change the store +argument to point to a filesystem path: + +```python exec="true" session="arrays" source="above" +z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +``` + +The array above will store its configuration metadata and all compressed chunk +data in a directory called `'data/example-1.zarr'` relative to the current working +directory.
The [`zarr.create_array`][] function provides a convenient way +to create a new persistent array or continue working with an existing +array. Note, there is no need to close an array: data are automatically +flushed to disk, and files are automatically closed whenever an array is modified. + +Persistent arrays support the same interface for reading and writing data, +e.g.: + +```python exec="true" session="arrays" source="above" +z1[:] = 42 +z1[0, :] = np.arange(10000) +z1[:, 0] = np.arange(10000) +``` + +Check that the data have been written and can be read again: + +```python exec="true" session="arrays" source="above" result="ansi" +z2 = zarr.open_array('data/example-1.zarr', mode='r') +print(np.all(z1[:] == z2[:])) +``` + +If you are just looking for a fast and convenient way to save NumPy arrays to +disk then load back into memory later, the functions +[`zarr.save`][] and [`zarr.load`][] may be +useful. E.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +a = np.arange(10) +zarr.save('data/example-2.zarr', a) +print(zarr.load('data/example-2.zarr')) +``` + +Please note that there are a number of other options for persistent array +storage, see the [Storage Guide](storage.md) for more details. + +## Resizing and appending + +A Zarr array can be resized, which means that any of its dimensions can be +increased or decreased in length. For example: + +```python exec="true" session="arrays" source="above" result="ansi" +z = zarr.create_array(store='data/example-3.zarr', shape=(10000, 10000), dtype='int32',chunks=(1000, 1000)) +z[:] = 42 +print(f"Original shape: {z.shape}") +z.resize((20000, 10000)) +print(f"New shape: {z.shape}") +``` + +Note that when an array is resized, the underlying data are not rearranged in +any way. If one or more dimensions are shrunk, any chunks falling outside the +new array shape will be deleted from the underlying store. 
+ +[`zarr.Array.append`][] is provided as a convenience function, which can be +used to append data to any axis. E.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +a = np.arange(10000000, dtype='int32').reshape(10000, 1000) +z = zarr.create_array(store='data/example-4.zarr', shape=a.shape, dtype=a.dtype, chunks=(1000, 100)) +z[:] = a +print(f"Original shape: {z.shape}") +z.append(a) +print(f"Shape after first append: {z.shape}") +z.append(np.vstack([a, a]), axis=1) +print(f"Shape after second append: {z.shape}") +``` + +## Compressors + +A number of different compressors can be used with Zarr. Zarr includes Blosc, +Zstandard and Gzip compressors. Additional compressors are available through +a separate package called [NumCodecs](https://numcodecs.readthedocs.io/) which provides various +compressor libraries including LZ4, Zlib, BZ2 and LZMA. +Different compressors can be provided via the `compressors` keyword +argument accepted by all array creation functions. For example: + +```python exec="true" session="arrays" source="above" result="ansi" +compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=3, shuffle=zarr.codecs.BloscShuffle.bitshuffle) +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) +z[:] = data +print(z.compressors) +``` + +The array above will use Blosc as the primary compressor, using the Zstandard +algorithm (compression level 3) internally within Blosc, and with the +bit-shuffle filter applied. + +When using a compressor, it can be useful to get some diagnostics on the +compression ratio.
Zarr arrays provide the [`zarr.Array.info`][] property +which can be used to print useful diagnostics, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.info) +``` + +The [`zarr.Array.info_complete`][] method inspects the underlying store and +prints additional diagnostics, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.info_complete()) +``` + +!!! note + [`zarr.Array.info_complete`][] will inspect the underlying store and may + be slow for large arrays. Use [`zarr.Array.info`][] if detailed storage + statistics are not needed. + +If you don't specify a compressor, by default Zarr uses the Zstandard +compressor. + +In addition to Blosc and Zstandard, other compression libraries can also be used. For example, +here is an array using Gzip compression, level 1: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1)) +z[:] = data +print(f"Compressors: {z.compressors}") +``` + +Here is an example using LZMA from [NumCodecs](https://numcodecs.readthedocs.io/) with a custom filter pipeline including LZMA's +built-in delta filter: + +```python exec="true" session="arrays" source="above" result="ansi" +import lzma +from numcodecs.zarr3 import LZMA + +lzma_filters = [dict(id=lzma.FILTER_DELTA, dist=4), dict(id=lzma.FILTER_LZMA2, preset=1)] +compressors = LZMA(filters=lzma_filters) +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) +print(f"Compressors: {z.compressors}") +``` + +To disable compression, set `compressors=None` when creating an array, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +z 
= zarr.create_array( + store='data/example-8.zarr', + shape=(100000000,), + chunks=(1000000,), + dtype='int32', + compressors=None +) +print(f"Compressors: {z.compressors}") +``` + +## Filters + +In some cases, compression can be improved by transforming the data in some +way. For example, if nearby values tend to be correlated, then shuffling the +bytes within each numerical value or storing the difference between adjacent +values may increase compression ratio. Some compressors provide built-in filters +that apply transformations to the data prior to compression. For example, the +Blosc compressor has built-in implementations of byte- and bit-shuffle filters, +and the LZMA compressor has a built-in implementation of a delta +filter. However, to provide additional flexibility for implementing and using +filters in combination with different compressors, Zarr also provides a +mechanism for configuring filters outside of the primary compressor. + +Here is an example using a delta filter with the Blosc compressor: + +```python exec="true" session="arrays" source="above" result="ansi" +from numcodecs.zarr3 import Delta + +filters = [Delta(dtype='int32')] +compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=1, shuffle=zarr.codecs.BloscShuffle.shuffle) +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-9.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), filters=filters, compressors=compressors) +print(z.info_complete()) +``` + +For more information about available filter codecs, see the [Numcodecs](https://numcodecs.readthedocs.io/) documentation. + +## Advanced indexing + +Zarr arrays support several methods for advanced or "fancy" +indexing, which enable a subset of data items to be extracted or updated in an +array without loading the entire array into memory. 
+ +Note that although this functionality is similar to some of the advanced +indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr +API for advanced indexing is different from both NumPy and h5py**, so please +read this section carefully. For a complete description of the indexing API, +see the documentation for the [`zarr.Array`][] class. + +### Indexing with coordinate arrays + +Items from a Zarr array can be extracted by providing an integer array of +coordinates. E.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(10) ** 2 +z = zarr.create_array(store='data/example-10.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +print(z[:]) +print(z.get_coordinate_selection([2, 5])) +``` + +Coordinate arrays can also be used to update data, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +z.set_coordinate_selection([2, 5], [-1, -2]) +print(z[:]) +``` + +For multidimensional arrays, coordinates must be provided for each dimension, +e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +print(z[:]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.get_coordinate_selection(([0, 2], [1, 3]))) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +z.set_coordinate_selection(([0, 2], [1, 3]), [-1, -2]) +print(z[:]) +``` + +For convenience, coordinate indexing is also available via the `vindex` +property, as well as the square bracket operator, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.vindex[[0, 2], [1, 3]]) +z.vindex[[0, 2], [1, 3]] = [-3, -4] +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[:]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" 
+print(z[[0, 2], [1, 3]]) +``` + +When the indexing arrays have different shapes, they are broadcast together. +That is, the following two calls are equivalent: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[1, [1, 3]]) +print(z[[1, 1], [1, 3]]) +``` + +### Indexing with a mask array + +Items can also be extracted by providing a Boolean mask. E.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(10) ** 2 +z = zarr.create_array(store='data/example-12.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +print(z[:]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +sel = np.zeros_like(z, dtype=bool) +sel[2] = True +sel[5] = True +print(z.get_mask_selection(sel)) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +z.set_mask_selection(sel, [-1, -2]) +print(z[:]) +``` + +Here's a multidimensional example: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-13.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +print(z[:]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +sel = np.zeros_like(z, dtype=bool) +sel[0, 1] = True +sel[2, 3] = True +print(z.get_mask_selection(sel)) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +z.set_mask_selection(sel, [-1, -2]) +print(z[:]) +``` + +For convenience, mask indexing is also available via the `vindex` property, +e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.vindex[sel]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" + +z.vindex[sel] = [-3, -4] +print(z[:]) +``` + +Mask indexing is conceptually the same as coordinate indexing, and is +implemented internally via the same machinery. 
Both styles of indexing allow +selecting arbitrary items from an array, also known as point selection. + +### Orthogonal indexing + +Zarr arrays also support methods for orthogonal indexing, which allows +selections to be made along each dimension of an array independently. For +example, this allows selecting a subset of rows and/or columns from a +2-dimensional array. E.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-14.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +print(z[:]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.get_orthogonal_selection(([0, 2], slice(None)))) # select first and third rows +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.get_orthogonal_selection((slice(None), [1, 3]))) # select second and fourth columns +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.get_orthogonal_selection(([0, 2], [1, 3]))) # select rows [0, 2] and columns [1, 3] +``` + +Data can also be modified, e.g.: + +```python exec="true" session="arrays" source="above" +z.set_orthogonal_selection(([0, 2], [1, 3]), [[-1, -2], [-3, -4]]) +``` + +For convenience, the orthogonal indexing functionality is also available via the +`oindex` property, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-15.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +print(z.oindex[[0, 2], :]) # select first and third rows +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.oindex[:, [1, 3]]) # select second and fourth columns +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.oindex[[0, 2], [1, 3]]) # select rows [0, 2] and columns [1, 3] +``` + +```python exec="true" session="arrays"
source="above" result="ansi" +z.oindex[[0, 2], [1, 3]] = [[-1, -2], [-3, -4]] +print(z[:]) +``` + +Any combination of integer, slice, 1D integer array and/or 1D Boolean array can +be used for orthogonal indexing. + +If the index contains at most one iterable, and otherwise contains only slices and integers, +orthogonal indexing is also available directly on the array: + +```python exec="true" session="arrays" source="above" result="ansi" +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-16.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +print(np.all(z.oindex[[0, 2], :] == z[[0, 2], :])) +``` + +### Block Indexing + +Zarr also supports block indexing, which allows selections of whole chunks based on their +logical indices along each dimension of an array. For example, this allows selecting +a subset of chunk-aligned rows and/or columns from a 2-dimensional array. E.g.: + +```python exec="true" session="arrays" source="above" +data = np.arange(100).reshape(10, 10) +z = zarr.create_array(store='data/example-17.zarr', shape=data.shape, dtype=data.dtype, chunks=(3, 3)) +z[:] = data +``` + +Retrieve items by specifying their block coordinates: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.get_block_selection(1)) +``` + +Equivalent slicing: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z[3:6]) +``` + +For convenience, the block selection functionality is also available via the +`blocks` property, e.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.blocks[1]) +``` + +Block index arrays may be multidimensional to index multidimensional arrays. +For example: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.blocks[0, 1:3]) +``` + +Data can also be modified.
Let's start with a simple 2D array: + +```python exec="true" session="arrays" source="above" +z = zarr.create_array(store='data/example-18.zarr', shape=(6, 6), dtype=int, chunks=(2, 2)) +``` + +Set data for a selection of items: + +```python exec="true" session="arrays" source="above" result="ansi" +z.set_block_selection((1, 0), 1) +print(z[...]) +``` + +For convenience, this functionality is also available via the `blocks` property. +E.g.: + +```python exec="true" session="arrays" source="above" result="ansi" +z.blocks[:, 2] = 7 +print(z[...]) +``` + +Any combination of integer and slice can be used for block indexing: + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.blocks[2, 1:3]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +root = zarr.create_group('data/example-19.zarr') +foo = root.create_array(name='foo', shape=(1000, 100), chunks=(10, 10), dtype='float32') +bar = root.create_array(name='bar', shape=(100,), dtype='int32') +foo[:, :] = np.random.random((1000, 100)) +bar[:] = np.arange(100) +print(root.tree()) +``` + +## Sharding + +Using small chunk shapes in very large arrays can lead to a very large number of chunks. +This can become a performance issue for file systems and object storage. +With Zarr format 3, a new sharding feature has been added to address this issue. + +With sharding, multiple chunks can be stored in a single storage object (e.g. a file). +Within a shard, chunks are compressed and serialized separately. +This allows individual chunks to be read independently. +However, when writing data, a full shard must be written in one go for optimal +performance and to avoid concurrency issues. +That means that shards are the units of writing and chunks are the units of reading. +Users need to configure the chunk and shard shapes accordingly. + +Sharded arrays can be created by providing the `shards` parameter to [`zarr.create_array`][].
+ +```python exec="true" session="arrays" source="above" result="ansi" +a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') +a[:] = (np.arange(10000 * 10000) % 256).astype('uint8').reshape(10000, 10000) +print(a.info_complete()) +``` + +In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is used. +This means that `10*10` chunks are stored in each shard, and there are `10*10` shards in total. +Without the `shards` argument, there would be 10,000 chunks stored as individual files. + +## Missing features in 3.0 + +The following features have not been ported to 3.0 yet. + +### Copying and migrating data + +See the Zarr-Python 2 documentation on [Copying and migrating data](https://zarr.readthedocs.io/en/support-v2/tutorial.html#copying-migrating-data) for more details. diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst deleted file mode 100644 index a498cb44a3..0000000000 --- a/docs/user-guide/arrays.rst +++ /dev/null @@ -1,632 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. _user-guide-arrays: - -Working with arrays -=================== - -Creating an array ------------------ - -Zarr has several functions for creating arrays. For example:: - - >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z - - -The code above creates a 2-dimensional array of 32-bit integers with 10000 rows -and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000 -columns (and so there will be 100 chunks in total). The data is written to a -:class:`zarr.storage.MemoryStore` (e.g. an in-memory dict). See -:ref:`user-guide-persist` for details on storing arrays in other stores, and see -:ref:`user-guide-data-types` for an in-depth look at the data types supported by Zarr. 
- -For a complete list of array creation routines see the :mod:`zarr` -module documentation. - -.. _user-guide-array: - -Reading and writing data ------------------------- - -Zarr arrays support a similar interface to `NumPy `_ -arrays for reading and writing data. For example, the entire array can be filled -with a scalar value:: - - >>> z[:] = 42 - -Regions of the array can also be written to, e.g.:: - - >>> import numpy as np - >>> - >>> z[0, :] = np.arange(10000) - >>> z[:, 0] = np.arange(10000) - -The contents of the array can be retrieved by slicing, which will load the -requested region into memory as a NumPy array, e.g.:: - - >>> z[0, 0] - array(0, dtype=int32) - >>> z[-1, -1] - array(42, dtype=int32) - >>> z[0, :] - array([ 0, 1, 2, ..., 9997, 9998, 9999], - shape=(10000,), dtype=int32) - >>> z[:, 0] - array([ 0, 1, 2, ..., 9997, 9998, 9999], - shape=(10000,), dtype=int32) - >>> z[:] - array([[ 0, 1, 2, ..., 9997, 9998, 9999], - [ 1, 42, 42, ..., 42, 42, 42], - [ 2, 42, 42, ..., 42, 42, 42], - ..., - [9997, 42, 42, ..., 42, 42, 42], - [9998, 42, 42, ..., 42, 42, 42], - [9999, 42, 42, ..., 42, 42, 42]], - shape=(10000, 10000), dtype=int32) - -Read more about NumPy-style indexing can be found in the -`NumPy documentation `_. - -.. _user-guide-persist: - -Persistent arrays ------------------ - -In the examples above, compressed data for each chunk of the array was stored in -main memory. Zarr arrays can also be stored on a file system, enabling -persistence of data between sessions. To do this, we can change the store -argument to point to a filesystem path:: - - >>> z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - -The array above will store its configuration metadata and all compressed chunk -data in a directory called ``'data/example-1.zarr'`` relative to the current working -directory. 
The :func:`zarr.create_array` function provides a convenient way -to create a new persistent array or continue working with an existing -array. Note, there is no need to close an array: data are automatically -flushed to disk, and files are automatically closed whenever an array is modified. - -Persistent arrays support the same interface for reading and writing data, -e.g.:: - - >>> z1[:] = 42 - >>> z1[0, :] = np.arange(10000) - >>> z1[:, 0] = np.arange(10000) - -Check that the data have been written and can be read again:: - - >>> z2 = zarr.open_array('data/example-1.zarr', mode='r') - >>> np.all(z1[:] == z2[:]) - np.True_ - -If you are just looking for a fast and convenient way to save NumPy arrays to -disk then load back into memory later, the functions -:func:`zarr.save` and :func:`zarr.load` may be -useful. E.g.:: - - >>> a = np.arange(10) - >>> zarr.save('data/example-2.zarr', a) - >>> zarr.load('data/example-2.zarr') - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - -Please note that there are a number of other options for persistent array -storage, see the :ref:`Storage Guide ` guide for more details. - -.. _user-guide-resize: - -Resizing and appending ----------------------- - -A Zarr array can be resized, which means that any of its dimensions can be -increased or decreased in length. For example:: - - >>> z = zarr.create_array(store='data/example-3.zarr', shape=(10000, 10000), dtype='int32',chunks=(1000, 1000)) - >>> z[:] = 42 - >>> z.shape - (10000, 10000) - >>> z.resize((20000, 10000)) - >>> z.shape - (20000, 10000) - -Note that when an array is resized, the underlying data are not rearranged in -any way. If one or more dimensions are shrunk, any chunks falling outside the -new array shape will be deleted from the underlying store. - -:func:`zarr.Array.append` is provided as a convenience function, which can be -used to append data to any axis. 
E.g.:: - - >>> a = np.arange(10000000, dtype='int32').reshape(10000, 1000) - >>> z = zarr.create_array(store='data/example-4.zarr', shape=a.shape, dtype=a.dtype, chunks=(1000, 100)) - >>> z[:] = a - >>> z.shape - (10000, 1000) - >>> z.append(a) - (20000, 1000) - >>> z.append(np.vstack([a, a]), axis=1) - (20000, 2000) - >>> z.shape - (20000, 2000) - -.. _user-guide-compress: - -Compressors ------------ - -A number of different compressors can be used with Zarr. Zarr includes Blosc, -Zstandard and Gzip compressors. Additional compressors are available through -a separate package called NumCodecs_ which provides various -compressor libraries including LZ4, Zlib, BZ2 and LZMA. -Different compressors can be provided via the ``compressors`` keyword -argument accepted by all array creation functions. For example:: - - >>> compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=3, shuffle=zarr.codecs.BloscShuffle.bitshuffle) - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) - >>> z[:] = data - >>> z.compressors - (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) - -This array above will use Blosc as the primary compressor, using the Zstandard -algorithm (compression level 3) internally within Blosc, and with the -bit-shuffle filter applied. - -When using a compressor, it can be useful to get some diagnostics on the -compression ratio. Zarr arrays provide the :attr:`zarr.Array.info` property -which can be used to print useful diagnostics, e.g.:: - - >>> z.info - Type : Array - Zarr format : 3 - Data type : Int32(endianness='little') - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : LocalStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) - No. 
bytes : 400000000 (381.5M) - -The :func:`zarr.Array.info_complete` method inspects the underlying store and -prints additional diagnostics, e.g.:: - - >>> z.info_complete() - Type : Array - Zarr format : 3 - Data type : Int32(endianness='little') - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : LocalStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) - No. bytes : 400000000 (381.5M) - No. bytes stored : 3558573 (3.4M) - Storage ratio : 112.4 - Chunks Initialized : 100 - -.. note:: - :func:`zarr.Array.info_complete` will inspect the underlying store and may - be slow for large arrays. Use :attr:`zarr.Array.info` if detailed storage - statistics are not needed. - -If you don't specify a compressor, by default Zarr uses the Zstandard -compressor. - -In addition to Blosc and Zstandard, other compression libraries can also be used. For example, -here is an array using Gzip compression, level 1:: - - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1)) - >>> z[:] = data - >>> z.compressors - (GzipCodec(level=1),) - -Here is an example using LZMA from NumCodecs_ with a custom filter pipeline including LZMA's -built-in delta filter:: - - >>> import lzma - >>> from numcodecs.zarr3 import LZMA - >>> import warnings - >>> warnings.filterwarnings("ignore", category=UserWarning) - >>> - >>> lzma_filters = [dict(id=lzma.FILTER_DELTA, dist=4), dict(id=lzma.FILTER_LZMA2, preset=1)] - >>> compressors = LZMA(filters=lzma_filters) - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) - >>> z.compressors - 
(LZMA(codec_name='numcodecs.lzma', codec_config={'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]}),) - -To disable compression, set ``compressors=None`` when creating an array, e.g.:: - - >>> z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None) - >>> z.compressors - () - -.. _user-guide-filters: - -Filters -------- - -In some cases, compression can be improved by transforming the data in some -way. For example, if nearby values tend to be correlated, then shuffling the -bytes within each numerical value or storing the difference between adjacent -values may increase compression ratio. Some compressors provide built-in filters -that apply transformations to the data prior to compression. For example, the -Blosc compressor has built-in implementations of byte- and bit-shuffle filters, -and the LZMA compressor has a built-in implementation of a delta -filter. However, to provide additional flexibility for implementing and using -filters in combination with different compressors, Zarr also provides a -mechanism for configuring filters outside of the primary compressor. 
- -Here is an example using a delta filter with the Blosc compressor:: - - >>> from numcodecs.zarr3 import Delta - >>> - >>> filters = [Delta(dtype='int32')] - >>> compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=1, shuffle=zarr.codecs.BloscShuffle.shuffle) - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-9.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), filters=filters, compressors=compressors) - >>> z.info_complete() - Type : Array - Zarr format : 3 - Data type : Int32(endianness='little') - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : LocalStore - Filters : (Delta(codec_name='numcodecs.delta', codec_config={'dtype': 'int32'}),) - Serializer : BytesCodec(endian=) - Compressors : (BloscCodec(typesize=4, cname=, clevel=1, shuffle=, blocksize=0),) - No. bytes : 400000000 (381.5M) - No. bytes stored : 826 - Storage ratio : 484261.5 - Chunks Initialized : 0 - -For more information about available filter codecs, see the `Numcodecs -`_ documentation. - -.. _user-guide-indexing: - -Advanced indexing ------------------ - -Zarr arrays support several methods for advanced or "fancy" -indexing, which enable a subset of data items to be extracted or updated in an -array without loading the entire array into memory. - -Note that although this functionality is similar to some of the advanced -indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr -API for advanced indexing is different from both NumPy and h5py**, so please -read this section carefully. For a complete description of the indexing API, -see the documentation for the :class:`zarr.Array` class. - -Indexing with coordinate arrays -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Items from a Zarr array can be extracted by providing an integer array of -coordinates. 
E.g.:: - - >>> data = np.arange(10) ** 2 - >>> z = zarr.create_array(store='data/example-10.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) - >>> z.get_coordinate_selection([2, 5]) - array([ 4, 25]) - -Coordinate arrays can also be used to update data, e.g.:: - - >>> z.set_coordinate_selection([2, 5], [-1, -2]) - >>> z[:] - array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) - -For multidimensional arrays, coordinates must be provided for each dimension, -e.g.:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([[ 0, 1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14]]) - >>> z.get_coordinate_selection(([0, 2], [1, 3])) - array([ 1, 13]) - >>> z.set_coordinate_selection(([0, 2], [1, 3]), [-1, -2]) - >>> z[:] - array([[ 0, -1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -2, 14]]) - -For convenience, coordinate indexing is also available via the ``vindex`` -property, as well as the square bracket operator, e.g.:: - - >>> z.vindex[[0, 2], [1, 3]] - array([-1, -2]) - >>> z.vindex[[0, 2], [1, 3]] = [-3, -4] - >>> z[:] - array([[ 0, -3, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -4, 14]]) - >>> z[[0, 2], [1, 3]] - array([-3, -4]) - -When the indexing arrays have different shapes, they are broadcast together. -That is, the following two calls are equivalent:: - - >>> z[1, [1, 3]] - array([6, 8]) - >>> z[[1, 1], [1, 3]] - array([6, 8]) - -Indexing with a mask array -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Items can also be extracted by providing a Boolean mask. 
E.g.:: - - >>> data = np.arange(10) ** 2 - >>> z = zarr.create_array(store='data/example-12.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) - >>> sel = np.zeros_like(z, dtype=bool) - >>> sel[2] = True - >>> sel[5] = True - >>> z.get_mask_selection(sel) - array([ 4, 25]) - >>> z.set_mask_selection(sel, [-1, -2]) - >>> z[:] - array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) - -Here's a multidimensional example:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-13.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([[ 0, 1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14]]) - >>> sel = np.zeros_like(z, dtype=bool) - >>> sel[0, 1] = True - >>> sel[2, 3] = True - >>> z.get_mask_selection(sel) - array([ 1, 13]) - >>> z.set_mask_selection(sel, [-1, -2]) - >>> z[:] - array([[ 0, -1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -2, 14]]) - -For convenience, mask indexing is also available via the ``vindex`` property, -e.g.:: - - >>> z.vindex[sel] - array([-1, -2]) - >>> z.vindex[sel] = [-3, -4] - >>> z[:] - array([[ 0, -3, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -4, 14]]) - -Mask indexing is conceptually the same as coordinate indexing, and is -implemented internally via the same machinery. Both styles of indexing allow -selecting arbitrary items from an array, also known as point selection. - -Orthogonal indexing -~~~~~~~~~~~~~~~~~~~ - -Zarr arrays also support methods for orthogonal indexing, which allows -selections to be made along each dimension of an array independently. For -example, this allows selecting a subset of rows and/or columns from a -2-dimensional array. 
E.g.:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-14.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([[ 0, 1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14]]) - >>> z.get_orthogonal_selection(([0, 2], slice(None))) # select first and third rows - array([[ 0, 1, 2, 3, 4], - [10, 11, 12, 13, 14]]) - >>> z.get_orthogonal_selection((slice(None), [1, 3])) # select second and fourth columns - array([[ 1, 3], - [ 6, 8], - [11, 13]]) - >>> z.get_orthogonal_selection(([0, 2], [1, 3])) # select rows [0, 2] and columns [1, 4] - array([[ 1, 3], - [11, 13]]) - -Data can also be modified, e.g.:: - - >>> z.set_orthogonal_selection(([0, 2], [1, 3]), [[-1, -2], [-3, -4]]) - -For convenience, the orthogonal indexing functionality is also available via the -``oindex`` property, e.g.:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-15.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z.oindex[[0, 2], :] # select first and third rows - array([[ 0, 1, 2, 3, 4], - [10, 11, 12, 13, 14]]) - >>> z.oindex[:, [1, 3]] # select second and fourth columns - array([[ 1, 3], - [ 6, 8], - [11, 13]]) - >>> z.oindex[[0, 2], [1, 3]] # select rows [0, 2] and columns [1, 4] - array([[ 1, 3], - [11, 13]]) - >>> z.oindex[[0, 2], [1, 3]] = [[-1, -2], [-3, -4]] - >>> z[:] - array([[ 0, -1, 2, -2, 4], - [ 5, 6, 7, 8, 9], - [10, -3, 12, -4, 14]]) - -Any combination of integer, slice, 1D integer array and/or 1D Boolean array can -be used for orthogonal indexing. 
- -If the index contains at most one iterable, and otherwise contains only slices and integers, -orthogonal indexing is also available directly on the array:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-16.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> np.all(z.oindex[[0, 2], :] == z[[0, 2], :]) - np.True_ - -Block Indexing -~~~~~~~~~~~~~~ - -Zarr also support block indexing, which allows selections of whole chunks based on their -logical indices along each dimension of an array. For example, this allows selecting -a subset of chunk aligned rows and/or columns from a 2-dimensional array. E.g.:: - - >>> data = np.arange(100).reshape(10, 10) - >>> z = zarr.create_array(store='data/example-17.zarr', shape=data.shape, dtype=data.dtype, chunks=(3, 3)) - >>> z[:] = data - -Retrieve items by specifying their block coordinates:: - - >>> z.get_block_selection(1) - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - -Equivalent slicing:: - - >>> z[3:6] - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - -For convenience, the block selection functionality is also available via the -`blocks` property, e.g.:: - - >>> z.blocks[1] - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - -Block index arrays may be multidimensional to index multidimensional arrays. -For example:: - - >>> z.blocks[0, 1:3] - array([[ 3, 4, 5, 6, 7, 8], - [13, 14, 15, 16, 17, 18], - [23, 24, 25, 26, 27, 28]]) - -Data can also be modified. Let's start by a simple 2D array:: - - >>> z = zarr.create_array(store='data/example-18.zarr', shape=(6, 6), dtype=int, chunks=(2, 2)) - -Set data for a selection of items:: - - >>> z.set_block_selection((1, 0), 1) - >>> z[...] 
- array([[0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0]]) - -For convenience, this functionality is also available via the ``blocks`` property. -E.g.:: - - >>> z.blocks[:, 2] = 7 - >>> z[...] - array([[0, 0, 0, 0, 7, 7], - [0, 0, 0, 0, 7, 7], - [1, 1, 0, 0, 7, 7], - [1, 1, 0, 0, 7, 7], - [0, 0, 0, 0, 7, 7], - [0, 0, 0, 0, 7, 7]]) - -Any combination of integer and slice can be used for block indexing:: - - >>> z.blocks[2, 1:3] - array([[0, 0, 7, 7], - [0, 0, 7, 7]]) - >>> - >>> root = zarr.create_group('data/example-19.zarr') - >>> foo = root.create_array(name='foo', shape=(1000, 100), chunks=(10, 10), dtype='float32') - >>> bar = root.create_array(name='bar', shape=(100,), dtype='int32') - >>> foo[:, :] = np.random.random((1000, 100)) - >>> bar[:] = np.arange(100) - >>> root.tree() - / - ├── bar (100,) int32 - └── foo (1000, 100) float32 - - -.. _user-guide-sharding: - -Sharding --------- - -Using small chunk shapes in very large arrays can lead to a very large number of chunks. -This can become a performance issue for file systems and object storage. -With Zarr format 3, a new sharding feature has been added to address this issue. - -With sharding, multiple chunks can be stored in a single storage object (e.g. a file). -Within a shard, chunks are compressed and serialized separately. -This allows individual chunks to be read independently. -However, when writing data, a full shard must be written in one go for optimal -performance and to avoid concurrency issues. -That means that shards are the units of writing and chunks are the units of reading. -Users need to configure the chunk and shard shapes accordingly. - -Sharded arrays can be created by providing the ``shards`` parameter to :func:`zarr.create_array`. 
- - >>> a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') - >>> a[:] = (np.arange(10000 * 10000) % 256).astype('uint8').reshape(10000, 10000) - >>> a.info_complete() - Type : Array - Zarr format : 3 - Data type : UInt8() - Fill value : 0 - Shape : (10000, 10000) - Shard shape : (1000, 1000) - Chunk shape : (100, 100) - Order : C - Read-only : False - Store type : LocalStore - Filters : () - Serializer : BytesCodec(endian=None) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 100000000 (95.4M) - No. bytes stored : 3981473 (3.8M) - Storage ratio : 25.1 - Shards Initialized : 100 - -In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is used. -This means that 10*10 chunks are stored in each shard, and there are 10*10 shards in total. -Without the ``shards`` argument, there would be 10,000 chunks stored as individual files. - -Missing features in 3.0 ------------------------ - - -The following features have not been ported to 3.0 yet. - -Copying and migrating data -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See the Zarr-Python 2 documentation on `Copying and migrating data `_ for more details. diff --git a/docs/user-guide/attributes.md b/docs/user-guide/attributes.md new file mode 100644 index 0000000000..44d2f9fa87 --- /dev/null +++ b/docs/user-guide/attributes.md @@ -0,0 +1,37 @@ +# Working with attributes + +Zarr arrays and groups support custom key/value attributes, which can be useful for +storing application-specific metadata. 
For example: + +```python exec="true" session="arrays" source="above" result="ansi" +import zarr +store = zarr.storage.MemoryStore() +root = zarr.create_group(store=store) +root.attrs['foo'] = 'bar' +z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32') +z.attrs['baz'] = 42 +z.attrs['qux'] = [1, 4, 7, 12] +print(sorted(root.attrs)) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print('foo' in root.attrs) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(root.attrs['foo']) +``` +```python exec="true" session="arrays" source="above" result="ansi" +print(sorted(z.attrs)) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.attrs['baz']) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.attrs['qux']) +``` + +Internally Zarr uses JSON to store array attributes, so attribute values must be +JSON serializable. diff --git a/docs/user-guide/attributes.rst b/docs/user-guide/attributes.rst deleted file mode 100644 index ed48623e29..0000000000 --- a/docs/user-guide/attributes.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _user-guide-attrs: - -Working with attributes -======================= - -Zarr arrays and groups support custom key/value attributes, which can be useful for -storing application-specific metadata. For example:: - - >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> root = zarr.create_group(store=store) - >>> root.attrs['foo'] = 'bar' - >>> z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32') - >>> z.attrs['baz'] = 42 - >>> z.attrs['qux'] = [1, 4, 7, 12] - >>> sorted(root.attrs) - ['foo'] - >>> 'foo' in root.attrs - True - >>> root.attrs['foo'] - 'bar' - >>> sorted(z.attrs) - ['baz', 'qux'] - >>> z.attrs['baz'] - 42 - >>> z.attrs['qux'] - [1, 4, 7, 12] - -Internally Zarr uses JSON to store array attributes, so attribute values must be -JSON serializable. 
diff --git a/docs/user-guide/cli.md b/docs/user-guide/cli.md new file mode 100644 index 0000000000..fc812c1a20 --- /dev/null +++ b/docs/user-guide/cli.md @@ -0,0 +1,113 @@ +# Command-line interface + +Zarr-Python provides a command-line interface that enables: + +- migration of Zarr v2 metadata to v3 +- removal of v2 or v3 metadata + +To see available commands run the following in a terminal: + +```bash +zarr --help +``` + +or to get help on individual commands: + +```bash +zarr migrate --help + +zarr remove-metadata --help +``` + +## Migrate metadata from v2 to v3 + +### Migrate to a separate location + +To migrate a Zarr array/group's metadata from v2 to v3 run: + +```bash +zarr migrate v3 path/to/input.zarr path/to/output.zarr +``` + +This will write new `zarr.json` files to `output.zarr`, leaving `input.zarr` un-touched. +Note - this will migrate the entire Zarr hierarchy, so if `input.zarr` contains multiple groups/arrays, +new `zarr.json` will be made for all of them. + +### Migrate in-place + +If you'd prefer to migrate the metadata in-place run: + +```bash +zarr migrate v3 path/to/input.zarr +``` + +This will write new `zarr.json` files to `input.zarr`, leaving the existing v2 metadata un-touched. + +To open the array/group using the new metadata use: + +```python +import zarr +zarr_with_v3_metadata = zarr.open('path/to/input.zarr', zarr_format=3) +``` + +Once you are happy with the conversion, you can run the following to remove the old v2 metadata: + +```bash +zarr remove-metadata v2 path/to/input.zarr +``` + +Note there is also a shortcut to migrate and remove v2 metadata in one step: + +```bash +zarr migrate v3 path/to/input.zarr --remove-v2-metadata +``` + +## Remove metadata + +Remove v2 metadata using: + +```bash +zarr remove-metadata v2 path/to/input.zarr +``` + +or v3 with: + +```bash +zarr remove-metadata v3 path/to/input.zarr +``` + +By default, this will only allow removal of metadata if a valid alternative exists. 
For example, you can't +remove v2 metadata unless v3 metadata exists at that location. + +To override this behaviour use `--force`: + +```bash +zarr remove-metadata v3 path/to/input.zarr --force +``` + +## Dry run + +All commands provide a `--dry-run` option that will log changes that would be made on a real run, without creating +or modifying any files. + +```bash +zarr migrate v3 path/to/input.zarr --dry-run + +Dry run enabled - no new files will be created or changed. Log of files that would be created on a real run: +Saving metadata to path/to/input.zarr/zarr.json +``` + +## Verbose + +You can also add `--verbose` **before** any command, to see a full log of its actions: + +```bash +zarr --verbose migrate v3 path/to/input.zarr + +zarr --verbose remove-metadata v2 path/to/input.zarr +``` + +## Equivalent functions + +All features of the command-line interface are also available via functions under +`zarr.metadata`. \ No newline at end of file diff --git a/docs/user-guide/cli.rst b/docs/user-guide/cli.rst deleted file mode 100644 index 822b60d389..0000000000 --- a/docs/user-guide/cli.rst +++ /dev/null @@ -1,127 +0,0 @@ -.. _user-guide-cli: - -Command-line interface -======================== - -Zarr-Python provides a command-line interface that enables: - -- migration of Zarr v2 metadata to v3 -- removal of v2 or v3 metadata - -To see available commands run the following in a terminal: - -.. code-block:: bash - - $ zarr --help - -or to get help on individual commands: - -.. code-block:: bash - - $ zarr migrate --help - - $ zarr remove-metadata --help - - -Migrate metadata from v2 to v3 ------------------------------- - -Migrate to a separate location -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To migrate a Zarr array/group's metadata from v2 to v3 run: - -.. code-block:: bash - - $ zarr migrate v3 path/to/input.zarr path/to/output.zarr - -This will write new ``zarr.json`` files to ``output.zarr``, leaving ``input.zarr`` un-touched. 
-Note - this will migrate the entire Zarr hierarchy, so if ``input.zarr`` contains multiple groups/arrays, -new ``zarr.json`` will be made for all of them. - -Migrate in-place -~~~~~~~~~~~~~~~~ - -If you'd prefer to migrate the metadata in-place run: - -.. code-block:: bash - - $ zarr migrate v3 path/to/input.zarr - -This will write new ``zarr.json`` files to ``input.zarr``, leaving the existing v2 metadata un-touched. - -To open the array/group using the new metadata use: - -.. code-block:: python - - >>> import zarr - >>> zarr_with_v3_metadata = zarr.open('path/to/input.zarr', zarr_format=3) - -Once you are happy with the conversion, you can run the following to remove the old v2 metadata: - -.. code-block:: bash - - $ zarr remove-metadata v2 path/to/input.zarr - -Note there is also a shortcut to migrate and remove v2 metadata in one step: - -.. code-block:: bash - - $ zarr migrate v3 path/to/input.zarr --remove-v2-metadata - - -Remove metadata ----------------- - -Remove v2 metadata using: - -.. code-block:: bash - - $ zarr remove-metadata v2 path/to/input.zarr - -or v3 with: - -.. code-block:: bash - - $ zarr remove-metadata v3 path/to/input.zarr - -By default, this will only allow removal of metadata if a valid alternative exists. For example, you can't -remove v2 metadata unless v3 metadata exists at that location. - -To override this behaviour use ``--force``: - -.. code-block:: bash - - $ zarr remove-metadata v3 path/to/input.zarr --force - - -Dry run --------- -All commands provide a ``--dry-run`` option that will log changes that would be made on a real run, without creating -or modifying any files. - -.. code-block:: bash - - $ zarr migrate v3 path/to/input.zarr --dry-run - - Dry run enabled - no new files will be created or changed. Log of files that would be created on a real run: - Saving metadata to path/to/input.zarr/zarr.json - - -Verbose --------- -You can also add ``--verbose`` **before** any command, to see a full log of its actions: - -.. 
code-block:: bash - - $ zarr --verbose migrate v3 path/to/input.zarr - - $ zarr --verbose remove-metadata v2 path/to/input.zarr - - -Equivalent functions --------------------- -All features of the command-line interface are also available via functions under -:mod:`zarr.metadata`. - - diff --git a/docs/user-guide/config.md b/docs/user-guide/config.md new file mode 100644 index 0000000000..21fe9b5def --- /dev/null +++ b/docs/user-guide/config.md @@ -0,0 +1,50 @@ +# Runtime configuration + +[`zarr.config`][] is responsible for managing the configuration of zarr and +is based on the [donfig](https://github.com/pytroll/donfig) Python library. + +Configuration values can be set using code like the following: + +```python exec="true" session="config" source="above" result="ansi" + +import zarr + +print(zarr.config.get('array.order')) +``` + +```python exec="true" session="config" source="above" result="ansi" +zarr.config.set({'array.order': 'F'}) + +print(zarr.config.get('array.order')) +``` + +Alternatively, configuration values can be set using environment variables, e.g. +`ZARR_ARRAY__ORDER=F`. + +The configuration can also be read from a YAML file in standard locations. +For more information, see the +[donfig documentation](https://donfig.readthedocs.io/en/latest/). + +Configuration options include the following: + +- Default Zarr format `default_zarr_format` +- Default array order in memory `array.order` +- Whether empty chunks are written to storage `array.write_empty_chunks` +- Async and threading options, e.g. `async.concurrency` and `threading.max_workers` +- Selections of implementations of codecs, codec pipelines and buffers +- Enabling GPU support with `zarr.config.enable_gpu()`. See GPU support for more. + +For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, +first register the implementations in the registry and then select them in the config. 
+For example, an implementation of the bytes codec in a class `'custompackage.NewBytesCodec'`, +requires the value of `codecs.bytes.name` to be `'custompackage.NewBytesCodec'`. + +This is the current default configuration: + +```python exec="true" session="config" source="above" result="ansi" +from pprint import pprint +import io +output = io.StringIO() +zarr.config.pprint(stream=output, width=60) +print(output.getvalue()) +``` diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst deleted file mode 100644 index 76210da791..0000000000 --- a/docs/user-guide/config.rst +++ /dev/null @@ -1,83 +0,0 @@ -.. _user-guide-config: - -Runtime configuration -===================== - -``zarr.config`` is responsible for managing the configuration of zarr and -is based on the `donfig `_ Python library. - -Configuration values can be set using code like the following:: - - >>> import zarr - >>> - >>> zarr.config.set({'array.order': 'F'}) - - >>> - >>> # revert this change so it doesn't impact the rest of the docs - >>> zarr.config.set({'array.order': 'C'}) - - -Alternatively, configuration values can be set using environment variables, e.g. -``ZARR_ARRAY__ORDER=F``. - -The configuration can also be read from a YAML file in standard locations. -For more information, see the -`donfig documentation `_. - -Configuration options include the following: - -- Default Zarr format ``default_zarr_version`` -- Default array order in memory ``array.order`` -- Whether empty chunks are written to storage ``array.write_empty_chunks`` -- Async and threading options, e.g. ``async.concurrency`` and ``threading.max_workers`` -- Selections of implementations of codecs, codec pipelines and buffers -- Enabling GPU support with ``zarr.config.enable_gpu()``. See :ref:`user-guide-gpu` for more. - -For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, -first register the implementations in the registry and then select them in the config. 
-For example, an implementation of the bytes codec in a class ``'custompackage.NewBytesCodec'``, -requires the value of ``codecs.bytes.name`` to be ``'custompackage.NewBytesCodec'``. - -This is the current default configuration:: - - >>> zarr.config.pprint() - {'array': {'order': 'C', 'write_empty_chunks': False}, - 'async': {'concurrency': 10, 'timeout': None}, - 'buffer': 'zarr.buffer.cpu.Buffer', - 'codec_pipeline': {'batch_size': 1, - 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, - 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', - 'bytes': 'zarr.codecs.bytes.BytesCodec', - 'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec', - 'endian': 'zarr.codecs.bytes.BytesCodec', - 'gzip': 'zarr.codecs.gzip.GzipCodec', - 'numcodecs.adler32': 'zarr.codecs.numcodecs.Adler32', - 'numcodecs.astype': 'zarr.codecs.numcodecs.AsType', - 'numcodecs.bitround': 'zarr.codecs.numcodecs.BitRound', - 'numcodecs.blosc': 'zarr.codecs.numcodecs.Blosc', - 'numcodecs.bz2': 'zarr.codecs.numcodecs.BZ2', - 'numcodecs.crc32': 'zarr.codecs.numcodecs.CRC32', - 'numcodecs.crc32c': 'zarr.codecs.numcodecs.CRC32C', - 'numcodecs.delta': 'zarr.codecs.numcodecs.Delta', - 'numcodecs.fixedscaleoffset': 'zarr.codecs.numcodecs.FixedScaleOffset', - 'numcodecs.fletcher32': 'zarr.codecs.numcodecs.Fletcher32', - 'numcodecs.gzip': 'zarr.codecs.numcodecs.GZip', - 'numcodecs.jenkins_lookup3': 'zarr.codecs.numcodecs.JenkinsLookup3', - 'numcodecs.lz4': 'zarr.codecs.numcodecs.LZ4', - 'numcodecs.lzma': 'zarr.codecs.numcodecs.LZMA', - 'numcodecs.packbits': 'zarr.codecs.numcodecs.PackBits', - 'numcodecs.pcodec': 'zarr.codecs.numcodecs.PCodec', - 'numcodecs.quantize': 'zarr.codecs.numcodecs.Quantize', - 'numcodecs.shuffle': 'zarr.codecs.numcodecs.Shuffle', - 'numcodecs.zfpy': 'zarr.codecs.numcodecs.ZFPY', - 'numcodecs.zlib': 'zarr.codecs.numcodecs.Zlib', - 'numcodecs.zstd': 'zarr.codecs.numcodecs.Zstd', - 'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec', - 'transpose': 
'zarr.codecs.transpose.TransposeCodec', - 'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec', - 'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec', - 'zstd': 'zarr.codecs.zstd.ZstdCodec'}, - 'default_zarr_format': 3, - 'json_indent': 2, - 'ndbuffer': 'zarr.buffer.cpu.NDBuffer', - 'threading': {'max_workers': None}} diff --git a/docs/user-guide/consolidated_metadata.md b/docs/user-guide/consolidated_metadata.md new file mode 100644 index 0000000000..d4fc9d6bab --- /dev/null +++ b/docs/user-guide/consolidated_metadata.md @@ -0,0 +1,123 @@ +# Consolidated metadata + +!!! warning + The Consolidated Metadata feature in Zarr-Python is considered experimental for v3 + stores. [zarr-specs#309](https://github.com/zarr-developers/zarr-specs/pull/309) + has proposed a formal extension to the v3 specification to support consolidated metadata. + +Zarr-Python implements the [Consolidated Metadata](https://github.com/zarr-developers/zarr-specs/pull/309) for v2 and v3 stores. +Consolidated metadata can reduce the time needed to load the metadata for an +entire hierarchy, especially when the metadata is being served over a network. +Consolidated metadata essentially stores all the metadata for a hierarchy in the +metadata of the root Group. + +## Usage + +If consolidated metadata is present in a Zarr Group's metadata then it is used +by default. The initial read to open the group will need to communicate with +the store (reading from a file for a [`zarr.storage.LocalStore`][], making a +network request for a [`zarr.storage.FsspecStore`][]). After that, any subsequent +metadata reads to get child Group or Array nodes will *not* require reads from the store. + +In Python, the consolidated metadata is available on the `.consolidated_metadata` +attribute of the `GroupMetadata` object. 
+ +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +import zarr +import warnings + +warnings.filterwarnings("ignore", category=UserWarning) +store = zarr.storage.MemoryStore() +group = zarr.create_group(store=store) +print(group) +array = group.create_array(shape=(1,), name='a', dtype='float64') +print(array) +``` + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +array = group.create_array(shape=(2, 2), name='b', dtype='float64') +print(array) +``` + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +array = group.create_array(shape=(3, 3, 3), name='c', dtype='float64') +print(array) +``` + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +result = zarr.consolidate_metadata(store) +print(result) +``` + +If we open that group, the Group's metadata has a `zarr.core.group.ConsolidatedMetadata` +that can be used: + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +from pprint import pprint +import io + +consolidated = zarr.open_group(store=store) +consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata + +# Note: in regular code, pprint can be used directly without capturing its output +output = io.StringIO() +pprint(dict(sorted(consolidated_metadata.items())), stream=output, width=60) +print(output.getvalue()) +``` + +Operations on the group to get children automatically use the consolidated metadata: + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +print(consolidated['a']) # no read / HTTP request to the Store is required +``` + +With nested groups, the consolidated metadata is available on the children, recursively: + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +child = group.create_group('child', attributes={'kind': 'child'}) +grandchild = child.create_group('child', attributes={'kind': 'grandchild'}) 
+consolidated = zarr.consolidate_metadata(store) + +output = io.StringIO() +pprint(consolidated['child'].metadata.consolidated_metadata, stream=output, width=60) +print(output.getvalue()) +``` + +!!! info "Added in version 3.1.1" + The keys in the consolidated metadata are sorted prior to writing. Keys are + sorted in ascending order by path depth, where a path is defined as a sequence + of strings joined by `"/"`. For keys with the same path length, lexicographic + order is used to break the tie. This behaviour ensures deterministic metadata + output for a given group. + +## Synchronization and Concurrency + +Consolidated metadata is intended for read-heavy use cases on slowly changing +hierarchies. For hierarchies where new nodes are constantly being added, +removed, or modified, consolidated metadata may not be desirable. + +1. It will add some overhead to each update operation, since the metadata + would need to be re-consolidated to keep it in sync with the store. +2. Readers using consolidated metadata will regularly see a "past" version + of the metadata, at the time they read the root node with its consolidated + metadata. + + +## Stores Without Support for Consolidated Metadata + +Some stores may want to opt out of the consolidated metadata mechanism. This +may be for several reasons like: + +* They want to maintain read-write consistency, which is challenging with + consolidated metadata. +* They have their own consolidated metadata mechanism. +* They offer good enough performance without need for consolidation. + +This type of store can declare it doesn't want consolidation by implementing +`Store.supports_consolidated_metadata` and returning `False`. For stores that don't support +consolidation, Zarr will: + +* Raise an error on `consolidate_metadata` calls, maintaining the store in + its unconsolidated state. 
+* Raise an error in `AsyncGroup.open(..., use_consolidated=True)` +* Not use consolidated metadata in `AsyncGroup.open(..., use_consolidated=None)` diff --git a/docs/user-guide/consolidated_metadata.rst b/docs/user-guide/consolidated_metadata.rst deleted file mode 100644 index ae50c602ca..0000000000 --- a/docs/user-guide/consolidated_metadata.rst +++ /dev/null @@ -1,143 +0,0 @@ -.. _user-guide-consolidated-metadata: - -Consolidated metadata -===================== - -.. warning:: - The Consolidated Metadata feature in Zarr-Python is considered experimental for v3 - stores. `zarr-specs#309 `_ - has proposed a formal extension to the v3 specification to support consolidated metadata. - -Zarr-Python implements the `Consolidated Metadata`_ for v2 and v3 stores. -Consolidated metadata can reduce the time needed to load the metadata for an -entire hierarchy, especially when the metadata is being served over a network. -Consolidated metadata essentially stores all the metadata for a hierarchy in the -metadata of the root Group. - -Usage ------ - -If consolidated metadata is present in a Zarr Group's metadata then it is used -by default. The initial read to open the group will need to communicate with -the store (reading from a file for a :class:`zarr.storage.LocalStore`, making a -network request for a :class:`zarr.storage.FsspecStore`). After that, any subsequent -metadata reads get child Group or Array nodes will *not* require reads from the store. - -In Python, the consolidated metadata is available on the ``.consolidated_metadata`` -attribute of the ``GroupMetadata`` object. 
- - >>> import zarr - >>> import warnings - >>> warnings.filterwarnings("ignore", category=UserWarning) - >>> - >>> store = zarr.storage.MemoryStore() - >>> group = zarr.create_group(store=store) - >>> group.create_array(shape=(1,), name='a', dtype='float64') - - >>> group.create_array(shape=(2, 2), name='b', dtype='float64') - - >>> group.create_array(shape=(3, 3, 3), name='c', dtype='float64') - - >>> zarr.consolidate_metadata(store) - - -If we open that group, the Group's metadata has a :class:`zarr.core.group.ConsolidatedMetadata` -that can be used.: - - >>> consolidated = zarr.open_group(store=store) - >>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata - >>> from pprint import pprint - >>> pprint(dict(consolidated_metadata.items())) - {'a': ArrayV3Metadata(shape=(1,), - data_type=Float64(endianness='little'), - chunk_grid=RegularChunkGrid(chunk_shape=(1,)), - chunk_key_encoding=DefaultChunkKeyEncoding(separator='/'), - fill_value=np.float64(0.0), - codecs=(BytesCodec(endian=), - ZstdCodec(level=0, checksum=False)), - attributes={}, - dimension_names=None, - zarr_format=3, - node_type='array', - storage_transformers=()), - 'b': ArrayV3Metadata(shape=(2, 2), - data_type=Float64(endianness='little'), - chunk_grid=RegularChunkGrid(chunk_shape=(2, 2)), - chunk_key_encoding=DefaultChunkKeyEncoding(separator='/'), - fill_value=np.float64(0.0), - codecs=(BytesCodec(endian=), - ZstdCodec(level=0, checksum=False)), - attributes={}, - dimension_names=None, - zarr_format=3, - node_type='array', - storage_transformers=()), - 'c': ArrayV3Metadata(shape=(3, 3, 3), - data_type=Float64(endianness='little'), - chunk_grid=RegularChunkGrid(chunk_shape=(3, 3, 3)), - chunk_key_encoding=DefaultChunkKeyEncoding(separator='/'), - fill_value=np.float64(0.0), - codecs=(BytesCodec(endian=), - ZstdCodec(level=0, checksum=False)), - attributes={}, - dimension_names=None, - zarr_format=3, - node_type='array', - storage_transformers=())} - -Operations on the 
group to get children automatically use the consolidated metadata.: - - >>> consolidated['a'] # no read / HTTP request to the Store is required - - -With nested groups, the consolidated metadata is available on the children, recursively.: - - >>> child = group.create_group('child', attributes={'kind': 'child'}) - >>> grandchild = child.create_group('child', attributes={'kind': 'grandchild'}) - >>> consolidated = zarr.consolidate_metadata(store) - >>> - >>> consolidated['child'].metadata.consolidated_metadata - ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False) - -.. versionadded:: 3.1.1 - - The keys in the consolidated metadata are sorted prior to writing. Keys are - sorted in ascending order by path depth, where a path is defined as a sequence - of strings joined by ``"/"``. For keys with the same path length, lexicographic - order is used to break the tie. This behaviour ensures deterministic metadata - output for a given group. - -Synchronization and Concurrency -------------------------------- - -Consolidated metadata is intended for read-heavy use cases on slowly changing -hierarchies. For hierarchies where new nodes are constantly being added, -removed, or modified, consolidated metadata may not be desirable. - -1. It will add some overhead to each update operation, since the metadata - would need to be re-consolidated to keep it in sync with the store. -2. Readers using consolidated metadata will regularly see a "past" version - of the metadata, at the time they read the root node with its consolidated - metadata. - -.. 
_Consolidated Metadata: https://github.com/zarr-developers/zarr-specs/pull/309 - -Stores Without Support for Consolidated Metadata ------------------------------------------------- - -Some stores may want to opt out of the consolidated metadata mechanism. This -may be for several reasons like: - -* They want to maintain read-write consistency, which is challenging with - consolidated metadata. -* They have their own consolidated metadata mechanism. -* They offer good enough performance without need for consolidation. - -This type of store can declare it doesn't want consolidation by implementing -`Store.supports_consolidated_metadata` and returning `False`. For stores that don't support -consolidation, Zarr will: - -* Raise an error on `consolidate_metadata` calls, maintaining the store in - its unconsolidated state. -* Raise an error in `AsyncGroup.open(..., use_consolidated=True)` -* Not use consolidated metadata in `AsyncGroup.open(..., use_consolidated=None)` diff --git a/docs/user-guide/data_types.md b/docs/user-guide/data_types.md new file mode 100644 index 0000000000..82b7c89809 --- /dev/null +++ b/docs/user-guide/data_types.md @@ -0,0 +1,427 @@ +# Array data types + +## Zarr's Data Type Model + +Zarr is designed for interoperability with NumPy, so if you are familiar with NumPy or any other +N-dimensional array library, Zarr's model for array data types should seem familiar. However, Zarr +data types have some unique features that are described in this document. + +Zarr arrays operate under an essential design constraint: unlike NumPy arrays, Zarr arrays +are designed to be stored and accessed by other Zarr implementations. This means that, among other things, +Zarr data types must be serializable to metadata documents in accordance with the Zarr specifications, +which adds some unique aspects to the Zarr data type model. 
+ +The following sections explain Zarr's data type model in greater detail and demonstrate the +Zarr Python APIs for working with Zarr data types. + +### Array Data Types + +Every Zarr array has a data type, which defines the meaning of the array's elements. An array's data +type is encoded in the JSON metadata for the array. This means that the data type of an array must be +JSON-serializable. + +In Zarr V2, the data type of an array is stored in the `dtype` field in array metadata. +Zarr V3 changed the name of this field to `data_type` and also defined new rules for the values +that can be assigned to the `data_type` field. + +For example, in Zarr V2, the boolean array data type was represented in array metadata as the +string `"|b1"`. In Zarr V3, the same type is represented as the string `"bool"`. + +### Scalars + +Zarr also specifies how array elements, i.e., scalars, are encoded in array metadata. This is necessary +because Zarr uses a field in array metadata to define a default value for chunks that are not stored. +This field, called `fill_value` in both Zarr V2 and Zarr V3 metadata documents, contains a +JSON value that can be decoded to a scalar value compatible with the array's data type. + +For the boolean data type, the scalar encoding is simple—booleans are natively supported by +JSON, so Zarr saves booleans as JSON booleans. Other scalars, like floats or raw bytes, have +more elaborate encoding schemes, and in some cases, this scheme depends on the Zarr format version. + +## Data Types in Zarr Version 2 + +Version 2 of the Zarr format defined its data types relative to +[NumPy's data types](https://numpy.org/doc/2.1/reference/arrays.dtypes.html#data-type-objects-dtype), +and added a few non-NumPy data types as well. 
With one exception ([structured data types](#structured-data-type)), the Zarr +V2 JSON identifier for a data type is just the NumPy `str` attribute of that data type: + +```python exec="true" session="data_types" source="above" result="ansi" +import zarr +import numpy as np +import json + +store = {} +np_dtype = np.dtype('int64') +print(np_dtype.str) +``` + +```python exec="true" session="data_types" source="above" result="ansi" +z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) +dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] +print(dtype_meta) +``` + +!!! note + + The `<` character in the data type metadata encodes the + [endianness](https://numpy.org/doc/2.2/reference/generated/numpy.dtype.byteorder.html), + or "byte order," of the data type. As per the NumPy model, + in Zarr version 2 each data type has an endianness where applicable. + However, Zarr version 3 data types do not store endianness information. + +There are two special cases to consider: ["structured" data types](#structured-data-type), and +["object"](#object-data-type) data types. + +### Structured Data Type + +NumPy allows the construction of so-called "structured" data types, composed of ordered collections +of named fields, where each field is itself a distinct NumPy data type. See the NumPy documentation +[here](https://numpy.org/doc/stable/user/basics.rec.html). + +Crucially, NumPy does not use a special data type for structured data types—instead, NumPy +implements structured data types as an optional feature of the so-called "Void" data type, which models +arbitrary fixed-size byte strings. The `str` attribute of a regular NumPy void +data type is the same as the `str` of a NumPy structured data type. This means that the `str` +attribute does not convey information about the fields contained in a structured data type. +For these reasons, Zarr V2 uses a special data type encoding for structured data types. 
+They are stored in JSON as lists of pairs, where the first element is a string, and the second +element is a Zarr V2 data type specification. This representation supports recursion. + +For example: + +```python exec="true" session="data_types" source="above" result="ansi" +store = {} +np_dtype = np.dtype([('field_a', '>i2'), ('field_b', [('subfield_c', '>f4'), ('subfield_d', 'i2')])]) +print(np_dtype.str) +``` + +```python exec="true" session="data_types" source="above" result="ansi" +z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) +dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] +print(dtype_meta) +``` + +### Object Data Type + +The NumPy "object" type is essentially an array of references to arbitrary Python objects. +It can model arrays of variable-length UTF-8 strings, arrays of variable-length byte strings, or +even arrays of variable-length arrays, each with a distinct data type. This makes the "object" data +type expressive, but also complicated to store. + +Zarr Python cannot persistently store references to arbitrary Python objects. But if each of those Python +objects has a consistent type, then we can use a special encoding procedure to store the array. This +is how Zarr Python stores variable-length UTF-8 strings, or variable-length byte strings. + +Although these are separate data types in this library, they are both "object" arrays in NumPy, which means +they have the *same* Zarr V2 string representation: `"|O"`. + +So for Zarr V2 we have to disambiguate different "object" data type arrays on the basis of their +encoding procedure, i.e., the codecs declared in the `filters` and `compressor` attributes of array +metadata. + +If an array with data type "object" used the `"vlen-utf8"` codec, then it was interpreted as an +array of variable-length strings. If an array with data type "object" used the `"vlen-bytes"` +codec, then it was interpreted as an array of variable-length byte strings. 
+ +This all means that the `dtype` field alone does not fully specify a data type in Zarr V2. +The name of the object codec used, if one was used, is also required. +Although this fact can be ignored for many simple numeric data types, any comprehensive approach to +Zarr V2 data types must either reject the "object" data types or include the "object codec" +identifier in the JSON form of the basic data type model. + +## Data Types in Zarr Version 3 + +The NumPy-based Zarr V2 data type representation was effective for simple data types but struggled +with more complex data types, like "object" and "structured" data types. To address these limitations, +Zarr V3 introduced several key changes to how data types are represented: + +- Instead of copying NumPy character codes, Zarr V3 defines an identifier for each data type. + The basic data types are identified by strings like `"int8"`, `"int16"`, etc., and data types + that require a configuration can be identified by a JSON object. + + For example, this JSON object declares a datetime data type: + + ```json + { + "name": "numpy.datetime64", + "configuration": { + "unit": "s", + "scale_factor": 10 + } + } + ``` + +- Zarr V3 data types do not have endianness. This is a departure from Zarr V2, where multi-byte + data types are defined with endianness information. Instead, Zarr V3 requires that the endianness + of encoded array chunks is specified in the `codecs` attribute of array metadata. The Zarr + V3 specification leaves the in-memory endianness of decoded array chunks as an implementation detail. + +For more about data types in Zarr V3, see the +[V3 specification](https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html). 
+ +## Data Types in Zarr Python + +The two Zarr formats that Zarr Python supports specify data types in different ways: data types in +Zarr version 2 are encoded as NumPy-compatible strings (or lists, in the case of structured data +types), while data types in Zarr V3 are encoded as either strings or JSON objects. Zarr V3 data +types do not have any associated endianness information, unlike Zarr V2 data types. + +Zarr Python needs to support both Zarr V2 and V3, which means we need to abstract over these differences. +We do this with an abstract Zarr data type class: [ZDType][zarr.dtype.ZDType] +which provides Zarr V2 and Zarr V3 compatibility routines for "native" data types. + +In this context, a "native" data type is a Python class, typically defined in another library, that +models an array's data type. For example, [`numpy.dtypes.UInt8DType`][] is a native data type defined in NumPy. +Zarr Python wraps the NumPy `uint8` with a [ZDType][zarr.dtype.ZDType] instance called +[UInt8][zarr.dtype.UInt8]. + +As of this writing, the only native data types Zarr Python supports are NumPy data types. We could +avoid the "native data type" jargon and just say "NumPy data type," but we do not want to rule out the +possibility of using non-NumPy array backends in the future. + +Each data type supported by Zarr Python is modeled by a [ZDType][zarr.dtype.ZDType] subclass, which provides an +API for the following operations: + +- Encoding and decoding a native data type +- Encoding and decoding a data type to and from Zarr V2 and Zarr V3 array metadata +- Encoding and decoding a scalar value to and from Zarr V2 and Zarr V3 array metadata +- Casting a Python object to a scalar value consistent with the data type + +### List of data types + +The following section lists the data types built in to Zarr Python. With a few exceptions, Zarr +Python supports nearly all of the data types in NumPy. 
If you need a data type that is not listed +here, it's possible to create it yourself: see [Adding New Data Types](#adding-new-data-types). + +#### Boolean +- [Boolean][zarr.dtype.Bool] + +#### Integral +- [Signed 8-bit integer][zarr.dtype.Int8] +- [Signed 16-bit integer][zarr.dtype.Int16] +- [Signed 32-bit integer][zarr.dtype.Int32] +- [Signed 64-bit integer][zarr.dtype.Int64] +- [Unsigned 8-bit integer][zarr.dtype.UInt8] +- [Unsigned 16-bit integer][zarr.dtype.UInt16] +- [Unsigned 32-bit integer][zarr.dtype.UInt32] +- [Unsigned 64-bit integer][zarr.dtype.UInt64] + +#### Floating-point +- [16-bit floating-point][zarr.dtype.Float16] +- [32-bit floating-point][zarr.dtype.Float32] +- [64-bit floating-point][zarr.dtype.Float64] +- [64-bit complex floating-point][zarr.dtype.Complex64] +- [128-bit complex floating-point][zarr.dtype.Complex128] + +#### String +- [Fixed-length UTF-32 string][zarr.dtype.FixedLengthUTF32] +- [Variable-length UTF-8 string][zarr.dtype.VariableLengthUTF8] + +#### Bytes +- [Fixed-length null-terminated bytes][zarr.dtype.NullTerminatedBytes] +- [Fixed-length raw bytes][zarr.dtype.RawBytes] +- [Variable-length bytes][zarr.dtype.VariableLengthBytes] + +#### Temporal +- [DateTime64][zarr.dtype.DateTime64] +- [TimeDelta64][zarr.dtype.TimeDelta64] + +#### Struct-like +- [Structured][zarr.dtype.Structured] + +### Example Usage + +This section demonstrates the basic usage of Zarr data types. 
+ +Create a `ZDType` from a native data type: + +```python exec="true" session="data_types" source="above" +from zarr.core.dtype import Int8 +import numpy as np +int8 = Int8.from_native_dtype(np.dtype('int8')) +``` + +Convert back to a native data type: + +```python exec="true" session="data_types" source="above" +native_dtype = int8.to_native_dtype() +assert native_dtype == np.dtype('int8') +``` + +Get the default scalar value for the data type: + +```python exec="true" session="data_types" source="above" +default_value = int8.default_scalar() +assert default_value == np.int8(0) +``` + +Serialize to JSON for Zarr V2: + +```python exec="true" session="data_types" source="above" result="ansi" +json_v2 = int8.to_json(zarr_format=2) +print(json_v2) +# {'name': '|i1', 'object_codec_id': None} +``` + +!!! note + + The representation returned by `to_json(zarr_format=2)` is more abstract than the literal contents + of Zarr V2 array metadata, because the JSON representation used by the `ZDType` classes must be + distinct across different data types. As noted [earlier](#object-data-type), Zarr V2 identifies + multiple distinct data types with the "object" data type identifier `"|O"`. Extra information + is needed to disambiguate these data types from one another. That's the reason for the + `object_codec_id` field you see here. 
+ +And for V3: + +```python exec="true" session="data_types" source="above" result="ansi" +json_v3 = int8.to_json(zarr_format=3) +print(json_v3) +``` + +Serialize a scalar value to JSON: + +```python exec="true" session="data_types" source="above" result="ansi" +json_value = int8.to_json_scalar(42, zarr_format=3) +print(json_value) +``` + +Deserialize a scalar value from JSON: + +```python exec="true" session="data_types" source="above" +scalar_value = int8.from_json_scalar(42, zarr_format=3) +assert scalar_value == np.int8(42) +``` + +### Adding New Data Types + +Each Zarr data type is a separate Python class that inherits from +[ZDType][zarr.dtype.ZDType]. You can define a custom data type by +writing your own subclass of [ZDType][zarr.dtype.ZDType] and adding +your data type to the data type registry. A complete example of this process is included below. + +The source code for this example can be found in the `examples/custom_dtype.py` file in the Zarr +Python project directory. + +```python +--8<-- "examples/custom_dtype.py" +``` + +### Data Type Resolution + +Although Zarr Python uses a different data type model from NumPy, you can still define a Zarr array +with a NumPy data type object: + +```python exec="true" session="data_types" source="above" result="ansi" +from zarr import create_array +import numpy as np +a = create_array({}, shape=(10,), dtype=np.dtype('int')) +print(a) +``` + +Or a string representation of a NumPy data type: + +```python exec="true" session="data_types" source="above" result="ansi" +a = create_array({}, shape=(10,), dtype='<i8') +print(a) +``` + +The `Array` object presents itself like a NumPy array, including exposing a NumPy +data type as its `dtype` attribute: + +```python exec="true" session="data_types" source="above" result="ansi" +print(type(a.dtype)) +``` + +But if we inspect the metadata for the array, we can see the Zarr data type object: + +```python exec="true" session="data_types" source="above" result="ansi" +print(type(a.metadata.data_type)) +``` + +This example illustrates a general problem Zarr Python has to solve: how can we allow users to +specify a data type as a string or a NumPy `dtype` object, and produce the right Zarr data type +from that input? We call this process "data type resolution." Zarr Python also performs data type +resolution when reading stored arrays, although in this case the input is a JSON value instead +of a NumPy data type. 
+ +For simple data types like `int`, the solution could be extremely simple: just +maintain a lookup table that maps a NumPy data type to the Zarr data type equivalent. But not all +data types are so simple. Consider this case: + +```python exec="true" session="data_types" source="above" +from zarr import create_array +import warnings +import numpy as np +warnings.simplefilter("ignore", category=FutureWarning) +a = create_array({}, shape=(10,), dtype=[('a', 'f8'), ('b', 'i8')]) +print(a.dtype) # this is the NumPy data type +``` + +```python exec="true" session="data_types" source="above" +print(a.metadata.data_type) # this is the Zarr data type +``` + +In this example, we created a +[NumPy structured data type](https://numpy.org/doc/stable/user/basics.rec.html#structured-datatypes). +This data type is a container that can hold any NumPy data type, which makes it recursive. It is +not possible to make a lookup table that relates all NumPy structured data types to their Zarr +equivalents, as there is a nearly unbounded number of different structured data types. So instead of +a static lookup table, Zarr Python relies on a dynamic approach to data type resolution. + +Zarr Python defines a collection of Zarr data types. This collection, called a "data type registry," +is essentially a dictionary where the keys are strings (a canonical name for each data type), and the +values are the data type classes themselves. Dynamic data type resolution entails iterating over +these data type classes, invoking that class' [from_native_dtype][zarr.dtype.ZDType.from_native_dtype] +method, and returning a concrete data type instance if and only if exactly one of those constructor +invocations is successful. + +In plain language, we take some user input, like a NumPy data type, offer it to all the +known data type classes, and return an instance of the one data type class that can accept that user input. 
+ +We want to avoid a situation where the same native data type matches multiple Zarr data types; that is, +a NumPy data type should *uniquely* specify a single Zarr data type. But data type resolution is +dynamic, so it's not possible to statically guarantee this uniqueness constraint. Therefore, we +attempt data type resolution against *every* data type class, and if, for some reason, a native data +type matches multiple Zarr data types, we treat this as an error and raise an exception. + +If you have a NumPy data type and you want to get the corresponding `ZDType` instance, you can use +the `parse_dtype` function, which will use the dynamic resolution described above. `parse_dtype` +handles a range of input types: + +- NumPy data types: + + ```python exec="true" session="data_types" source="above" result="ansi" + import numpy as np + from zarr.dtype import parse_dtype + my_dtype = np.dtype('>M8[10s]') + print(parse_dtype(my_dtype, zarr_format=2)) + ``` + +- NumPy data type-compatible strings: + + ```python exec="true" session="data_types" source="above" result="ansi" + dtype_str = '>M8[10s]' + print(parse_dtype(dtype_str, zarr_format=2)) + ``` + +- `ZDType` instances: + + ```python exec="true" session="data_types" source="above" result="ansi" + from zarr.dtype import DateTime64 + zdt = DateTime64(endianness='big', scale_factor=10, unit='s') + print(parse_dtype(zdt, zarr_format=2)) # Use a ZDType (this is a no-op) + ``` + +- Python dictionaries (requires `zarr_format=3`). 
These dictionaries must be consistent with the + `JSON` form of the data type: + + ```python exec="true" session="data_types" source="above" result="ansi" + dt_dict = {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}} + print(parse_dtype(dt_dict, zarr_format=3)) + ``` + + ```python exec="true" session="data_types" source="above" result="ansi" + print(parse_dtype(dt_dict, zarr_format=3).to_json(zarr_format=3)) + ``` diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst deleted file mode 100644 index a968cc4c86..0000000000 --- a/docs/user-guide/data_types.rst +++ /dev/null @@ -1,455 +0,0 @@ -.. _user-guide-data-types: - -Array data types -================ - -Zarr's Data Type Model ----------------------- - -Zarr is designed for interoperability with NumPy, so if you are familiar with NumPy or any other -N-dimensional array library, Zarr's model for array data types should seem familiar. However, Zarr -data types have some unique features that are described in this document. - -Zarr arrays operate under an essential design constraint: unlike NumPy arrays, Zarr arrays -are designed to be stored and accessed by other Zarr implementations. This means that, among other things, -Zarr data types must be serializable to metadata documents in accordance with the Zarr specifications, -which adds some unique aspects to the Zarr data type model. - -The following sections explain Zarr's data type model in greater detail and demonstrate the -Zarr Python APIs for working with Zarr data types. - -Array Data Types -^^^^^^^^^^^^^^^^ - -Every Zarr array has a data type, which defines the meaning of the array's elements. An array's data -type is encoded in the JSON metadata for the array. This means that the data type of an array must be -JSON-serializable. - -In Zarr V2, the data type of an array is stored in the ``dtype`` field in array metadata. 
-Zarr V3 changed the name of this field to ``data_type`` and also defined new rules for the values -that can be assigned to the ``data_type`` field. - -For example, in Zarr V2, the boolean array data type was represented in array metadata as the -string ``"|b1"``. In Zarr V3, the same type is represented as the string ``"bool"``. - -Scalars -^^^^^^^ - -Zarr also specifies how array elements, i.e., scalars, are encoded in array metadata. This is necessary -because Zarr uses a field in array metadata to define a default value for chunks that are not stored. -This field, called ``fill_value`` in both Zarr V2 and Zarr V3 metadata documents, contains a -JSON value that can be decoded to a scalar value compatible with the array's data type. - -For the boolean data type, the scalar encoding is simple—booleans are natively supported by -JSON, so Zarr saves booleans as JSON booleans. Other scalars, like floats or raw bytes, have -more elaborate encoding schemes, and in some cases, this scheme depends on the Zarr format version. - -Data Types in Zarr Version 2 ----------------------------- - -Version 2 of the Zarr format defined its data types relative to -`NumPy's data types `_, -and added a few non-NumPy data types as well. With one exception (`structured data types <#structured-data-type>`_), the Zarr -V2 JSON identifier for a data type is just the NumPy ``str`` attribute of that data type: - -.. code-block:: python - - >>> import zarr - >>> import numpy as np - >>> import json - >>> - >>> store = {} - >>> np_dtype = np.dtype('int64') - >>> np_dtype.str - '>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) - >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] - >>> dtype_meta - '`_, - or "byte order," of the data type. As per the NumPy model, - in Zarr version 2 each data type has an endianness where applicable. - However, Zarr version 3 data types do not store endianness information. 
- -There are two special cases to consider: `"structured" data types <#structured-data-type>`_, and -`"object" <#object-data-type>`_ data types. - -Structured Data Type -^^^^^^^^^^^^^^^^^^^^ - -NumPy allows the construction of a so-called "structured" data types comprised of ordered collections -of named fields, where each field is itself a distinct NumPy data type. See the NumPy documentation -`here `_. - -Crucially, NumPy does not use a special data type for structured data types—instead, NumPy -implements structured data types as an optional feature of the so-called "Void" data type, which models -arbitrary fixed-size byte strings. The ``str`` attribute of a regular NumPy void -data type is the same as the ``str`` of a NumPy structured data type. This means that the ``str`` -attribute does not convey information about the fields contained in a structured data type. -For these reasons, Zarr V2 uses a special data type encoding for structured data types. -They are stored in JSON as lists of pairs, where the first element is a string, and the second -element is a Zarr V2 data type specification. This representation supports recursion. - -For example: - -.. code-block:: python - - >>> store = {} - >>> np_dtype = np.dtype([('field_a', '>i2'), ('field_b', [('subfield_c', '>f4'), ('subfield_d', 'i2')])]) - >>> np_dtype.str - '|V8' - >>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) - >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] - >>> dtype_meta - [['field_a', '>i2'], ['field_b', [['subfield_c', '>f4'], ['subfield_d', '`_. - -Data Types in Zarr Python -------------------------- - -The two Zarr formats that Zarr Python supports specify data types in different ways: data types in -Zarr version 2 are encoded as NumPy-compatible strings (or lists, in the case of structured data -types), while data types in Zarr V3 are encoded as either strings or JSON objects. 
Zarr V3 data -types do not have any associated endianness information, unlike Zarr V2 data types. - -Zarr Python needs to support both Zarr V2 and V3, which means we need to abstract over these differences. -We do this with an abstract Zarr data type class: `ZDType <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_, -which provides Zarr V2 and Zarr V3 compatibility routines for "native" data types. - -In this context, a "native" data type is a Python class, typically defined in another library, that -models an array's data type. For example, ``np.dtypes.UInt8DType`` is a native data type defined in NumPy. -Zarr Python wraps the NumPy ``uint8`` with a ``ZDType`` instance called -`UInt8 <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_. - -As of this writing, the only native data types Zarr Python supports are NumPy data types. We could -avoid the "native data type" jargon and just say "NumPy data type," but we do not want to rule out the -possibility of using non-NumPy array backends in the future. - -Each data type supported by Zarr Python is modeled by a ``ZDType`` subclass, which provides an -API for the following operations: - -- Encoding and decoding a native data type -- Encoding and decoding a data type to and from Zarr V2 and Zarr V3 array metadata -- Encoding and decoding a scalar value to and from Zarr V2 and Zarr V3 array metadata -- Casting a Python object to a scalar value consistent with the data type - -List of data types -^^^^^^^^^^^^^^^^^^ - -The following section lists the data types built in to Zarr Python. With a few exceptions, Zarr -Python supports nearly all of the data types in NumPy. If you need a data type that is not listed -here, it's possible to create it yourself: see :ref:`adding-new-data-types`. 
- -Boolean -""""""" -- `Boolean <../api/zarr/dtype/index.html#zarr.dtype.Bool>`_ - -Integral -"""""""" -- `Signed 8-bit integer <../api/zarr/dtype/index.html#zarr.dtype.Int8>`_ -- `Signed 16-bit integer <../api/zarr/dtype/index.html#zarr.dtype.Int16>`_ -- `Signed 32-bit integer <../api/zarr/dtype/index.html#zarr.dtype.Int32>`_ -- `Signed 64-bit integer <../api/zarr/dtype/index.html#zarr.dtype.Int64>`_ -- `Unsigned 8-bit integer <../api/zarr/dtype/index.html#zarr.dtype.UInt8>`_ -- `Unsigned 16-bit integer <../api/zarr/dtype/index.html#zarr.dtype.UInt16>`_ -- `Unsigned 32-bit integer <../api/zarr/dtype/index.html#zarr.dtype.UInt32>`_ -- `Unsigned 64-bit integer <../api/zarr/dtype/index.html#zarr.dtype.UInt64>`_ - -Floating-point -"""""""""""""" -- `16-bit floating-point <../api/zarr/dtype/index.html#zarr.dtype.Float16>`_ -- `32-bit floating-point <../api/zarr/dtype/index.html#zarr.dtype.Float32>`_ -- `64-bit floating-point <../api/zarr/dtype/index.html#zarr.dtype.Float64>`_ -- `64-bit complex floating-point <../api/zarr/dtype/index.html#zarr.dtype.Complex64>`_ -- `128-bit complex floating-point <../api/zarr/dtype/index.html#zarr.dtype.Complex128>`_ - -String -"""""" -- `Fixed-length UTF-32 string <../api/zarr/dtype/index.html#zarr.dtype.FixedLengthUTF32>`_ -- `Variable-length UTF-8 string <../api/zarr/dtype/index.html#zarr.dtype.VariableLengthUTF8>`_ - -Bytes -""""" -- `Fixed-length null-terminated bytes <../api/zarr/dtype/index.html#zarr.dtype.NullTerminatedBytes>`_ -- `Fixed-length raw bytes <../api/zarr/dtype/index.html#zarr.dtype.RawBytes>`_ -- `Variable-length bytes <../api/zarr/dtype/index.html#zarr.dtype.VariableLengthBytes>`_ - -Temporal -"""""""" -- `DateTime64 <../api/zarr/dtype/index.html#zarr.dtype.DateTime64>`_ -- `TimeDelta64 <../api/zarr/dtype/index.html#zarr.dtype.TimeDelta64>`_ - -Struct-like -""""""""""" -- `Structured <../api/zarr/dtype/index.html#zarr.dtype.Structured>`_ - -Example Usage -^^^^^^^^^^^^^ - -This section will demonstrates the basic 
usage of Zarr data types. - -Create a ``ZDType`` from a native data type: - -.. code-block:: python - - >>> from zarr.core.dtype import Int8 - >>> import numpy as np - >>> int8 = Int8.from_native_dtype(np.dtype('int8')) - -Convert back to a native data type: - -.. code-block:: python - - >>> native_dtype = int8.to_native_dtype() - >>> assert native_dtype == np.dtype('int8') - -Get the default scalar value for the data type: - -.. code-block:: python - - >>> default_value = int8.default_scalar() - >>> assert default_value == np.int8(0) - -Serialize to JSON for Zarr V2: - -.. code-block:: python - - >>> json_v2 = int8.to_json(zarr_format=2) - >>> json_v2 - {'name': '|i1', 'object_codec_id': None} - -.. note:: - - The representation returned by ``to_json(zarr_format=2)`` is more abstract than the literal contents - of Zarr V2 array metadata, because the JSON representation used by the ``ZDType`` classes must be - distinct across different data types. As noted `earlier <#object-data-type>`_, Zarr V2 identifies - multiple distinct data types with the "object" data type identifier ``"|O"``. Extra information - is needed to disambiguate these data types from one another. That's the reason for the - ``object_codec_id`` field you see here. - -And for V3: - -.. code-block:: python - - >>> json_v3 = int8.to_json(zarr_format=3) - >>> json_v3 - 'int8' - -Serialize a scalar value to JSON: - -.. code-block:: python - - >>> json_value = int8.to_json_scalar(42, zarr_format=3) - >>> json_value - 42 - -Deserialize a scalar value from JSON: - -.. code-block:: python - - >>> scalar_value = int8.from_json_scalar(42, zarr_format=3) - >>> assert scalar_value == np.int8(42) - -.. _adding-new-data-types: - -Adding New Data Types -^^^^^^^^^^^^^^^^^^^^^ - -Each Zarr data type is a separate Python class that inherits from -`ZDType <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_. 
You can define a custom data type by -writing your own subclass of `ZDType <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_ and adding -your data type to the data type registry. A complete example of this process is included below. - -The source code for this example can be found in the ``examples/custom_dtype.py`` file in the Zarr -Python project directory. - -.. literalinclude:: ../../examples/custom_dtype.py - :language: python - -Data Type Resolution -^^^^^^^^^^^^^^^^^^^^ - -Although Zarr Python uses a different data type model from NumPy, you can still define a Zarr array -with a NumPy data type object: - -.. code-block:: python - - >>> from zarr import create_array - >>> import numpy as np - >>> a = create_array({}, shape=(10,), dtype=np.dtype('int')) - >>> a - - -Or a string representation of a NumPy data type: - -.. code-block:: python - - >>> a = create_array({}, shape=(10,), dtype='>> a - - -The ``Array`` object presents itself like a NumPy array, including exposing a NumPy -data type as its ``dtype`` attribute: - -.. code-block:: python - - >>> type(a.dtype) - - -But if we inspect the metadata for the array, we can see the Zarr data type object: - -.. code-block:: python - - >>> type(a.metadata.data_type) - - -This example illustrates a general problem Zarr Python has to solve: how can we allow users to -specify a data type as a string or a NumPy ``dtype`` object, and produce the right Zarr data type -from that input? We call this process "data type resolution." Zarr Python also performs data type -resolution when reading stored arrays, although in this case the input is a JSON value instead -of a NumPy data type. - -For simple data types like ``int``, the solution could be extremely simple: just -maintain a lookup table that maps a NumPy data type to the Zarr data type equivalent. But not all -data types are so simple. Consider this case: - -.. 
code-block:: python - - >>> from zarr import create_array - >>> import warnings - >>> import numpy as np - >>> warnings.simplefilter("ignore", category=FutureWarning) - >>> a = create_array({}, shape=(10,), dtype=[('a', 'f8'), ('b', 'i8')]) - >>> a.dtype # this is the NumPy data type - dtype([('a', '>> a.metadata.data_type # this is the Zarr data type - Structured(fields=(('a', Float64(endianness='little')), ('b', Int64(endianness='little')))) - -In this example, we created a -`NumPy structured data type `_. -This data type is a container that can hold any NumPy data type, which makes it recursive. It is -not possible to make a lookup table that relates all NumPy structured data types to their Zarr -equivalents, as there is a nearly unbounded number of different structured data types. So instead of -a static lookup table, Zarr Python relies on a dynamic approach to data type resolution. - -Zarr Python defines a collection of Zarr data types. This collection, called a "data type registry," -is essentially a dictionary where the keys are strings (a canonical name for each data type), and the -values are the data type classes themselves. Dynamic data type resolution entails iterating over -these data type classes, invoking that class' `from_native_dtype <#api/dtype/ZDType.from_native_dtype>`_ -method, and returning a concrete data type instance if and only if exactly one of those constructor -invocations is successful. - -In plain language, we take some user input, like a NumPy data type, offer it to all the -known data type classes, and return an instance of the one data type class that can accept that user input. - -We want to avoid a situation where the same native data type matches multiple Zarr data types; that is, -a NumPy data type should *uniquely* specify a single Zarr data type. But data type resolution is -dynamic, so it's not possible to statically guarantee this uniqueness constraint. 
Therefore, we -attempt data type resolution against *every* data type class, and if, for some reason, a native data -type matches multiple Zarr data types, we treat this as an error and raise an exception. - -If you have a NumPy data type and you want to get the corresponding ``ZDType`` instance, you can use -the ``parse_dtype`` function, which will use the dynamic resolution described above. ``parse_dtype`` -handles a range of input types: - -- NumPy data types: - - .. code-block:: python - - >>> import numpy as np - >>> from zarr.dtype import parse_dtype - >>> my_dtype = np.dtype('>M8[10s]') - >>> parse_dtype(my_dtype, zarr_format=2) - DateTime64(endianness='big', scale_factor=10, unit='s') - - -- NumPy data type-compatible strings: - - .. code-block:: python - - >>> dtype_str = '>M8[10s]' - >>> parse_dtype(dtype_str, zarr_format=2) - DateTime64(endianness='big', scale_factor=10, unit='s') - -- ``ZDType`` instances: - - .. code-block:: python - - >>> from zarr.dtype import DateTime64 - >>> zdt = DateTime64(endianness='big', scale_factor=10, unit='s') - >>> parse_dtype(zdt, zarr_format=2) # Use a ZDType (this is a no-op) - DateTime64(endianness='big', scale_factor=10, unit='s') - -- Python dictionaries (requires ``zarr_format=3``). These dictionaries must be consistent with the - ``JSON`` form of the data type: - - .. 
code-block:: python - - >>> dt_dict = {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}} - >>> parse_dtype(dt_dict, zarr_format=3) - DateTime64(endianness='little', scale_factor=10, unit='s') - >>> parse_dtype(dt_dict, zarr_format=3).to_json(zarr_format=3) - {'name': 'numpy.datetime64', 'configuration': {'unit': 's', 'scale_factor': 10}} diff --git a/docs/user-guide/extending.rst b/docs/user-guide/extending.md similarity index 50% rename from docs/user-guide/extending.rst rename to docs/user-guide/extending.md index 4487e07ddf..d857fa3356 100644 --- a/docs/user-guide/extending.rst +++ b/docs/user-guide/extending.md @@ -1,20 +1,17 @@ - -Extending Zarr -============== +# Extending Zarr Zarr-Python 3 was designed to be extensible. This means that you can extend the library by writing custom classes and plugins. Currently, Zarr can be extended in the following ways: -Custom codecs -------------- +## Custom codecs -.. note:: +!!! note This section explains how custom codecs can be created for Zarr format 3 arrays. For Zarr format 2, codecs should subclass the - `numcodecs.abc.Codec `_ + [numcodecs.abc.Codec](https://numcodecs.readthedocs.io/en/stable/abc.html#numcodecs.abc.Codec) base class and register through - `numcodecs.registry.register_codec `_. + [numcodecs.registry.register_codec](https://numcodecs.readthedocs.io/en/stable/registry.html#numcodecs.registry.register_codec). There are three types of codecs in Zarr: - array-to-array @@ -24,71 +21,68 @@ There are three types of codecs in Zarr: Array-to-array codecs are used to transform the array data before serializing to bytes. Examples include delta encoding or scaling codecs. Array-to-bytes codecs are used for serializing the array data to bytes. In Zarr, the main codec to use for numeric arrays -is the :class:`zarr.codecs.BytesCodec`. Bytes-to-bytes codecs transform the serialized bytestreams +is the [`zarr.codecs.BytesCodec`][]. 
Bytes-to-bytes codecs transform the serialized bytestreams of the array data. Examples include compression codecs, such as -:class:`zarr.codecs.GzipCodec`, :class:`zarr.codecs.BloscCodec` or -:class:`zarr.codecs.ZstdCodec`, and codecs that add a checksum to the bytestream, such as -:class:`zarr.codecs.Crc32cCodec`. +[`zarr.codecs.GzipCodec`][], [`zarr.codecs.BloscCodec`][] or +[`zarr.codecs.ZstdCodec`][], and codecs that add a checksum to the bytestream, such as +[`zarr.codecs.Crc32cCodec`][]. Custom codecs for Zarr are implemented by subclassing the relevant base class, see -:class:`zarr.abc.codec.ArrayArrayCodec`, :class:`zarr.abc.codec.ArrayBytesCodec` and -:class:`zarr.abc.codec.BytesBytesCodec`. Most custom codecs should implemented the -``_encode_single`` and ``_decode_single`` methods. These methods operate on single chunks -of the array data. Alternatively, custom codecs can implement the ``encode`` and ``decode`` +[`zarr.abc.codec.ArrayArrayCodec`][], [`zarr.abc.codec.ArrayBytesCodec`][] and +[`zarr.abc.codec.BytesBytesCodec`][]. Most custom codecs should implement the +`_encode_single` and `_decode_single` methods. These methods operate on single chunks +of the array data. Alternatively, custom codecs can implement the `encode` and `decode` methods, which operate on batches of chunks, in case the codec is intended to implement its own batch processing. Custom codecs should also implement the following methods: -- ``compute_encoded_size``, which returns the byte size of the encoded data given the byte - size of the original data. It should raise ``NotImplementedError`` for codecs with +- `compute_encoded_size`, which returns the byte size of the encoded data given the byte + size of the original data. It should raise `NotImplementedError` for codecs with variable-sized outputs, such as compression codecs. 
-- ``validate`` (optional), which can be used to check that the codec metadata is compatible with the +- `validate` (optional), which can be used to check that the codec metadata is compatible with the array metadata. It should raise errors if not. -- ``resolve_metadata`` (optional), which is important for codecs that change the shape, +- `resolve_metadata` (optional), which is important for codecs that change the shape, dtype or fill value of a chunk. -- ``evolve_from_array_spec`` (optional), which can be useful for automatically filling in +- `evolve_from_array_spec` (optional), which can be useful for automatically filling in codec configuration metadata from the array metadata. To use custom codecs in Zarr, they need to be registered using the -`entrypoint mechanism `_. -Commonly, entrypoints are declared in the ``pyproject.toml`` of your package under the -``[project.entry-points."zarr.codecs"]`` section. Zarr will automatically discover and +[entrypoint mechanism](https://packaging.python.org/en/latest/specifications/entry-points/). +Commonly, entrypoints are declared in the `pyproject.toml` of your package under the +`[project.entry-points."zarr.codecs"]` section. Zarr will automatically discover and load all codecs registered with the entrypoint mechanism from imported modules. -.. code-block:: toml - - [project.entry-points."zarr.codecs"] - "custompackage.fancy_codec" = "custompackage:FancyCodec" +```toml +[project.entry-points."zarr.codecs"] +"custompackage.fancy_codec" = "custompackage:FancyCodec" +``` New codecs need to have their own unique identifier. To avoid naming collisions, it is strongly recommended to prefix the codec identifier with a unique name. For example, -the codecs from ``numcodecs`` are prefixed with ``numcodecs.``, e.g. ``numcodecs.delta``. +the codecs from `numcodecs` are prefixed with `numcodecs.`, e.g. `numcodecs.delta`. -.. note:: +!!! note Note that the extension mechanism for the Zarr format 3 is still under development. 
Requirements for custom codecs including the choice of codec identifiers might change in the future. It is also possible to register codecs as replacements for existing codecs. This might be useful for providing specialized implementations, such as GPU-based codecs. In case of -multiple codecs, the :mod:`zarr.core.config` mechanism can be used to select the preferred +multiple codecs, the [`zarr.config`][] mechanism can be used to select the preferred implementation. -Custom stores -------------- +## Custom stores Coming soon. -Custom array buffers --------------------- +## Custom array buffers Zarr-python provides control over where and how arrays stored in memory through -:mod:`zarr.buffer`. Currently both CPU (the default) and GPU implementations are -provided (see :ref:`user-guide-gpu` for more). You can implement your own buffer -classes by implementing the interface defined in :mod:`zarr.abc.buffer`. +[`zarr.abc.buffer.Buffer`][]. Currently both CPU (the default) and GPU implementations are +provided (see [Using GPUs with Zarr](gpu.md) for more information). You can implement your own buffer +classes by implementing the interface defined in [`zarr.abc.buffer.BufferPrototype`][]. -Other extensions ----------------- +## Other extensions In the future, Zarr will support writing custom custom data types and chunk grids. diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md new file mode 100644 index 0000000000..3317bdf065 --- /dev/null +++ b/docs/user-guide/gpu.md @@ -0,0 +1,31 @@ +# Using GPUs with Zarr + +Zarr can use GPUs to accelerate your workload by running `zarr.Config.enable_gpu`. + +!!! note + `zarr-python` currently supports reading the ndarray data into device (GPU) + memory as the final stage of the codec pipeline. Data will still be read into + or copied to host (CPU) memory for encoding and decoding. 
+ + In the future, codecs will be available for compressing and decompressing data on + the GPU, avoiding the need to move data between the host and device for + compression and decompression. + +## Reading data into device memory + +[`zarr.config`][] configures Zarr to use GPU memory for the data +buffers used internally by Zarr via `enable_gpu()`. + +```python +import zarr +import cupy as cp +zarr.config.enable_gpu() +store = zarr.storage.MemoryStore() +z = zarr.create_array( + store=store, shape=(100, 100), chunks=(10, 10), dtype="float32", +) +type(z[:10, :10]) +# cupy.ndarray +``` + +Note that the output type is a `cupy.ndarray` rather than a NumPy array. diff --git a/docs/user-guide/gpu.rst b/docs/user-guide/gpu.rst deleted file mode 100644 index 4d3492f8bd..0000000000 --- a/docs/user-guide/gpu.rst +++ /dev/null @@ -1,37 +0,0 @@ -.. _user-guide-gpu: - -Using GPUs with Zarr -==================== - -Zarr can use GPUs to accelerate your workload by running -:meth:`zarr.config.enable_gpu`. - -.. note:: - - `zarr-python` currently supports reading the ndarray data into device (GPU) - memory as the final stage of the codec pipeline. Data will still be read into - or copied to host (CPU) memory for encoding and decoding. - - In the future, codecs will be available compressing and decompressing data on - the GPU, avoiding the need to move data between the host and device for - compression and decompression. - -Reading data into device memory -------------------------------- - -:meth:`zarr.config.enable_gpu` configures Zarr to use GPU memory for the data -buffers used internally by Zarr. - -.. code-block:: python - - >>> import zarr - >>> import cupy as cp # doctest: +SKIP - >>> zarr.config.enable_gpu() # doctest: +SKIP - >>> store = zarr.storage.MemoryStore() # doctest: +SKIP - >>> z = zarr.create_array( # doctest: +SKIP - ... store=store, shape=(100, 100), chunks=(10, 10), dtype="float32", - ...
) - >>> type(z[:10, :10]) # doctest: +SKIP - cupy.ndarray - -Note that the output type is a ``cupy.ndarray`` rather than a NumPy array. diff --git a/docs/user-guide/groups.md b/docs/user-guide/groups.md new file mode 100644 index 0000000000..8a3f9ff7bf --- /dev/null +++ b/docs/user-guide/groups.md @@ -0,0 +1,137 @@ +# Working with groups + +Zarr supports hierarchical organization of arrays via groups. As with arrays, +groups can be stored in memory, on disk, or via other storage systems that +support a similar interface. + +To create a group, use the [`zarr.group`][] function: + +```python exec="true" session="groups" source="above" result="ansi" +import zarr +store = zarr.storage.MemoryStore() +root = zarr.create_group(store=store) +print(root) +``` + +Groups have a similar API to the Group class from [h5py](https://www.h5py.org/). For example, groups can contain other groups: + +```python exec="true" session="groups" source="above" +foo = root.create_group('foo') +bar = foo.create_group('bar') +``` + +Groups can also contain arrays, e.g.: + +```python exec="true" session="groups" source="above" result="ansi" +z1 = bar.create_array(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +print(z1) +``` + +Members of a group can be accessed via the suffix notation, e.g.: + +```python exec="true" session="groups" source="above" result="ansi" +print(root['foo']) +``` + +The '/' character can be used to access multiple levels of the hierarchy in one +call, e.g.: + +```python exec="true" session="groups" source="above" result="ansi" +print(root['foo/bar']) +``` + +```python exec="true" session="groups" source="above" result="ansi" +print(root['foo/bar/baz']) +``` + +The [`zarr.Group.tree`][] method can be used to print a tree +representation of the hierarchy, e.g.: + +```python exec="true" session="groups" source="above" result="ansi" +print(root.tree()) +``` + +The [`zarr.open_group`][] function provides a convenient way to create or +re-open a group 
stored in a directory on the file-system, with sub-groups stored in +sub-directories, e.g.: + +```python exec="true" session="groups" source="above" result="ansi" +root = zarr.open_group('data/group.zarr', mode='w') +print(root) +``` + +```python exec="true" session="groups" source="above" result="ansi" +z = root.create_array(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +print(z) +``` + +For more information on groups see the [`zarr.Group` API docs](../api/group.md). + +## Batch Group Creation + +You can also create multiple groups concurrently with a single function call. [`zarr.create_hierarchy`][] takes +a [`zarr Storage instance`](../api/storage.md) and a dict of `key : metadata` pairs, parses that dict, and +writes metadata documents to storage: + +```python exec="true" session="groups" source="above" result="ansi" +from zarr import create_hierarchy +from zarr.core.group import GroupMetadata +from zarr.storage import LocalStore + +from pprint import pprint +import io + +node_spec = {'a/b/c': GroupMetadata()} +nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec)) +# Report nodes (pprint is used for cleaner rendering in the docs) +output = io.StringIO() +pprint(nodes_created, stream=output, width=60) +print(output.getvalue()) +``` + +Note that we only specified a single group named `a/b/c`, but 4 groups were created. These additional groups +were created to ensure that the desired node `a/b/c` is connected to the root group `''` by a sequence +of intermediate groups. [`zarr.create_hierarchy`][] normalizes the `nodes` keyword argument to +ensure that the resulting hierarchy is complete, i.e. all groups or arrays are connected to the root +of the hierarchy via intermediate groups.
+ +Because [`zarr.create_hierarchy`][] concurrently creates metadata documents, it's more efficient +than repeated calls to [`create_group`][zarr.create_group] or [`create_array`][zarr.create_array], provided you can statically define +the metadata for the groups and arrays you want to create. + +## Array and group diagnostics + +Diagnostic information about arrays and groups is available via the `info` +property. E.g.: + +```python exec="true" session="groups" source="above" result="ansi" +store = zarr.storage.MemoryStore() +root = zarr.group(store=store) +foo = root.create_group('foo') +bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64') +bar[:] = 42 +baz = foo.create_array(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='float32') +baz[:] = 4.2 +print(root.info) +``` + +```python exec="true" session="groups" source="above" result="ansi" +print(foo.info) +``` + +```python exec="true" session="groups" source="above" result="ansi" +print(bar.info_complete()) +``` + +```python exec="true" session="groups" source="above" result="ansi" +print(baz.info) +``` + +Groups also have the [`zarr.Group.tree`][] method, e.g.: + +```python exec="true" session="groups" source="above" result="ansi" +print(root.tree()) +``` + +!!! note + [`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra. \ No newline at end of file diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst deleted file mode 100644 index a343c3617e..0000000000 --- a/docs/user-guide/groups.rst +++ /dev/null @@ -1,172 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. _user-guide-groups: - -Working with groups -=================== - -Zarr supports hierarchical organization of arrays via groups. As with arrays, -groups can be stored in memory, on disk, or via other storage systems that -support a similar interface. 
- -To create a group, use the :func:`zarr.group` function:: - - >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> root = zarr.create_group(store=store) - >>> root - - -Groups have a similar API to the Group class from `h5py -`_. For example, groups can contain other groups:: - - >>> foo = root.create_group('foo') - >>> bar = foo.create_group('bar') - -Groups can also contain arrays, e.g.:: - - >>> z1 = bar.create_array(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z1 - - -Members of a group can be accessed via the suffix notation, e.g.:: - - >>> root['foo'] - - -The '/' character can be used to access multiple levels of the hierarchy in one -call, e.g.:: - - >>> root['foo/bar'] - - >>> root['foo/bar/baz'] - - -The :func:`zarr.Group.tree` method can be used to print a tree -representation of the hierarchy, e.g.:: - - >>> root.tree() - / - └── foo - └── bar - └── baz (10000, 10000) int32 - - -The :func:`zarr.open_group` function provides a convenient way to create or -re-open a group stored in a directory on the file-system, with sub-groups stored in -sub-directories, e.g.:: - - >>> root = zarr.open_group('data/group.zarr', mode='w') - >>> root - - >>> - >>> z = root.create_array(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z - - -.. TODO: uncomment after __enter__ and __exit__ are implemented -.. Groups can be used as context managers (in a ``with`` statement). -.. If the underlying store has a ``close`` method, it will be called on exit. - -For more information on groups see the :class:`zarr.Group` API docs. - -.. _user-guide-diagnostics: - -Batch Group Creation --------------------- - -You can also create multiple groups concurrently with a single function call. 
:func:`zarr.create_hierarchy` takes -a :class:`zarr.storage.Store` instance and a dict of ``key : metadata`` pairs, parses that dict, and -writes metadata documents to storage: - - >>> from zarr import create_hierarchy - >>> from zarr.core.group import GroupMetadata - >>> from zarr.storage import LocalStore - >>> node_spec = {'a/b/c': GroupMetadata()} - >>> nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec)) - >>> print(sorted(nodes_created.items(), key=lambda kv: len(kv[0]))) - [('', ), ('a', ), ('a/b', ), ('a/b/c', )] - -Note that we only specified a single group named ``a/b/c``, but 4 groups were created. These additional groups -were created to ensure that the desired node ``a/b/c`` is connected to the root group ``''`` by a sequence -of intermediate groups. :func:`zarr.create_hierarchy` normalizes the ``nodes`` keyword argument to -ensure that the resulting hierarchy is complete, i.e. all groups or arrays are connected to the root -of the hierarchy via intermediate groups. - -Because :func:`zarr.create_hierarchy` concurrently creates metadata documents, it's more efficient -than repeated calls to :func:`create_group` or :func:`create_array`, provided you can statically define -the metadata for the groups and arrays you want to create. - -Array and group diagnostics ---------------------------- - -Diagnostic information about arrays and groups is available via the ``info`` -property. 
E.g.:: - - >>> store = zarr.storage.MemoryStore() - >>> root = zarr.group(store=store) - >>> foo = root.create_group('foo') - >>> bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64') - >>> bar[:] = 42 - >>> baz = foo.create_array(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='float32') - >>> baz[:] = 4.2 - >>> root.info - Name : - Type : Group - Zarr format : 3 - Read-only : False - Store type : MemoryStore - >>> foo.info - Name : foo - Type : Group - Zarr format : 3 - Read-only : False - Store type : MemoryStore - >>> bar.info_complete() - Type : Array - Zarr format : 3 - Data type : Int64(endianness='little') - Fill value : 0 - Shape : (1000000,) - Chunk shape : (100000,) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 8000000 (7.6M) - No. bytes stored : 1614 (1.6K) - Storage ratio : 4956.6 - Chunks Initialized : 10 - >>> baz.info - Type : Array - Zarr format : 3 - Data type : Float32(endianness='little') - Fill value : 0.0 - Shape : (1000, 1000) - Chunk shape : (100, 100) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 4000000 (3.8M) - -Groups also have the :func:`zarr.Group.tree` method, e.g.:: - - >>> root.tree() - / - └── foo - ├── bar (1000000,) int64 - └── baz (1000, 1000) float32 - - -.. note:: - - :func:`zarr.Group.tree` requires the optional `rich `_ - dependency. It can be installed with the ``[tree]`` extra. diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md new file mode 100644 index 0000000000..14808457bd --- /dev/null +++ b/docs/user-guide/index.md @@ -0,0 +1,41 @@ +# User Guide + +Welcome to the user guide, where you can learn more about using Zarr-Python! + +## Getting Started + +New to Zarr-Python? 
Start here: + +- **[Installation](installation.md)** - Install Zarr-Python +- **[Quick-start](../quick-start.md)** - Quick overview of core functionality + +## Core Concepts + +Learn the essential building blocks: + +- **[Arrays](arrays.md)** - Learn the fundamentals of working with arrays +- **[Groups](groups.md)** - Organize your data with groups +- **[Attributes](attributes.md)** - Configure metadata to your data structures +- **[Storage](storage.md)** - Learn how data is stored and accessed + +## Configuration & Setup + +Customize your experience: + +- **[Runtime Configuration](config.md)** - Configure Zarr-Python for your needs +- **[V3 Migration](v3_migration.md)** - Upgrading from version 2 to version 3 + +## Advanced Topics + +Take your skills to the next level: + +- **[Data Types](data_types.md)** - Learn about supported and extensible data types +- **[Performance](performance.md)** - Optimize for speed and efficiency +- **[GPU](gpu.md)** - Leverage GPU acceleration +- **[Extending](extending.md)** - Extend functionality with custom code +- **[Consolidated Metadata](consolidated_metadata.md)** - Advanced metadata management + +## Need Help? + +- Browse the [API Reference](../api/index.md) for detailed function documentation +- Report issues on [GitHub](https://github.com/zarr-developers/zarr-python/issues?q=sort%3Aupdated-desc+is%3Aissue+is%3Aopen) diff --git a/docs/user-guide/index.rst b/docs/user-guide/index.rst deleted file mode 100644 index a83a30172b..0000000000 --- a/docs/user-guide/index.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. _user-guide: - -User guide -========== - -.. toctree:: - :maxdepth: 1 - - installation - arrays - groups - attributes - storage - config - v3_migration - cli - -Advanced Topics ---------------- - -.. toctree:: - :maxdepth: 1 - - data_types - performance - consolidated_metadata - extending - gpu - - -.. 
Coming soon - async diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md new file mode 100644 index 0000000000..f672a348cd --- /dev/null +++ b/docs/user-guide/installation.md @@ -0,0 +1,59 @@ +# Installation + +## Required dependencies + +Required dependencies include: + +- [Python](https://docs.python.org/3/) (3.11 or later) +- [packaging](https://packaging.pypa.io) (22.0 or later) +- [numpy](https://numpy.org) (1.26 or later) +- [numcodecs[crc32c]](https://numcodecs.readthedocs.io) (0.14 or later) +- [typing_extensions](https://typing-extensions.readthedocs.io) (4.9 or later) +- [donfig](https://donfig.readthedocs.io) (0.8 or later) + +## pip + +Zarr is available on [PyPI](https://pypi.org/project/zarr/). Install it using `pip`: + +```console +pip install zarr +``` + +There are a number of optional dependency groups you can install for extra functionality. +These can be installed using `pip install "zarr[<extra>]"`, e.g. `pip install "zarr[gpu]"` + +- `gpu`: support for GPUs +- `remote`: support for reading/writing to remote data stores + +Additional optional dependencies include `rich`, `universal_pathlib`. These must be installed separately. + +## conda + +Zarr is also published to [conda-forge](https://conda-forge.org). Install it using `conda`: + +```console +conda install -c conda-forge zarr +``` + +Conda does not support optional dependencies, so you will have to manually install any packages +needed to enable extra functionality. + +## Nightly wheels + +Development wheels are built nightly and published to the [scientific-python-nightly-wheels](https://anaconda.org/scientific-python-nightly-wheels) index. To install the latest nightly build: + +```console +pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple zarr +``` + +Note that nightly wheels may be unstable and are intended for testing purposes.
+## Dependency support + +Zarr has endorsed [Scientific-Python SPEC 0](https://scientific-python.org/specs/spec-0000/) and now follows the version support window as outlined below: + +- Python: 36 months after initial release +- Core package dependencies (e.g. NumPy): 24 months after initial release + +## Development + +To install the latest development version of Zarr, see the contributing guide. diff --git a/docs/user-guide/installation.rst b/docs/user-guide/installation.rst deleted file mode 100644 index 67705d3be6..0000000000 --- a/docs/user-guide/installation.rst +++ /dev/null @@ -1,67 +0,0 @@ -Installation -============ - -Required dependencies ---------------------- - -Required dependencies include: - -- `Python `_ (3.11 or later) -- `packaging `_ (22.0 or later) -- `numpy `_ (1.26 or later) -- `numcodecs[crc32c] `_ (0.14 or later) -- `typing_extensions `_ (4.9 or later) -- `donfig `_ (0.8 or later) - -pip ---- - -Zarr is available on `PyPI `_. Install it using ``pip``: - -.. code-block:: console - - $ pip install zarr - -There are a number of optional dependency groups you can install for extra functionality. -These can be installed using ``pip install "zarr[]"``, e.g. ``pip install "zarr[gpu]"`` - -- ``gpu``: support for GPUs -- ``remote``: support for reading/writing to remote data stores - -Additional optional dependencies include ``rich``, ``universal_pathlib``. These must be installed separately. - -conda ------ - -Zarr is also published to `conda-forge `_. Install it using ``conda``: - -.. code-block:: console - - $ conda install -c conda-forge zarr - -Conda does not support optional dependencies, so you will have to manually install any packages -needed to enable extra functionality. - -Nightly wheels --------------- - -Development wheels are built nightly and published to the `scientific-python-nightly-wheels `_ index. To install the latest nightly build: - -.. 
code-block:: console - - $ pip install --pre \ - --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ - zarr - -Note that nightly wheels may be unstable and are intended for testing purposes. - -Dependency support ------------------- -Zarr has endorsed `Scientific-Python SPEC 0 `_ and now follows the version support window as outlined below: - -- Python: 36 months after initial release -- Core package dependencies (e.g. NumPy): 24 months after initial release - -Development ------------ -To install the latest development version of Zarr, see the :ref:`contributing guide `. diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md new file mode 100644 index 0000000000..ad101319f1 --- /dev/null +++ b/docs/user-guide/performance.md @@ -0,0 +1,206 @@ +# Optimizing performance + +## Chunk optimizations + +### Chunk size and shape + +In general, chunks of at least 1 megabyte (1M) uncompressed size seem to provide +better performance, at least when using the Blosc compression library. + +The optimal chunk shape will depend on how you want to access the data. E.g., +for a 2-dimensional array, if you only ever take slices along the first +dimension, then chunk across the second dimension. 
If you know you want to chunk +across an entire dimension you can use the full size of that dimension within the +`chunks` argument, e.g.: + +```python exec="true" session="performance" source="above" result="ansi" +import zarr +z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32') +print(z1.chunks) +``` + +Alternatively, if you only ever take slices along the second dimension, then +chunk across the first dimension, e.g.: + +```python exec="true" session="performance" source="above" result="ansi" +z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32') +print(z2.chunks) +``` + +If you require reasonable performance for both access patterns then you need to +find a compromise, e.g.: + +```python exec="true" session="performance" source="above" result="ansi" +z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +print(z3.chunks) +``` + +If you are feeling lazy, you can let Zarr guess a chunk shape for your data by +providing `chunks='auto'`, although please note that the algorithm for guessing +a chunk shape is based on simple heuristics and may be far from optimal. E.g.: + +```python exec="true" session="performance" source="above" result="ansi" +z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') +print(z4.chunks) +``` + +If you know you are always going to be loading the entire array into memory, you +can turn off chunks by providing `chunks` equal to `shape`, in which case there +will be one single chunk for the array: + +```python exec="true" session="performance" source="above" result="ansi" +z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32') +print(z5.chunks) +``` + +### Sharding + +If you have large arrays but need small chunks to efficiently access the data, you can +use sharding. Sharding provides a mechanism to store multiple chunks in a single +storage object or file. 
This can be useful because traditional file systems and object +storage systems may have performance issues storing and accessing many files. +Additionally, small files can be inefficient to store if they are smaller than the +block size of the file system. + +Picking a good combination of chunk shape and shard shape is important for performance. +The chunk shape determines what unit of your data can be read independently, while the +shard shape determines what unit of your data can be written efficiently. + +For an example, consider you have a 100 GB array and need to read small chunks of 1 MB. +Without sharding, each chunk would be one file resulting in 100,000 files. That can +already cause performance issues on some file systems. +With sharding, you could use a shard size of 1 GB. This would result in 1000 chunks per +file and 100 files in total, which seems manageable for most storage systems. +You would still be able to read each 1 MB chunk independently, but you would need to +write your data in 1 GB increments. + +To use sharding, you need to specify the `shards` parameter when creating the array. + +```python exec="true" session="performance" source="above" result="ansi" +z6 = zarr.create_array(store={}, shape=(10000, 10000, 1000), shards=(1000, 1000, 1000), chunks=(100, 100, 100), dtype='uint8') +print(z6.info) +``` + +### Chunk memory layout + +The order of bytes **within each chunk** of an array can be changed via the +`order` config option, to use either C or Fortran layout. For +multi-dimensional arrays, these two layouts may provide different compression +ratios, depending on the correlation structure within the data. 
E.g.: + +```python exec="true" session="performance" source="above" result="ansi" +import numpy as np + +a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T +c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'}) +c[:] = a +print(c.info_complete()) +``` + +```python exec="true" session="performance" source="above" result="ansi" +with zarr.config.set({'array.order': 'F'}): + f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype) + f[:] = a +print(f.info_complete()) + +``` + +In the above example, Fortran order gives a better compression ratio. This is an +artificial example but illustrates the general point that changing the order of +bytes within chunks of an array may improve the compression ratio, depending on +the structure of the data, the compression algorithm used, and which compression +filters (e.g., byte-shuffle) have been applied. + +### Empty chunks + +It is possible to configure how Zarr handles the storage of chunks that are "empty" +(i.e., every element in the chunk is equal to the array's fill value). When creating +an array with `write_empty_chunks=False`, Zarr will check whether a chunk is empty before compression and storage. If a chunk is empty, +then Zarr does not store it, and instead deletes the chunk from storage +if the chunk had been previously stored. + +This optimization prevents storing redundant objects and can speed up reads, but the cost is +added computation during array writes, since the contents of +each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. +If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. +In this case, creating an array with `write_empty_chunks=True` (the default) will instruct Zarr to write every chunk without checking for emptiness. 
+ +The following example illustrates the effect of the `write_empty_chunks` flag on +the time required to write an array with different values.: + +```python exec="true" session="performance" source="above" result="ansi" +import zarr +import numpy as np +import time + +def timed_write(write_empty_chunks): + """ + Measure the time required and number of objects created when writing + to a Zarr array with random ints or fill value. + """ + chunks = (8192,) + shape = (chunks[0] * 1024,) + data = np.random.randint(0, 255, shape) + dtype = 'uint8' + arr = zarr.create_array( + f'data/example-{write_empty_chunks}.zarr', + shape=shape, + chunks=chunks, + dtype=dtype, + fill_value=0, + config={'write_empty_chunks': write_empty_chunks} + ) + # initialize all chunks + arr[:] = 100 + result = [] + for value in (data, arr.fill_value): + start = time.time() + arr[:] = value + elapsed = time.time() - start + result.append((elapsed, arr.nchunks_initialized)) + return result + +# log results +for write_empty_chunks in (True, False): + full, empty = timed_write(write_empty_chunks) + print(f'\nwrite_empty_chunks={write_empty_chunks}:\n\tRandom Data: {full[0]:.4f}s, {full[1]} objects stored\n\t Empty Data: {empty[0]:.4f}s, {empty[1]} objects stored\n') +``` + +In this example, writing random data is slightly slower with `write_empty_chunks=True`, +but writing empty data is substantially faster and generates far fewer objects in storage. + +### Changing chunk shapes (rechunking) + +Coming soon. + +## Parallel computing and synchronization + +Coming soon. + +## Pickle support + +Zarr arrays and groups can be pickled, as long as the underlying store object can be +pickled. With the exception of the `zarr.storage.MemoryStore`, any of the +storage classes provided in the `zarr.storage` module can be pickled. 
+ +If an array or group is backed by a persistent store such as a `zarr.storage.LocalStore`, +`zarr.storage.ZipStore` or `zarr.storage.FsspecStore` then the store data +**are not** pickled. The only thing that is pickled is the necessary parameters to allow the store +to re-open any underlying files or databases upon being unpickled. + +E.g., pickle/unpickle a local store array: + +```python exec="true" session="performance" source="above" result="ansi" +import pickle +data = np.arange(100000) +z1 = zarr.create_array(store='data/perf-example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype) +z1[:] = data +s = pickle.dumps(z1) +z2 = pickle.loads(s) +assert z1 == z2 +print(np.all(z1[:] == z2[:])) +``` + +## Configuring Blosc + +Coming soon. diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst deleted file mode 100644 index 0f31e5d7be..0000000000 --- a/docs/user-guide/performance.rst +++ /dev/null @@ -1,278 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. _user-guide-performance: - -Optimizing performance -====================== - -.. _user-guide-chunks: - -Chunk optimizations -------------------- - -.. _user-guide-chunks-shape: - -Chunk size and shape -~~~~~~~~~~~~~~~~~~~~ - -In general, chunks of at least 1 megabyte (1M) uncompressed size seem to provide -better performance, at least when using the Blosc compression library. - -The optimal chunk shape will depend on how you want to access the data. E.g., -for a 2-dimensional array, if you only ever take slices along the first -dimension, then chunk across the second dimension.
If you know you want to chunk -across an entire dimension you can use the full size of that dimension within the -``chunks`` argument, e.g.:: - - >>> import zarr - >>> z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32') - >>> z1.chunks - (100, 10000) - -Alternatively, if you only ever take slices along the second dimension, then -chunk across the first dimension, e.g.:: - - >>> z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32') - >>> z2.chunks - (10000, 100) - -If you require reasonable performance for both access patterns then you need to -find a compromise, e.g.:: - - >>> z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z3.chunks - (1000, 1000) - -If you are feeling lazy, you can let Zarr guess a chunk shape for your data by -providing ``chunks='auto'``, although please note that the algorithm for guessing -a chunk shape is based on simple heuristics and may be far from optimal. E.g.:: - - >>> z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') - >>> z4.chunks - (625, 625) - -If you know you are always going to be loading the entire array into memory, you -can turn off chunks by providing ``chunks`` equal to ``shape``, in which case there -will be one single chunk for the array:: - - >>> z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32') - >>> z5.chunks - (10000, 10000) - - -Sharding -~~~~~~~~ - -If you have large arrays but need small chunks to efficiently access the data, you can -use sharding. Sharding provides a mechanism to store multiple chunks in a single -storage object or file. This can be useful because traditional file systems and object -storage systems may have performance issues storing and accessing many files. -Additionally, small files can be inefficient to store if they are smaller than the -block size of the file system. 
- -Picking a good combination of chunk shape and shard shape is important for performance. -The chunk shape determines what unit of your data can be read independently, while the -shard shape determines what unit of your data can be written efficiently. - -For an example, consider you have a 100 GB array and need to read small chunks of 1 MB. -Without sharding, each chunk would be one file resulting in 100,000 files. That can -already cause performance issues on some file systems. -With sharding, you could use a shard size of 1 GB. This would result in 1000 chunks per -file and 100 files in total, which seems manageable for most storage systems. -You would still be able to read each 1 MB chunk independently, but you would need to -write your data in 1 GB increments. - -To use sharding, you need to specify the ``shards`` parameter when creating the array. - - >>> z6 = zarr.create_array(store={}, shape=(10000, 10000, 1000), shards=(1000, 1000, 1000), chunks=(100, 100, 100), dtype='uint8') - >>> z6.info - Type : Array - Zarr format : 3 - Data type : UInt8() - Fill value : 0 - Shape : (10000, 10000, 1000) - Shard shape : (1000, 1000, 1000) - Chunk shape : (100, 100, 100) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=None) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 100000000000 (93.1G) - -.. _user-guide-chunks-order: - -Chunk memory layout -~~~~~~~~~~~~~~~~~~~ - -The order of bytes **within each chunk** of an array can be changed via the -``order`` config option, to use either C or Fortran layout. For -multi-dimensional arrays, these two layouts may provide different compression -ratios, depending on the correlation structure within the data. 
E.g.:: - - >>> import numpy as np - >>> - >>> a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T - >>> c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'}) - >>> c[:] = a - >>> c.info_complete() - Type : Array - Zarr format : 3 - Data type : Int32(endianness='little') - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 400000000 (381.5M) - No. bytes stored : 342588911 (326.7M) - Storage ratio : 1.2 - Chunks Initialized : 100 - >>> with zarr.config.set({'array.order': 'F'}): - ... f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype) - ... f[:] = a - >>> f.info_complete() - Type : Array - Zarr format : 3 - Data type : Int32(endianness='little') - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : F - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 400000000 (381.5M) - No. bytes stored : 342588911 (326.7M) - Storage ratio : 1.2 - Chunks Initialized : 100 - -In the above example, Fortran order gives a better compression ratio. This is an -artificial example but illustrates the general point that changing the order of -bytes within chunks of an array may improve the compression ratio, depending on -the structure of the data, the compression algorithm used, and which compression -filters (e.g., byte-shuffle) have been applied. - -.. _user-guide-chunks-empty-chunks: - -Empty chunks -~~~~~~~~~~~~ - -It is possible to configure how Zarr handles the storage of chunks that are "empty" -(i.e., every element in the chunk is equal to the array's fill value). 
When creating -an array with ``write_empty_chunks=False``, Zarr will check whether a chunk is empty before compression and storage. If a chunk is empty, -then Zarr does not store it, and instead deletes the chunk from storage -if the chunk had been previously stored. - -This optimization prevents storing redundant objects and can speed up reads, but the cost is -added computation during array writes, since the contents of -each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. -If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. -In this case, creating an array with ``write_empty_chunks=True`` (the default) will instruct Zarr to write every chunk without checking for emptiness. - -The following example illustrates the effect of the ``write_empty_chunks`` flag on -the time required to write an array with different values.:: - - >>> import zarr - >>> import numpy as np - >>> import time - >>> - >>> def timed_write(write_empty_chunks): - ... """ - ... Measure the time required and number of objects created when writing - ... to a Zarr array with random ints or fill value. - ... """ - ... chunks = (8192,) - ... shape = (chunks[0] * 1024,) - ... data = np.random.randint(0, 255, shape) - ... dtype = 'uint8' - ... arr = zarr.create_array( - ... f'data/example-{write_empty_chunks}.zarr', - ... shape=shape, - ... chunks=chunks, - ... dtype=dtype, - ... fill_value=0, - ... config={'write_empty_chunks': write_empty_chunks} - ... ) - ... # initialize all chunks - ... arr[:] = 100 - ... result = [] - ... for value in (data, arr.fill_value): - ... start = time.time() - ... arr[:] = value - ... elapsed = time.time() - start - ... result.append((elapsed, arr.nchunks_initialized)) - ... return result - ... # log results - >>> for write_empty_chunks in (True, False): - ... full, empty = timed_write(write_empty_chunks) - ... 
print(f'\nwrite_empty_chunks={write_empty_chunks}:\n\tRandom Data: {full[0]:.4f}s, {full[1]} objects stored\n\t Empty Data: {empty[0]:.4f}s, {empty[1]} objects stored\n') - write_empty_chunks=True: - Random Data: ..., 1024 objects stored - Empty Data: ...s, 1024 objects stored - - write_empty_chunks=False: - Random Data: ...s, 1024 objects stored - Empty Data: ...s, 0 objects stored - - -In this example, writing random data is slightly slower with ``write_empty_chunks=True``, -but writing empty data is substantially faster and generates far fewer objects in storage. - -.. _user-guide-rechunking: - -Changing chunk shapes (rechunking) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Coming soon. - -.. _user-guide-sync: - -Parallel computing and synchronization --------------------------------------- - -Coming soon. - -.. _user-guide-pickle: - -Pickle support --------------- - -Zarr arrays and groups can be pickled, as long as the underlying store object can be -pickled. With the exception of the :class:`zarr.storage.MemoryStore`, any of the -storage classes provided in the :mod:`zarr.storage` module can be pickled. - -If an array or group is backed by a persistent store such as the a :class:`zarr.storage.LocalStore`, -:class:`zarr.storage.ZipStore` or :class:`zarr.storage.FsspecStore` then the store data -**are not** pickled. The only thing that is pickled is the necessary parameters to allow the store -to re-open any underlying files or databases upon being unpickled. - -E.g., pickle/unpickle an local store array:: - - >>> import pickle - >>> data = np.arange(100000) - >>> z1 = zarr.create_array(store='data/example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype) - >>> z1[:] = data - >>> s = pickle.dumps(z1) - >>> z2 = pickle.loads(s) - >>> z1 == z2 - True - >>> np.all(z1[:] == z2[:]) - np.True_ - -.. _user-guide-tips-blosc: - -Configuring Blosc ------------------ - -Coming soon. 
diff --git a/docs/user-guide/storage.md b/docs/user-guide/storage.md new file mode 100644 index 0000000000..86dac188a5 --- /dev/null +++ b/docs/user-guide/storage.md @@ -0,0 +1,149 @@ +# Storage guide + +Zarr-Python supports multiple storage backends, including: local file systems, +Zip files, remote stores via [fsspec](https://filesystem-spec.readthedocs.io) (S3, HTTP, etc.), and in-memory stores. In +Zarr-Python 3, stores must implement the abstract store API from +[`zarr.abc.store.Store`][]. + +!!! note + Unlike Zarr-Python 2 where the store interface was built around a generic `MutableMapping` + API, Zarr-Python 3 utilizes a custom store API that utilizes Python's AsyncIO library. + +## Implicit Store Creation + +In most cases, it is not required to create a `Store` object explicitly. Passing a string +to Zarr's top level API will result in the store being created automatically: + +```python exec="true" session="storage" source="above" result="ansi" +import zarr + +# Implicitly create a writable LocalStore +group = zarr.create_group(store='data/foo/bar') +print(group) +``` + +```python exec="true" session="storage" source="above" result="ansi" +# Implicitly create a read-only FsspecStore +group = zarr.open_group( + store='s3://noaa-nwm-retro-v2-zarr-pds', + mode='r', + storage_options={'anon': True} +) +print(group) +``` + +```python exec="true" session="storage" source="above" result="ansi" +# Implicitly creates a MemoryStore +data = {} +group = zarr.create_group(store=data) +print(group) +``` + +## Explicit Store Creation + +In some cases, it may be helpful to create a store instance directly. Zarr-Python offers five +built-in stores: [`zarr.storage.LocalStore`][], [`zarr.storage.FsspecStore`][], +[`zarr.storage.ZipStore`][], [`zarr.storage.MemoryStore`][], and [`zarr.storage.ObjectStore`][].
+ +### Local Store + +The [`zarr.storage.LocalStore`][] stores data in a nested set of directories on a local +filesystem: + +```python exec="true" session="storage" source="above" result="ansi" +store = zarr.storage.LocalStore('data/foo/bar', read_only=True) +group = zarr.open_group(store=store, mode='r') +print(group) +``` + +### Zip Store + +The [`zarr.storage.ZipStore`][] stores the contents of a Zarr hierarchy in a single +Zip file. The [Zip Store specification](https://github.com/zarr-developers/zarr-specs/pull/311) is currently in draft form: + +```python exec="true" session="storage" source="above" result="ansi" +store = zarr.storage.ZipStore('data.zip', mode='w') +array = zarr.create_array(store=store, shape=(2,), dtype='float64') +print(array) +``` + +### Remote Store + +The [`zarr.storage.FsspecStore`][] stores the contents of a Zarr hierarchy following the same +logical layout as the [`LocalStore`][zarr.storage.LocalStore], except the store is assumed to be on a remote storage system +such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Storage). The +[`zarr.storage.FsspecStore`][] is backed by [fsspec](https://filesystem-spec.readthedocs.io) and can support any backend +that implements the [AbstractFileSystem](https://filesystem-spec.readthedocs.io/en/stable/api.html#fsspec.spec.AbstractFileSystem) +API. `storage_options` can be used to configure the fsspec backend: + +```python exec="true" session="storage" source="above" result="ansi" +store = zarr.storage.FsspecStore.from_url( + 's3://noaa-nwm-retro-v2-zarr-pds', + read_only=True, + storage_options={'anon': True} +) +group = zarr.open_group(store=store, mode='r') +print(group) +``` + +The type of filesystem (e.g. S3, https, etc.) is inferred from the scheme of the url (e.g. s3 for "**s3**://noaa-nwm-retro-v2-zarr-pds"). +In case a specific filesystem is needed, one can explicitly create it.
For example, to create an S3 filesystem: + +```python exec="true" session="storage" source="above" result="ansi" +import fsspec +fs = fsspec.filesystem( + 's3', anon=True, asynchronous=True, + client_kwargs={'endpoint_url': "https://noaa-nwm-retro-v2-zarr-pds.s3.amazonaws.com"} +) +store = zarr.storage.FsspecStore(fs) +print(store) +``` + + +### Memory Store + +The [`zarr.storage.MemoryStore`][] is an in-memory store that allows for serialization of +Zarr data (metadata and chunks) to a dictionary: + +```python exec="true" session="storage" source="above" result="ansi" +data = {} +store = zarr.storage.MemoryStore(data) +array = zarr.create_array(store=store, shape=(2,), dtype='float64') +print(array) +``` + +### Object Store + +[`zarr.storage.ObjectStore`][] stores the contents of the Zarr hierarchy using any ObjectStore +[storage implementation](https://developmentseed.org/obstore/latest/api/store/), including AWS S3 ([`obstore.store.S3Store`][]), Google Cloud Storage ([`obstore.store.GCSStore`][]), and Azure Blob Storage ([`obstore.store.AzureStore`][]). This store is backed by [obstore](https://developmentseed.org/obstore/latest/), which +builds on the production quality Rust library [object_store](https://docs.rs/object_store/latest/object_store/). + +```python exec="true" session="storage" source="above" result="ansi" +from zarr.storage import ObjectStore +from obstore.store import MemoryStore + +store = ObjectStore(MemoryStore()) +array = zarr.create_array(store=store, shape=(2,), dtype='float64') +print(array) +``` + +Here's an example of using ObjectStore for accessing remote data: + +```python exec="true" session="storage" source="above" result="ansi" +from zarr.storage import ObjectStore +from obstore.store import S3Store + +s3_store = S3Store('noaa-nwm-retro-v2-zarr-pds', skip_signature=True, region="us-west-2") +store = zarr.storage.ObjectStore(store=s3_store, read_only=True) +group = zarr.open_group(store=store, mode='r') +print(group.info) +``` + +!!!
warning + The [`zarr.storage.ObjectStore`][] class is experimental. + +## Developing custom stores + +Zarr-Python [`zarr.abc.store.Store`][] API is meant to be extended. The Store Abstract Base +Class includes all of the methods needed to be a fully operational store in Zarr Python. +Zarr also provides a test harness for custom stores: [`zarr.testing.store.StoreTests`][]. diff --git a/docs/user-guide/storage.rst b/docs/user-guide/storage.rst deleted file mode 100644 index e5a333872e..0000000000 --- a/docs/user-guide/storage.rst +++ /dev/null @@ -1,158 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. _user-guide-storage: - -Storage guide -============= - -Zarr-Python supports multiple storage backends, including: local file systems, -Zip files, remote stores via fsspec_ (S3, HTTP, etc.), and in-memory stores. In -Zarr-Python 3, stores must implement the abstract store API from -:class:`zarr.abc.store.Store`. - -.. note:: - Unlike Zarr-Python 2 where the store interface was built around a generic ``MutableMapping`` - API, Zarr-Python 3 utilizes a custom store API that utilizes Python's AsyncIO library. - -Implicit Store Creation ------------------------ - -In most cases, it is not required to create a ``Store`` object explicitly. Passing a string -to Zarr's top level API will result in the store being created automatically.: - - >>> import zarr - >>> - >>> # Implicitly create a writable LocalStore - >>> zarr.create_group(store='data/foo/bar') - - >>> - >>> # Implicitly create a read-only FsspecStore - >>> zarr.open_group( - ... store='s3://noaa-nwm-retro-v2-zarr-pds', - ... mode='r', - ... storage_options={'anon': True} - ... ) - > - >>> - >>> # Implicitly creates a MemoryStore - >>> data = {} - >>> zarr.create_group(store=data) - - -Explicit Store Creation ------------------------ - -In some cases, it may be helpful to create a store instance directly. 
Zarr-Python offers four -built-in store: :class:`zarr.storage.LocalStore`, :class:`zarr.storage.FsspecStore`, -:class:`zarr.storage.ZipStore`, :class:`zarr.storage.MemoryStore`, and :class:`zarr.storage.ObjectStore`. - -Local Store -~~~~~~~~~~~ - -The :class:`zarr.storage.LocalStore` stores data in a nested set of directories on a local -filesystem.: - - >>> store = zarr.storage.LocalStore('data/foo/bar', read_only=True) - >>> zarr.open_group(store=store, mode='r') - - -Zip Store -~~~~~~~~~ - -The :class:`zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single -Zip file. The `Zip Store specification`_ is currently in draft form.: - - >>> store = zarr.storage.ZipStore('data.zip', mode='w') - >>> zarr.create_array(store=store, shape=(2,), dtype='float64') - - -Remote Store -~~~~~~~~~~~~ - -The :class:`zarr.storage.FsspecStore` stores the contents of a Zarr hierarchy in following the same -logical layout as the ``LocalStore``, except the store is assumed to be on a remote storage system -such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Store). The -:class:`zarr.storage.FsspecStore` is backed by `fsspec`_ and can support any backend -that implements the `AbstractFileSystem `_ -API. ``storage_options`` can be used to configure the fsspec backend.: - - >>> store = zarr.storage.FsspecStore.from_url( - ... 's3://noaa-nwm-retro-v2-zarr-pds', - ... read_only=True, - ... storage_options={'anon': True} - ... ) - >>> zarr.open_group(store=store, mode='r') - > - -The type of filesystem (e.g. S3, https, etc..) is inferred from the scheme of the url (e.g. s3 for "**s3**://noaa-nwm-retro-v2-zarr-pds"). -In case a specific filesystem is needed, one can explicitly create it. For example to create a S3 filesystem: - - >>> import fsspec - >>> fs = fsspec.filesystem( - ... 's3', anon=True, asynchronous=True, - ... client_kwargs={'endpoint_url': "https://noaa-nwm-retro-v2-zarr-pds.s3.amazonaws.com"} - ... 
) - >>> store = zarr.storage.FsspecStore(fs) - -Memory Store -~~~~~~~~~~~~ - -The :class:`zarr.storage.MemoryStore` a in-memory store that allows for serialization of -Zarr data (metadata and chunks) to a dictionary.: - - >>> data = {} - >>> store = zarr.storage.MemoryStore(data) - >>> # TODO: replace with create_array after #2463 - >>> zarr.create_array(store=store, shape=(2,), dtype='float64') - - -Object Store -~~~~~~~~~~~~ - -:class:`zarr.storage.ObjectStore` stores the contents of the Zarr hierarchy using any ObjectStore -`storage implementation `_, including AWS S3 (:class:`obstore.store.S3Store`), Google Cloud Storage (:class:`obstore.store.GCSStore`), and Azure Blob Storage (:class:`obstore.store.AzureStore`). This store is backed by `obstore `_, which -builds on the production quality Rust library `object_store `_. - - - >>> from zarr.storage import ObjectStore - >>> from obstore.store import MemoryStore - >>> - >>> store = ObjectStore(MemoryStore()) - >>> zarr.create_array(store=store, shape=(2,), dtype='float64') - - -Here's an example of using ObjectStore for accessing remote data: - - >>> from zarr.storage import ObjectStore - >>> from obstore.store import S3Store - >>> - >>> s3_store = S3Store('noaa-nwm-retro-v2-zarr-pds', skip_signature=True, region="us-west-2") - >>> store = zarr.storage.ObjectStore(store=s3_store, read_only=True) - >>> group = zarr.open_group(store=store, mode='r') - >>> group.info - Name : - Type : Group - Zarr format : 2 - Read-only : True - Store type : ObjectStore - No. members : 12 - No. arrays : 12 - No. groups : 0 - -.. warning:: - The :class:`zarr.storage.ObjectStore` class is experimental. - -.. _user-guide-custom-stores: - -Developing custom stores ------------------------- - -Zarr-Python :class:`zarr.abc.store.Store` API is meant to be extended. The Store Abstract Base -Class includes all of the methods needed to be a fully operational store in Zarr Python. 
-Zarr also provides a test harness for custom stores: :class:`zarr.testing.store.StoreTests`. - -.. _Zip Store Specification: https://github.com/zarr-developers/zarr-specs/pull/311 -.. _fsspec: https://filesystem-spec.readthedocs.io diff --git a/docs/user-guide/v3_migration.md b/docs/user-guide/v3_migration.md new file mode 100644 index 0000000000..c3a7ddbafe --- /dev/null +++ b/docs/user-guide/v3_migration.md @@ -0,0 +1,228 @@ +# 3.0 Migration Guide + +Zarr-Python 3 represents a major refactor of the Zarr-Python codebase. Some of the +goals motivating this refactor included: + +* adding support for the Zarr format 3 specification (along with the Zarr format 2 specification) +* cleaning up internal and user facing APIs +* improving performance (particularly in high latency storage environments like + cloud object stores) + +To accommodate this, Zarr-Python 3 introduces a number of changes to the API, including a number +of significant breaking changes and deprecations. + +This page provides a guide explaining breaking changes and deprecations to help you +migrate your code from version 2 to version 3. If we have missed anything, please +open a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) +so we can improve this guide. + +## Compatibility target + +The goals described above necessitated some breaking changes to the API (hence the +major version update), but where possible we have maintained backwards compatibility +in the most widely used parts of the API. This includes the [`zarr.Array`][] and +[`zarr.Group`][] classes and the "top-level API" (e.g. [`zarr.open_array`][] and +[`zarr.open_group`][]). + +## Getting ready for 3.0 + +Before migrating to Zarr-Python 3, we suggest projects that depend on Zarr-Python take +the following actions in order: + +1. Pin the supported Zarr-Python version to `zarr>=2,<3`. This is a best practice + and will protect your users from any incompatibilities that may arise during the + release of Zarr-Python 3.
This pin can be removed after migrating to Zarr-Python 3. +2. Limit your imports from the Zarr-Python package. Most of the primary API `zarr.*` + will be compatible in Zarr-Python 3. However, the following breaking API changes are + planned: + + - `numcodecs.*` will no longer be available in `zarr.*`. To migrate, import codecs + directly from `numcodecs`: + + ```python + from numcodecs import Blosc + # instead of: + # from zarr import Blosc + ``` + + - The `zarr.v3_api_available` feature flag is being removed. In Zarr-Python 3 + the v3 API is always available, so you shouldn't need to use this flag. + - The following internal modules are being removed or significantly changed. If + your application relies on imports from any of the below modules, you will need + to either a) modify your application to no longer rely on these imports or b) + vendor the parts of the specific modules that you need. + + * `zarr.attrs` has gone, with no replacement + * `zarr.codecs` has changed, see "Codecs" section below for more information + * `zarr.context` has gone, with no replacement + * `zarr.core` remains but should be considered private API + * `zarr.hierarchy` has gone, with no replacement (use `zarr.Group` in place of `zarr.hierarchy.Group`) + * `zarr.indexing` has gone, with no replacement + * `zarr.meta` has gone, with no replacement + * `zarr.meta_v1` has gone, with no replacement + * `zarr.sync` has gone, with no replacement + * `zarr.types` has gone, with no replacement + * `zarr.util` has gone, with no replacement + * `zarr.n5` has gone, see below for alternative N5 options + +3. Test that your package works with version 3. +4. Update the pin to include `zarr>=3,<4`. + +## Zarr-Python 2 support window + +Zarr-Python 2.x is still available, though we recommend migrating to Zarr-Python 3 for +its performance improvements and new features. Security and bug fixes will be made to +the 2.x series for at least six months following the first Zarr-Python 3 release.
+If you need to use the latest Zarr-Python 2 release, you can install it with: + +```console +$ pip install "zarr==2.*" +``` + +!!! note + Development and maintenance of the 2.x release series has moved to the + [support/v2](https://github.com/zarr-developers/zarr-python/tree/support/v2) branch. + Issues and pull requests related to this branch are tagged with the + [V2](https://github.com/zarr-developers/zarr-python/labels/V2) label. + +## Migrating to Zarr-Python 3 + +The following sections provide details on breaking changes in Zarr-Python 3. + +### The Array class + +1. Disallow direct construction - the signature for initializing the `Array` class has changed + significantly. Please use [`zarr.create_array`][] or [`zarr.open_array`][] instead of + directly constructing the [`zarr.Array`][] class. + +2. Defaulting to `zarr_format=3` - newly created arrays will use version 3 of the + Zarr specification. To continue using version 2, set `zarr_format=2` when creating arrays + or set `default_zarr_version=2` in Zarr's runtime configuration. + +### The Group class + +1. Disallow direct construction - use [`zarr.open_group`][] or [`zarr.create_group`][] + instead of directly constructing the `zarr.Group` class. +2. Most of the h5py compatibility methods are deprecated and will issue warnings if used. + The following functions are drop-in replacements that have the same signature and functionality: + + - Use [`zarr.Group.create_array`][] in place of `zarr.Group.create_dataset` + - Use [`zarr.Group.require_array`][] in place of `zarr.Group.require_dataset` +3. Disallow "." syntax for getting group members. To get a member of a group named `foo`, + use `group["foo"]` in place of `group.foo`. + +### The Store class + +The Store API has changed significantly in Zarr-Python 3.
The most notable changes to the +Store API are: + +#### Store Import Paths + +Several store implementations have moved from the top-level module to `zarr.storage`: + +```diff title="Store import changes from v2 to v3" +# Before (v2) +- from zarr import MemoryStore, DirectoryStore ++ from zarr.storage import MemoryStore, LocalStore # LocalStore replaces DirectoryStore +``` + +Common replacements: + +| v2 Import | v3 Import | +|-------------------------|------------------------------------| +| `zarr.MemoryStore` | [`zarr.storage.MemoryStore`][] | +| `zarr.DirectoryStore` | [`zarr.storage.LocalStore`][] | +| `zarr.TempStore` | Use [`tempfile.TemporaryDirectory`][] with [`LocalStore`][zarr.storage.LocalStore] | + +1. Replaced the `MutableMapping` base class in favor of a custom abstract base class + ([`zarr.abc.store.Store`][]). +2. Switched to an asynchronous interface for all store methods that result in IO. This + change ensures that all store methods are non-blocking and are as performant as + possible. + +Beyond the changes to the store interface, a number of deprecated stores were also removed in +Zarr-Python 3. See issue #1274 for more details on the removal of these stores. + +- `N5Store` - see https://github.com/zarr-developers/n5py for an alternative interface to + N5 formatted data. +- `ABSStore` - use the [`zarr.storage.FsspecStore`][] instead along with fsspec's + [adlfs backend](https://github.com/fsspec/adlfs). + +The following stores have been removed altogether. Users who need these stores will have to +implement their own version in zarr-python v3. + +- `DBMStore` +- `LMDBStore` +- `SQLiteStore` +- `MongoDBStore` +- `RedisStore` + +At present, the latter five stores in this list do not have an equivalent in Zarr-Python 3.
+If you are interested in developing a custom store that targets these backends, see +[developing custom stores](storage.md/#developing-custom-stores) or open an +[issue](https://github.com/zarr-developers/zarr-python/issues) to discuss your use case. + +### Codecs + +Codecs defined in ``numcodecs`` (and also imported into the ``zarr.codecs`` namespace in Zarr-Python 2) +should still be used when creating Zarr format 2 arrays. + +Codecs for creating Zarr format 3 arrays are available in two locations: + +- `zarr.codecs` contains Zarr format 3 codecs that are defined in the [codecs section of the Zarr format 3 specification](https://zarr-specs.readthedocs.io/en/latest/v3/codecs/index.html). +- `numcodecs.zarr3` contains codecs from `numcodecs` that can be used to create Zarr format 3 arrays, but are not necessarily part of the Zarr format 3 specification. + +### Dependencies + +When installing using `pip`: + +- The new `remote` dependency group can be used to install a supported version of + `fsspec`, required for remote data access. +- The new `gpu` dependency group can be used to install a supported version of + `cuda`, required for GPU functionality. +- The `jupyter` optional dependency group has been removed, since v3 contains no + jupyter specific functionality. + +### Miscellaneous + +- The keyword argument `zarr_version` available in most creation functions in `zarr` + (e.g. [`zarr.create`][], [`zarr.open`][], [`zarr.group`][], [`zarr.array`][]) has + been deprecated in favor of `zarr_format`. + +## 🚧 Work in Progress 🚧 + +Zarr-Python 3 is still under active development, and is not yet fully complete. +The following list summarizes areas of the codebase that we expect to build out +after the 3.0.0 release. If features listed below are important to your use case +of Zarr-Python, please open (or comment on) a +[GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new). 
+ +- The following functions / methods have not been ported to Zarr-Python 3 yet: + + * `zarr.copy` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) + * `zarr.copy_all` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) + * `zarr.copy_store` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) + * `zarr.Group.move` ([issue #2108](https://github.com/zarr-developers/zarr-python/issues/2108)) + +- The following features (corresponding to function arguments to functions in + `zarr`) have not been ported to Zarr-Python 3 yet. Using these features + will raise a warning or a `NotImplementedError`: + + * `cache_attrs` + * `cache_metadata` + * `chunk_store` ([issue #2495](https://github.com/zarr-developers/zarr-python/issues/2495)) + * `meta_array` + * `object_codec` ([issue #2617](https://github.com/zarr-developers/zarr-python/issues/2617)) + * `synchronizer` ([issue #1596](https://github.com/zarr-developers/zarr-python/issues/1596)) + * `dimension_separator` + +- The following features that were supported by Zarr-Python 2 have not been ported + to Zarr-Python 3 yet: + + * Structured arrays / dtypes ([issue #2134](https://github.com/zarr-developers/zarr-python/issues/2134)) + * Fixed-length string dtypes ([issue #2347](https://github.com/zarr-developers/zarr-python/issues/2347)) + * Datetime and timedelta dtypes ([issue #2616](https://github.com/zarr-developers/zarr-python/issues/2616)) + * Object dtypes ([issue #2616](https://github.com/zarr-developers/zarr-python/issues/2616)) + * Ragged arrays ([issue #2618](https://github.com/zarr-developers/zarr-python/issues/2618)) + * Groups and Arrays do not implement `__enter__` and `__exit__` protocols ([issue #2619](https://github.com/zarr-developers/zarr-python/issues/2619)) + * Default filters for object dtypes for Zarr format 2 arrays ([issue #2627](https://github.com/zarr-developers/zarr-python/issues/2627)) diff --git 
a/docs/user-guide/v3_migration.rst b/docs/user-guide/v3_migration.rst deleted file mode 100644 index 2b53e39b83..0000000000 --- a/docs/user-guide/v3_migration.rst +++ /dev/null @@ -1,250 +0,0 @@ -.. _v3 migration guide: - -3.0 Migration Guide -=================== - -Zarr-Python 3 represents a major refactor of the Zarr-Python codebase. Some of the -goals motivating this refactor included: - -* adding support for the Zarr format 3 specification (along with the Zarr format 2 specification) -* cleaning up internal and user facing APIs -* improving performance (particularly in high latency storage environments like - cloud object stores) - -To accommodate this, Zarr-Python 3 introduces a number of changes to the API, including a number -of significant breaking changes and deprecations. - -This page provides a guide explaining breaking changes and deprecations to help you -migrate your code from version 2 to version 3. If we have missed anything, please -open a `GitHub issue `_ -so we can improve this guide. - -Compatibility target --------------------- - -The goals described above necessitated some breaking changes to the API (hence the -major version update), but where possible we have maintained backwards compatibility -in the most widely used parts of the API. This in the :class:`zarr.Array` and -:class:`zarr.Group` classes and the "top-level API" (e.g. :func:`zarr.open_array` and -:func:`zarr.open_group`). - -Getting ready for 3.0 ---------------------- - -Before migrating to Zarr-Python 3, we suggest projects that depend on Zarr-Python take -the following actions in order: - -1. Pin the supported Zarr-Python version to ``zarr>=2,<3``. This is a best practice - and will protect your users from any incompatibilities that may arise during the - release of Zarr-Python 3. This pin can be removed after migrating to Zarr-Python 3. -2. Limit your imports from the Zarr-Python package. Most of the primary API ``zarr.*`` - will be compatible in Zarr-Python 3. 
However, the following breaking API changes are - planned: - - - ``numcodecs.*`` will no longer be available in ``zarr.*``. To migrate, import codecs - directly from ``numcodecs``: - - .. code-block:: python - - from numcodecs import Blosc - # instead of: - # from zarr import Blosc - - - The ``zarr.v3_api_available`` feature flag is being removed. In Zarr-Python 3 - the v3 API is always available, so you shouldn't need to use this flag. - - The following internal modules are being removed or significantly changed. If - your application relies on imports from any of the below modules, you will need - to either a) modify your application to no longer rely on these imports or b) - vendor the parts of the specific modules that you need. - - * ``zarr.attrs`` has gone, with no replacement - * ``zarr.codecs`` has changed, see "Codecs" section below for more information - * ``zarr.context`` has gone, with no replacement - * ``zarr.core`` remains but should be considered private API - * ``zarr.hierarchy`` has gone, with no replacement (use ``zarr.Group`` inplace of ``zarr.hierarchy.Group``) - * ``zarr.indexing`` has gone, with no replacement - * ``zarr.meta`` has gone, with no replacement - * ``zarr.meta_v1`` has gone, with no replacement - * ``zarr.sync`` has gone, with no replacement - * ``zarr.types`` has gone, with no replacement - * ``zarr.util`` has gone, with no replacement - * ``zarr.n5`` has gone, see below for an alternative N5 options - -3. Test that your package works with version 3. -4. Update the pin to include ``zarr>=3,<4``. - -Zarr-Python 2 support window ----------------------------- - -Zarr-Python 2.x is still available, though we recommend migrating to Zarr-Python 3 for -its performance improvements and new features. Security and bug fixes will be made to -the 2.x series for at least six months following the first Zarr-Python 3 release. -If you need to use the latest Zarr-Python 2 release, you can install it with: - -.. 
code-block:: console - - $ pip install "zarr==2.*" - -.. note:: - Development and maintenance of the 2.x release series has moved to the - `support/v2 `_ branch. - Issues and pull requests related to this branch are tagged with the - `V2 `_ label. - -Migrating to Zarr-Python 3 --------------------------- - -The following sections provide details on breaking changes in Zarr-Python 3. - -The Array class -~~~~~~~~~~~~~~~ - -1. Disallow direct construction - the signature for initializing the ``Array`` class has changed - significantly. Please use :func:`zarr.create_array` or :func:`zarr.open_array` instead of - directly constructing the :class:`zarr.Array` class. - -2. Defaulting to ``zarr_format=3`` - newly created arrays will use the version 3 of the - Zarr specification. To continue using version 2, set ``zarr_format=2`` when creating arrays - or set ``default_zarr_version=2`` in Zarr's :ref:`runtime configuration `. - -The Group class -~~~~~~~~~~~~~~~ - -1. Disallow direct construction - use :func:`zarr.open_group` or :func:`zarr.create_group` - instead of directly constructing the :class:`zarr.Group` class. -2. Most of the h5py compatibility methods are deprecated and will issue warnings if used. - The following functions are drop in replacements that have the same signature and functionality: - - - Use :func:`zarr.Group.create_array` in place of :func:`zarr.Group.create_dataset` - - Use :func:`zarr.Group.require_array` in place of :func:`zarr.Group.require_dataset` -3. Disallow "." syntax for getting group members. To get a member of a group named ``foo``, - use ``group["foo"]`` in place of ``group.foo``. - -The Store class -~~~~~~~~~~~~~~~ - -The Store API has changed significant in Zarr-Python 3. The most notable changes to the -Store API are: - -Store Import Paths -^^^^^^^^^^^^^^^^^^ -Several store implementations have moved from the top-level module to ``zarr.storage``: - -.. 
code-block:: diff - :caption: Store import changes from v2 to v3 - - # Before (v2) - - from zarr import MemoryStore, DirectoryStore - + from zarr.storage import MemoryStore, LocalStore # LocalStore replaces DirectoryStore - -Common replacements: - -+-------------------------+------------------------------------+ -| v2 Import | v3 Import | -+=========================+====================================+ -| ``zarr.MemoryStore`` | ``zarr.storage.MemoryStore`` | -+-------------------------+------------------------------------+ -| ``zarr.DirectoryStore`` | ``zarr.storage.LocalStore`` | -+-------------------------+------------------------------------+ -| ``zarr.TempStore`` | Use ``tempfile.TemporaryDirectory``| -| | with ``LocalStore`` | -+-------------------------+------------------------------------+ - -1. Replaced the ``MutableMapping`` base class in favor of a custom abstract base class - (:class:`zarr.abc.store.Store`). -2. Switched to an asynchronous interface for all store methods that result in IO. This - change ensures that all store methods are non-blocking and are as performant as - possible. - -Beyond the changes store interface, a number of deprecated stores were also removed in -Zarr-Python 3. See :issue:`1274` for more details on the removal of these stores. - -- ``N5Store`` - see https://github.com/zarr-developers/n5py for an alternative interface to - N5 formatted data. -- ``ABSStore`` - use the :class:`zarr.storage.FsspecStore` instead along with fsspec's - `adlfs backend `_. - -The following stores have been removed altogether. Users who need these stores will have to -implement their own version in zarr-python v3. - -- ``DBMStore`` -- ``LMDBStore`` -- ``SQLiteStore`` -- ``MongoDBStore`` -- ``RedisStore`` - -At present, the latter five stores in this list do not have an equivalent in Zarr-Python 3. 
-If you are interested in developing a custom store that targets these backends, see -:ref:`developing custom stores ` or open an -`issue `_ to discuss your use case. - - -Codecs -~~~~~~ -Codecs defined in ``numcodecs`` (and also imported into the ``zarr.codecs`` namespace in Zarr-Python 2) -should still be used when creating Zarr format 2 arrays. - -Codecs for creating Zarr format 3 arrays are available in two locations: - -- `zarr.codecs` contains Zarr format 3 codecs that are defined in the `codecs section of the Zarr format 3 specification `_. -- `numcodecs.zarr3` contains codecs from ``numcodecs`` that can be used to create Zarr format 3 arrays, but are not necessarily part of the Zarr format 3 specification. - - -Dependencies -~~~~~~~~~~~~ - -When installing using ``pip``: - -- The new ``remote`` dependency group can be used to install a supported version of - ``fsspec``, required for remote data access. -- The new ``gpu`` dependency group can be used to install a supported version of - ``cuda``, required for GPU functionality. -- The ``jupyter`` optional dependency group has been removed, since v3 contains no - jupyter specific functionality. - -Miscellaneous -~~~~~~~~~~~~~ - -- The keyword argument ``zarr_version`` available in most creation functions in :mod:`zarr` - (e.g. :func:`zarr.create`, :func:`zarr.open`, :func:`zarr.group`, :func:`zarr.array`) has - been deprecated in favor of ``zarr_format``. - -🚧 Work in Progress 🚧 ----------------------- - -Zarr-Python 3 is still under active development, and is not yet fully complete. -The following list summarizes areas of the codebase that we expect to build out -after the 3.0.0 release. If features listed below are important to your use case -of Zarr-Python, please open (or comment on) a -`GitHub issue `_. 
- -- The following functions / methods have not been ported to Zarr-Python 3 yet: - - * :func:`zarr.copy` (:issue:`2407`) - * :func:`zarr.copy_all` (:issue:`2407`) - * :func:`zarr.copy_store` (:issue:`2407`) - * :func:`zarr.Group.move` (:issue:`2108`) - -- The following features (corresponding to function arguments to functions in - :mod:`zarr`) have not been ported to Zarr-Python 3 yet. Using these features - will raise a warning or a ``NotImplementedError``: - - * ``cache_attrs`` - * ``cache_metadata`` - * ``chunk_store`` (:issue:`2495`) - * ``meta_array`` - * ``object_codec`` (:issue:`2617`) - * ``synchronizer`` (:issue:`1596`) - * ``dimension_separator`` - -- The following features that were supported by Zarr-Python 2 have not been ported - to Zarr-Python 3 yet: - - * Structured arrays / dtypes (:issue:`2134`) - * Fixed-length string dtypes (:issue:`2347`) - * Datetime and timedelta dtypes (:issue:`2616`) - * Object dtypes (:issue:`2617`) - * Ragged arrays (:issue:`2618`) - * Groups and Arrays do not implement ``__enter__`` and ``__exit__`` protocols (:issue:`2619`) - * Big Endian dtypes (:issue:`2324`) - * Default filters for object dtypes for Zarr format 2 arrays (:issue:`2627`) diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000000..c938ec36a8 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,198 @@ +# Based on https://github.com/developmentseed/obspec/blob/main/mkdocs.yml +site_name: zarr-python +repo_name: zarr-developers/zarr-python +repo_url: https://github.com/zarr-developers/zarr-python +site_description: An implementation of chunked, compressed, N-dimensional arrays for Python. 
+site_author: Alistair Miles +site_url: https://zarr.readthedocs.io/ +docs_dir: docs + +nav: + - "index.md" + - "quick-start.md" + - User Guide: + - user-guide/index.md + - user-guide/installation.md + - user-guide/arrays.md + - user-guide/groups.md + - user-guide/attributes.md + - user-guide/storage.md + - user-guide/config.md + - user-guide/cli.md + - user-guide/v3_migration.md + - user-guide/data_types.md + - user-guide/performance.md + - user-guide/extending.md + - user-guide/gpu.md + - user-guide/consolidated_metadata.md + - API Reference: + - api/index.md + - api/array.md + - api/group.md + - api/create.md + - api/dtype.md + - api/open.md + - api/load.md + - api/save.md + - api/buffer.md + - api/convenience.md + - api/config.md + - api/codecs.md + - api/errors.md + - api/registry.md + - api/storage.md + - api/testing.md + - Async API: api/api_async.md + - Sync API: api/api_sync.md + - ABC: + - api/abc/buffer.md + - api/abc/codec.md + - api/abc/metadata.md + - api/abc/store.md + - deprecated: + - Convenience sub-module: api/deprecated/convenience.md + - Creation sub-module: api/deprecated/creation.md + - release-notes.md + - contributing.md +watch: + - src/zarr + - docs + +theme: + language: en + name: material + custom_dir: docs/overrides + logo: _static/logo_horizontal.svg + palette: + # Palette toggle for automatic mode + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + primary: blue grey + accent: pink + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: grey + accent: pink + toggle: + icon: material/brightness-4 + name: Switch to system preference + + font: + text: Roboto + code: Roboto Mono + + features: + - content.code.annotate + - content.code.copy + - navigation.indexes + - navigation.instant + - 
navigation.tracking + - search.suggest + - search.share + +extra: + social: + - icon: fontawesome/brands/mastodon + link: https://fosstodon.org/@zarr + - icon: fontawesome/brands/bluesky + link: https://bsky.app/profile/zarr.dev + +extra_css: + - overrides/stylesheets/extra.css + +plugins: + - search + - markdown-exec + - mkdocstrings: + enable_inventory: true + handlers: + python: + paths: [src/zarr] + options: + allow_inspection: true + docstring_section_style: list + docstring_style: numpy + inherited_members: true + line_length: 60 + separate_signature: true + show_root_heading: true + show_signature_annotations: true + show_source: true + show_symbol_type_toc: true + signature_crossrefs: true + extensions: + - griffe_inherited_docstrings + + inventories: + - https://docs.python.org/3/objects.inv + - https://docs.xarray.dev/en/stable/objects.inv + - https://numpy.org/doc/stable/objects.inv + - https://numcodecs.readthedocs.io/en/stable/objects.inv + - https://developmentseed.org/obstore/latest/objects.inv + - https://filesystem-spec.readthedocs.io/en/latest/objects.inv + - https://requests.readthedocs.io/en/latest/objects.inv + - https://docs.aiohttp.org/en/stable/objects.inv + - https://s3fs.readthedocs.io/en/latest/objects.inv + - https://docs.h5py.org/en/stable/objects.inv + - https://icechunk.io/en/stable/objects.inv + - https://lithops-cloud.github.io/docs/objects.inv + - https://docs.dask.org/en/stable/objects.inv + - redirects: + redirect_maps: + 'spec/index.md': 'https://zarr-specs.readthedocs.io' + 'spec/v1.md': 'https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html' + 'spec/v2.md': 'https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html' + 'spec/v3.md': 'https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html' + 'license.md': 'https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt' + 'tutorial.md': 'user-guide/installation.md' + 'getting-started.md': 'quick-start.md' + 'roadmap.md': 
'https://zarr.readthedocs.io/en/v3.0.8/developers/roadmap.html' + 'installation.md': 'user-guide/installation.md' + 'release.md': 'release-notes.md' + +# https://github.com/developmentseed/titiler/blob/50934c929cca2fa8d3c408d239015f8da429c6a8/docs/mkdocs.yml#L115-L140 +markdown_extensions: + - admonition + - attr_list + - codehilite: + guess_lang: false + - def_list + - footnotes + - md_in_html + - pymdownx.arithmatex + - pymdownx.betterem + - pymdownx.caret: + insert: false + - pymdownx.details + - pymdownx.escapeall: + hardbreak: true + nbsp: true + - pymdownx.magiclink: + hide_protocol: true + repo_url_shortener: true + - pymdownx.smartsymbols + - pymdownx.superfences + - pymdownx.tasklist: + custom_checkbox: true + - pymdownx.tilde + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - toc: + permalink: true + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets diff --git a/pyproject.toml b/pyproject.toml index 11d91944d5..f6293a6df1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,15 +96,15 @@ remote_tests = [ optional = ["rich", "universal-pathlib"] docs = [ # Doc building - 'sphinx==8.1.3', - 'sphinx-autobuild>=2021.3.14', - 'sphinx-autoapi==3.4.0', - 'sphinx_design', - 'sphinx-issues', - 'sphinx-copybutton', - 'sphinx-reredirects', - 'pydata-sphinx-theme', - 'numpydoc', + "mkdocs-material[imaging]>=9.6.14", + "mkdocs>=1.6.1", + "mkdocstrings>=0.29.1", + "mkdocstrings-python>=1.16.10", + "mike>=2.1.3", + "mkdocs-redirects>=1.2.0", + "markdown-exec[ansi]", + "griffe-inherited-docstrings", + "ruff", # Changelog generation 'towncrier', # Optional dependencies to run examples @@ -195,6 +195,14 @@ python = ["3.11", "3.12", "3.13"] numpy = ["1.26", "2.2"] version = ["minimal"] +[tool.hatch.envs.gputest.scripts] +run-coverage = "pytest -m gpu --cov-config=pyproject.toml 
--cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy" +run = "run-coverage --no-cov" +run-verbose = "run-coverage --verbose" +run-mypy = "mypy src" +run-hypothesis = "run-coverage --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*" +list-env = "pip list" + [tool.hatch.envs.upstream] template = 'test' python = "3.13" @@ -239,22 +247,15 @@ dependencies = [ 'zarr[remote_tests]', ] - -[tool.hatch.envs.doctest] -features = ["test", "optional", "remote", "remote_tests"] -description = "Test environment for doctests" - -[tool.hatch.envs.doctest.scripts] -run = "rm -r data/; pytest docs/user-guide --doctest-glob='*.rst'" -fix = "rm -r data/; pytest docs/user-guide --doctest-glob='*.rst' --accept" -list-env = "pip list" - [tool.hatch.envs.docs] -features = ['docs'] +features = ['docs', 'remote'] [tool.hatch.envs.docs.scripts] -build = "cd docs && make html" -serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0" +serve = "mkdocs serve" +build = "mkdocs build" +check = "mkdocs build --strict" +readthedocs = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r site $READTHEDOCS_OUTPUT/html" +list-env = "pip list" [tool.ruff] line-length = 100 @@ -411,8 +412,9 @@ ignore = [ [tool.numpydoc_validation] # See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for list of checks checks = [ - "GL06", - "GL07", + # Requires third-party support; see https://github.com/numpy/numpydoc/issues/463 + # "GL06", + # "GL07", # Currently broken; see https://github.com/numpy/numpydoc/issues/573 # "GL09", "GL10", @@ -425,9 +427,10 @@ checks = [ [tool.towncrier] directory = 'changes' -filename = "docs/release-notes.rst" -underlines = ["-", "~", "^"] -issue_format = ":issue:`{issue}`" +filename = "docs/release-notes.md" +underlines = ["", "", ""] +issue_format = "[#{issue}](https://github.com/zarr-developers/zarr-python/issues/{issue})" +start_string = "\n" [tool.codespell] ignore-words-list
= "astroid" diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index e8d1329b17..4b3edf78d1 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -450,7 +450,7 @@ async def getsize_prefix(self, prefix: str) -> int: Notes ----- ``getsize_prefix`` is just provided as a potentially faster alternative to - listing all the keys under a prefix calling :meth:`Store.getsize` on each. + listing all the keys under a prefix and calling [`Store.getsize`][zarr.abc.store.Store.getsize] on each. In general, ``prefix`` should be the path of an Array or Group in the Store. Implementations may differ on the behavior when some other ``prefix`` diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index b79c41801e..881341ace2 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -306,7 +306,7 @@ async def load( See Also -------- - save, savez + save Notes ----- @@ -352,8 +352,8 @@ async def open( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Additional parameters are passed through to :func:`zarr.creation.open_array` or - :func:`zarr.hierarchy.open_group`. + Additional parameters are passed through to [`zarr.creation.open_array`][] or + [`open_group`][zarr.api.asynchronous.open_group]. Returns ------- @@ -398,7 +398,7 @@ async def open_consolidated( *args: Any, use_consolidated: Literal[True] = True, **kwargs: Any ) -> AsyncGroup: """ - Alias for :func:`open_group` with ``use_consolidated=True``. + Alias for [`open_group`][zarr.api.asynchronous.open_group] with ``use_consolidated=True``. """ if use_consolidated is not True: raise TypeError( @@ -469,7 +469,7 @@ async def save_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Passed through to :func:`create`, e.g., compressor. + Passed through to [`create`][zarr.api.asynchronous.create], e.g., compressor.
""" zarr_format = ( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) @@ -568,8 +568,8 @@ async def save_group( async def tree(grp: AsyncGroup, expand: bool | None = None, level: int | None = None) -> Any: """Provide a rich display of the hierarchy. - .. deprecated:: 3.0.0 - `zarr.tree()` is deprecated and will be removed in a future release. + !!! warning "Deprecated" + `zarr.tree()` is deprecated since v3.0.0 and will be removed in a future release. Use `group.tree()` instead. Parameters @@ -599,7 +599,7 @@ async def array( data : array_like The data to fill the array with. **kwargs - Passed through to :func:`create`. + Passed through to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -734,7 +734,7 @@ async def create_group( The zarr format to use when saving. If no ``zarr_format`` is provided, the default format will be used. This default can be changed by modifying the value of ``default_zarr_format`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. @@ -783,12 +783,12 @@ async def open_group( Store or path to directory in file system or name of zip file. Strings are interpreted as paths on the local file system - and used as the ``root`` argument to :class:`zarr.storage.LocalStore`. + and used as the ``root`` argument to [zarr.storage.LocalStore][]. Dictionaries are used as the ``store_dict`` argument in - :class:`zarr.storage.MemoryStore``. + [zarr.storage.MemoryStore][]. - By default (``store=None``) a new :class:`zarr.storage.MemoryStore` + By default (``store=None``) a new [zarr.storage.MemoryStore][] is created. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional @@ -928,7 +928,7 @@ async def create( Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. 
If neither ``compressor`` nor ``filters`` are provided, the default compressor - :class:`zarr.codecs.ZstdCodec` is used. + [`zarr.codecs.ZstdCodec`][] is used. If ``compressor`` is set to ``None``, no compression is used. fill_value : Any, optional @@ -955,8 +955,8 @@ async def create( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -965,7 +965,7 @@ async def create( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -1016,8 +1016,8 @@ async def create( If no codecs are provided, default codecs will be used based on the data type of the array. 
For most data types, the default codecs are the tuple ``(BytesCodec(), ZstdCodec())``; - data types that require a special :class:`zarr.abc.codec.ArrayBytesCodec`, like variable-length strings or bytes, - will use the :class:`zarr.abc.codec.ArrayBytesCodec` required for the data type instead of :class:`zarr.codecs.BytesCodec`. + data types that require a special [`zarr.abc.codec.ArrayBytesCodec`][], like variable-length strings or bytes, + will use the [`zarr.abc.codec.ArrayBytesCodec`][] required for the data type instead of [`zarr.codecs.BytesCodec`][]. dimension_names : Iterable[str | None] | None = None An iterable of dimension names. Zarr format 3 only. storage_options : dict @@ -1105,7 +1105,7 @@ async def empty( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Notes ----- @@ -1127,7 +1127,7 @@ async def empty_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1160,7 +1160,7 @@ async def full( fill_value : scalar Fill value. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1181,7 +1181,7 @@ async def full_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1205,7 +1205,7 @@ async def ones( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. 
Returns ------- @@ -1225,7 +1225,7 @@ async def ones_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1261,7 +1261,7 @@ async def open_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Any keyword arguments to pass to :func:`create`. + Any keyword arguments to pass to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1329,7 +1329,7 @@ async def zeros( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1349,7 +1349,7 @@ async def zeros_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index d0134a4900..728822a326 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -198,8 +198,8 @@ def open( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Additional parameters are passed through to :func:`zarr.creation.open_array` or - :func:`zarr.hierarchy.open_group`. + Additional parameters are passed through to [`zarr.creation.open_array`][] or + [`open_group`][zarr.api.asynchronous.open_group]. Returns ------- @@ -225,7 +225,7 @@ def open( def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group: """ - Alias for :func:`open_group` with ``use_consolidated=True``. + Alias for [`open_group`][zarr.api.synchronous.open_group] with ``use_consolidated=True``. 
""" return Group( sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs)) @@ -291,7 +291,7 @@ def save_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Passed through to :func:`create`, e.g., compressor. + Passed through to [`create`][zarr.api.asynchronous.create], e.g., compressor. """ return sync( async_api.save_array( @@ -353,8 +353,8 @@ def save_group( def tree(grp: Group, expand: bool | None = None, level: int | None = None) -> Any: """Provide a rich display of the hierarchy. - .. deprecated:: 3.0.0 - `zarr.tree()` is deprecated and will be removed in a future release. + !!! warning "Deprecated" + `zarr.tree()` is deprecated since v3.0.0 and will be removed in a future release. Use `group.tree()` instead. Parameters @@ -383,7 +383,7 @@ def array(data: npt.ArrayLike | Array, **kwargs: Any) -> Array: data : array_like The data to fill the array with. **kwargs - Passed through to :func:`create`. + Passed through to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -483,12 +483,12 @@ def open_group( Store or path to directory in file system or name of zip file. Strings are interpreted as paths on the local file system - and used as the ``root`` argument to :class:`zarr.storage.LocalStore`. + and used as the ``root`` argument to [zarr.storage.LocalStore][]. Dictionaries are used as the ``store_dict`` argument in - :class:`zarr.storage.MemoryStore``. + [zarr.storage.MemoryStore][]. - By default (``store=None``) a new :class:`zarr.storage.MemoryStore` + By default (``store=None``) a new [zarr.storage.MemoryStore][] is created. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional @@ -580,7 +580,7 @@ def create_group( The zarr format to use when saving. If no ``zarr_format`` is provided, the default format will be used. This default can be changed by modifying the value of ``default_zarr_format`` - in :mod:`zarr.core.config`. 
+ in [`zarr.config`][zarr.config]. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. @@ -661,7 +661,7 @@ def create( Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If neither ``compressor`` nor ``filters`` are provided, the default compressor - :class:`zarr.codecs.ZstdCodec` is used. + [`zarr.codecs.ZstdCodec`][] is used. If ``compressor`` is set to ``None``, no compression is used. fill_value : Any, optional @@ -688,8 +688,8 @@ def create( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -698,7 +698,7 @@ def create( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -749,8 +749,8 @@ def create( If no codecs are provided, default codecs will be used based on the data type of the array. 
For most data types, the default codecs are the tuple ``(BytesCodec(), ZstdCodec())``; - data types that require a special :class:`zarr.abc.codec.ArrayBytesCodec`, like variable-length strings or bytes, - will use the :class:`zarr.abc.codec.ArrayBytesCodec` required for the data type instead of :class:`zarr.codecs.BytesCodec`. + data types that require a special [`zarr.abc.codec.ArrayBytesCodec`][], like variable-length strings or bytes, + will use the [`zarr.abc.codec.ArrayBytesCodec`][] required for the data type instead of [`zarr.codecs.BytesCodec`][]. dimension_names : Iterable[str | None] | None = None An iterable of dimension names. Zarr format 3 only. storage_options : dict @@ -827,7 +827,7 @@ def create_array( ) -> Array: """Create an array. - This function wraps :func:`zarr.core.array.create_array`. + This function wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -853,8 +853,9 @@ def create_array( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -863,7 +864,7 @@ def create_array( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. 
In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -875,20 +876,20 @@ def create_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -898,7 +899,7 @@ def create_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. 
attributes : dict, optional @@ -1029,8 +1030,8 @@ def from_array( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -1040,8 +1041,8 @@ def from_array( which is to choose default filters based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are the empty tuple ``()``. The only cases where default filters are not empty is when the Zarr format is 2, and the - data type is a variable-length data type like :class:`zarr.dtype.VariableLengthUTF8` or - :class:`zarr.dtype.VariableLengthUTF8`. In these cases, the default filters is a tuple with a + data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or + [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters is a tuple with a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -1071,7 +1072,7 @@ def from_array( - dict[str, JSON]: A dict representation of an ``ArrayBytesCodec``. - ArrayBytesCodec: An instance of ``ArrayBytesCodec``. - "auto": a default serializer will be used. These defaults can be changed by modifying the value of - ``array.v3_default_serializer`` in :mod:`zarr.core.config`. + ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. - "keep": Retain the serializer of the input array if it is a zarr Array. 
fill_value : Any, optional @@ -1195,7 +1196,7 @@ def empty(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1222,7 +1223,7 @@ def empty_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1249,7 +1250,7 @@ def full(shape: tuple[int, ...], fill_value: Any, **kwargs: Any) -> Array: fill_value : scalar Fill value. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1269,7 +1270,7 @@ def full_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1288,7 +1289,7 @@ def ones(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1307,7 +1308,7 @@ def ones_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1343,7 +1344,7 @@ def open_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. 
**kwargs - Any keyword arguments to pass to :func:`create`. + Any keyword arguments to pass to [`create`][zarr.api.asynchronous.create]. Returns @@ -1395,7 +1396,7 @@ def zeros(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1414,7 +1415,7 @@ def zeros_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- diff --git a/src/zarr/codecs/numcodecs/_codecs.py b/src/zarr/codecs/numcodecs/_codecs.py index 21bdc4e91b..651682d317 100644 --- a/src/zarr/codecs/numcodecs/_codecs.py +++ b/src/zarr/codecs/numcodecs/_codecs.py @@ -1,7 +1,7 @@ """ -This module provides compatibility for :py:mod:`numcodecs` in Zarr version 3. +This module provides compatibility for [numcodecs][] in Zarr version 3. -These codecs were previously defined in :py:mod:`numcodecs`, and have now been moved to `zarr`. +These codecs were previously defined in [numcodecs][], and have now been moved to `zarr`. >>> import numpy as np >>> import zarr @@ -16,9 +16,8 @@ ... compressors=[numcodecs.BZ2(level=5)]) >>> array[:] = np.arange(np.prod(array.shape), dtype=array.dtype).reshape(*array.shape) -.. note:: - - Please note that the codecs in :py:mod:`zarr.codecs.numcodecs` are not part of the Zarr version +!!! note + Please note that the codecs in [zarr.codecs.numcodecs][] are not part of the Zarr version 3 specification. Using these codecs might cause interoperability issues with other Zarr implementations. 
""" @@ -82,7 +81,7 @@ def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs: Any) -> N cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}" cls.codec_name = f"{CODEC_PREFIX}{namespace}" cls.__doc__ = f""" - See :class:`{cls_name}` for more details and parameters. + See [{cls_name}][] for more details and parameters. """ def __init__(self, **codec_config: JSON) -> None: diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py index 3ca4ffcb4b..391ffc5186 100644 --- a/src/zarr/convenience.py +++ b/src/zarr/convenience.py @@ -1,10 +1,8 @@ """ Convenience helpers. -.. warning:: - - This sub-module is deprecated. All functions here are defined - in the top level zarr namespace instead. +!!! warning "Deprecated" + This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. """ import warnings diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 793d1a034b..6aefc38031 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -492,8 +492,9 @@ async def create( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Method to create a new asynchronous array instance. - .. deprecated:: 3.0.0 - Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. + !!! warning "Deprecated" + `AsyncArray.create()` is deprecated since v3.0.0 and will be removed in a future release. + Use [`zarr.api.asynchronous.create_array`][] instead. Parameters ---------- @@ -523,7 +524,6 @@ async def create( Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. If no codecs are provided, default codecs will be used: - dimension_names : Iterable[str | None], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. @@ -537,7 +537,7 @@ async def create( order : Literal["C", "F"], optional The memory of the array (default is "C"). 
If ``zarr_format`` is 2, this parameter sets the memory order of the array. - If `zarr_format`` is 3, then this parameter is deprecated, because memory order + If ``zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. filters : Iterable[Codec] | Literal["auto"], optional @@ -545,8 +545,8 @@ async def create( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -555,7 +555,7 @@ async def create( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -569,7 +569,7 @@ async def create( - For Unicode strings, the default is ``VLenUTF8Codec``. - For bytes or objects, the default is ``VLenBytesCodec``. 
- These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`. + These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. overwrite : bool, optional Whether to raise an error if the store already exists (default is False). data : npt.ArrayLike, optional @@ -640,8 +640,7 @@ async def _create( config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Method to create a new asynchronous array instance. - See :func:`AsyncArray.create` for more details. - Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. + Deprecated in favor of [`zarr.api.asynchronous.create_array`][]. """ dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) @@ -1091,9 +1090,9 @@ def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. - .. deprecated:: 3.0.0 - `array.compressor` is deprecated and will be removed in a future release. - Use `array.compressors` instead. + !!! warning "Deprecated" + `Array.compressor` is deprecated since v3.0.0 and will be removed in a future release. + Use [`Array.compressors`][zarr.AsyncArray.compressors] instead. """ if self.metadata.zarr_format == 2: return self.metadata.compressor @@ -1292,8 +1291,8 @@ async def nchunks_initialized(self) -> int: Notes ----- - On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous) - property :attr:`Array.nchunks_initialized`. + On [`AsyncArray`][zarr.AsyncArray] this is an asynchronous method, unlike the (synchronous) + property [`Array.nchunks_initialized`][zarr.Array.nchunks_initialized]. Examples -------- @@ -1325,8 +1324,8 @@ async def _nshards_initialized(self) -> int: Notes ----- - On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous) - property :attr:`Array._nshards_initialized`. 
+ On [`AsyncArray`][zarr.AsyncArray] this is an asynchronous method, unlike the (synchronous) + property [`Array._nshards_initialized`][zarr.Array._nshards_initialized]. Examples -------- @@ -1751,15 +1750,15 @@ async def setitem( @property def oindex(self) -> AsyncOIndex[T_ArrayMetadata]: - """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and - :func:`set_orthogonal_selection` for documentation and examples.""" + """Shortcut for orthogonal (outer) indexing, see [get_orthogonal_selection][zarr.Array.get_orthogonal_selection] and + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection] for documentation and examples.""" return AsyncOIndex(self) @property def vindex(self) -> AsyncVIndex[T_ArrayMetadata]: - """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, - :func:`set_coordinate_selection`, :func:`get_mask_selection` and - :func:`set_mask_selection` for documentation and examples.""" + """Shortcut for vectorized (inner) indexing, see [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_mask_selection][zarr.Array.get_mask_selection] and + [set_mask_selection][zarr.Array.set_mask_selection] for documentation and examples.""" return AsyncVIndex(self) async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: @@ -1915,10 +1914,9 @@ def info(self) -> Any: ------- ArrayInfo - See Also - -------- - AsyncArray.info_complete - All information about a group, including dynamic information + Related + ------- + [zarr.AsyncArray.info_complete][] - All information about an array, including dynamic information like the number of bytes and chunks written. Examples @@ -1954,10 +1952,9 @@ async def info_complete(self) -> Any: ------- ArrayInfo - See Also - -------- - AsyncArray.info - A property giving just the statically known information about an array.
+ Related + ------- + [zarr.AsyncArray.info][] - A property giving just the statically known information about an array. """ return self._info( await self._nshards_initialized(), @@ -2029,8 +2026,9 @@ def create( ) -> Array: """Creates a new Array instance from an initialized store. - .. deprecated:: 3.0.0 - Deprecated in favor of :func:`zarr.create_array`. + !!! warning "Deprecated" + `Array.create()` is deprecated since v3.0.0 and will be removed in a future release. + Use [`zarr.create_array`][] instead. Parameters ---------- @@ -2071,16 +2069,17 @@ def create( order : Literal["C", "F"], optional The memory of the array (default is "C"). If ``zarr_format`` is 2, this parameter sets the memory order of the array. - If `zarr_format`` is 3, then this parameter is deprecated, because memory order + If ``zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``. + filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -2089,7 +2088,7 @@ def create( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. 
The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -2103,7 +2102,7 @@ def create( - For Unicode strings, the default is ``VLenUTF8Codec``. - For bytes or objects, the default is ``VLenBytesCodec``. - These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`. + These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. overwrite : bool, optional Whether to raise an error if the store already exists (default is False). @@ -2168,8 +2167,7 @@ def _create( config: ArrayConfigLike | None = None, ) -> Array: """Creates a new Array instance from an initialized store. - See :func:`Array.create` for more details. - Deprecated in favor of :func:`zarr.create_array`. + Deprecated in favor of [`zarr.create_array`][]. """ async_array = sync( AsyncArray._create( @@ -2330,12 +2328,12 @@ def dtype(self) -> np.dtype[Any]: @property def attrs(self) -> Attributes: - """Returns a MutableMapping containing user-defined attributes. + """Returns a [MutableMapping][collections.abc.MutableMapping] containing user-defined attributes. Returns ------- - attrs : MutableMapping - A MutableMapping object containing user-defined attributes. + attrs + A [MutableMapping][collections.abc.MutableMapping] object containing user-defined attributes. Notes ----- @@ -2399,9 +2397,9 @@ def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. - .. 
deprecated:: 3.0.0 - `array.compressor` is deprecated and will be removed in a future release. - Use `array.compressors` instead. + !!! warning "Deprecated" + `array.compressor` is deprecated since v3.0.0 and will be removed in a future release. + Use [`array.compressors`][zarr.Array.compressors] instead. """ return self._async_array.compressor @@ -2776,9 +2774,9 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: fields Currently the implementation for __getitem__ is provided by - :func:`vindex` if the indexing is pure fancy indexing (ie a + [`vindex`][zarr.Array.vindex] if the indexing is pure fancy indexing (ie a broadcast-compatible tuple of integer array indices), or by - :func:`set_basic_selection` otherwise. + [`get_basic_selection`][zarr.Array.get_basic_selection] otherwise. Effectively, this means that the following indexing modes are supported: @@ -2789,14 +2787,16 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: - fancy indexing (vectorized list of integers) For specific indexing options including outer indexing, see the - methods listed under See Also. + methods listed under Related.
- See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__setitem__][zarr.Array.__setitem__] """ fields, pure_selection = pop_fields(selection) @@ -2875,27 +2875,35 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: fields Currently the implementation for __setitem__ is provided by - :func:`vindex` if the indexing is pure fancy indexing (ie a + [`vindex`][zarr.Array.vindex] if the indexing is pure fancy indexing (ie a broadcast-compatible tuple of integer array indices), or by - :func:`set_basic_selection` otherwise. + [`set_basic_selection`][zarr.Array.set_basic_selection] otherwise.
Effectively, this means that the following indexing modes are supported: - - integer indexing - - slice indexing - - mixed slice and integer indexing - - boolean indexing - - fancy indexing (vectorized list of integers) + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) For specific indexing options including outer indexing, see the - methods listed under See Also. + methods listed under Related. - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__] """ fields, pure_selection = pop_fields(selection) @@ -3006,15 +3014,23 @@ def get_basic_selection( the `fields` parameter. This method provides the implementation for accessing data via the - square bracket notation (__getitem__). See :func:`__getitem__` for examples + square bracket notation (__getitem__). See [`__getitem__`][zarr.Array.__getitem__] for examples using the alternative notation. 
- See Also - -------- - set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ @@ -3108,15 +3124,23 @@ def set_basic_selection( the `fields` parameter. This method provides the underlying implementation for modifying data via square - bracket notation, see :func:`__setitem__` for equivalent examples using the + bracket notation, see [`__setitem__`][zarr.Array.__setitem__] for equivalent examples using the alternative notation. 
- See Also - -------- - get_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3231,12 +3255,20 @@ def get_orthogonal_selection( Slices with step > 1 are supported, but slices with negative step are not. 
- See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3343,13 +3375,20 @@ def set_orthogonal_selection( Slices with step > 1 are supported, but slices with negative step are not. 
- See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() @@ -3423,12 +3462,20 @@ def get_mask_selection( coordinate indexing. Internally the mask array is converted to coordinate arrays by calling `np.nonzero`. 
- See Also - -------- - get_basic_selection, set_basic_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3505,12 +3552,20 @@ def set_mask_selection( coordinate indexing. Internally the mask array is converted to coordinate arrays by calling `np.nonzero`. 
- See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3585,12 +3640,20 @@ def get_coordinate_selection( before being applied. The shape of the output will be the same as the shape of each coordinate array after broadcasting. 
- See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3669,12 +3732,20 @@ def set_coordinate_selection( Slices are not supported. Coordinate arrays must be provided for all dimensions of the array. 
- See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3785,13 +3856,20 @@ def get_block_selection( [13, 14, 15, 16, 17, 18], [23, 24, 25, 26, 27, 28]]) - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [set_block_selection][zarr.Array.set_block_selection], + 
[vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() @@ -3878,12 +3956,20 @@ def set_block_selection( Slices are supported. However, only with a step size of one. - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + Related + ------- + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3893,21 +3979,28 @@ def set_block_selection( @property def vindex(self) -> VIndex: - """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, - :func:`set_coordinate_selection`, :func:`get_mask_selection` and - :func:`set_mask_selection` for documentation and examples.""" + """Shortcut for vectorized (inner) indexing, see + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_mask_selection][zarr.Array.get_mask_selection] and + [set_mask_selection][zarr.Array.set_mask_selection] for documentation 
and + examples.""" return VIndex(self) @property def oindex(self) -> OIndex: - """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and - :func:`set_orthogonal_selection` for documentation and examples.""" + """Shortcut for orthogonal (outer) indexing, see + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection] and + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection] for + documentation and examples.""" return OIndex(self) @property def blocks(self) -> BlockIndex: - """Shortcut for blocked chunked indexing, see :func:`get_block_selection` and - :func:`set_block_selection` for documentation and examples.""" + """Shortcut for blocked chunked indexing, see + [get_block_selection][zarr.Array.get_block_selection] and + [set_block_selection][zarr.Array.set_block_selection] for documentation and + examples.""" return BlockIndex(self) def resize(self, new_shape: ShapeLike) -> None: @@ -4027,11 +4120,10 @@ def info(self) -> Any: ------- ArrayInfo - See Also - -------- - Array.info_complete - All information about a group, including dynamic information - like the number of bytes and chunks written. + Related + ------- + [zarr.Array.info_complete][] - All information about an array, + including dynamic information like the number of bytes and chunks written. Examples -------- @@ -4064,10 +4156,9 @@ def info_complete(self) -> Any: ------- ArrayInfo - See Also - -------- - Array.info - The statically known subset of metadata about an array. + Related + ------- + [zarr.Array.info][] - The statically known subset of metadata about an array. """ return sync(self._async_array.info_complete()) @@ -4088,9 +4179,9 @@ async def _shards_initialized( chunks_initialized : tuple[str, ...] The keys of the chunks that have been initialized. - See Also - -------- - nchunks_initialized + Related + ------- + [nchunks_initialized][zarr.Array.nchunks_initialized] """ store_contents = [ @@ -4194,8 +4285,8 @@ async def from_array( chunk to bytes. 
For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representation of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -4205,8 +4296,8 @@ async def from_array( which is to choose default filters based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are the empty tuple ``()``. The only cases where default filters are not empty is when the Zarr format is 2, and the - data type is a variable-length data type like :class:`zarr.dtype.VariableLengthUTF8` or - :class:`zarr.dtype.VariableLengthUTF8`. In these cases, the default filters is a tuple with a + data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or + [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters is a tuple with a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -4236,7 +4327,7 @@ async def from_array( - dict[str, JSON]: A dict representation of an ``ArrayBytesCodec``. - ArrayBytesCodec: An instance of ``ArrayBytesCodec``. - "auto": a default serializer will be used. These defaults can be changed by modifying the value of - ``array.v3_default_serializer`` in :mod:`zarr.core.config`. + ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. - "keep": Retain the serializer of the input array if it is a zarr Array. fill_value : Any, optional @@ -4441,8 +4532,8 @@ async def init_array( chunk to bytes. 
For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representation of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -4451,7 +4542,7 @@ async def init_array( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -4459,7 +4550,7 @@ async def init_array( List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. - The default value of ``"auto"`` instructs Zarr to use a default of :class:`zarr.codecs.ZstdCodec`. + The default value of ``"auto"`` instructs Zarr to use a default of [`zarr.codecs.ZstdCodec`][]. To create an array with no compressors, provide an empty iterable or the value ``None``. serializer : dict[str, JSON] | ArrayBytesCodec | Literal["auto"], optional @@ -4467,9 +4558,9 @@ async def init_array( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. 
The default value of ``"auto"`` instructs Zarr to use a default codec based on the data type of the array. - For most data types this default codec is :class:`zarr.codecs.BytesCodec`. - For :class:`zarr.dtype.VariableLengthUTF8`, the default codec is :class:`zarr.codecs.VlenUTF8Codec`. - For :class:`zarr.dtype.VariableLengthBytes`, the default codec is :class:`zarr.codecs.VlenBytesCodec`. + For most data types this default codec is [`zarr.codecs.BytesCodec`][]. + For [`zarr.dtype.VariableLengthUTF8`][], the default codec is [`zarr.codecs.VlenUTF8Codec`][]. + For [`zarr.dtype.VariableLengthBytes`][], the default codec is [`zarr.codecs.VlenBytesCodec`][]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -4479,7 +4570,7 @@ async def init_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. attributes : dict, optional @@ -4665,8 +4756,9 @@ async def create_array( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. 
@@ -4675,7 +4767,7 @@ async def create_array( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -4687,20 +4779,20 @@ async def create_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + This default can be changed in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. 
order : {"C", "F"}, optional @@ -4710,7 +4802,7 @@ async def create_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. attributes : dict, optional diff --git a/src/zarr/core/attributes.py b/src/zarr/core/attributes.py index e699c4f66d..e000839436 100644 --- a/src/zarr/core/attributes.py +++ b/src/zarr/core/attributes.py @@ -42,13 +42,13 @@ def put(self, d: dict[str, JSON]) -> None: Equivalent to the following pseudo-code, but performed atomically. - .. code-block:: python - - >>> attrs = {"a": 1, "b": 2} - >>> attrs.clear() - >>> attrs.update({"a": 3", "c": 4}) - >>> attrs - {'a': 3, 'c': 4} + ```python + >>> attrs = {"a": 1, "b": 2} + >>> attrs.clear() + >>> attrs.update({"a": 3, "c": 4}) + >>> attrs + {'a': 3, 'c': 4} + ``` """ self._obj.metadata.attributes.clear() self._obj = self._obj.update_attributes(d) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index d519b87d45..189916dc91 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -218,7 +218,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: Parameters ---------- bytes_like - bytes-like object + bytes-like object Returns ------- diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 34f92ece4a..415b9d928c 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -86,7 +86,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: Parameters ---------- bytes_like - bytes-like object + bytes-like object Returns ------- diff --git 
a/src/zarr/core/config.py b/src/zarr/core/config.py index a918b789dd..5d463ec79c 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -8,21 +8,21 @@ to be ``your.module.NewBytesCodec``. Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations. - .. code-block:: python + ```python + from your.module import NewBytesCodec + from zarr.core.config import register_codec, config - from your.module import NewBytesCodec - from zarr.core.config import register_codec, config - - register_codec("bytes", NewBytesCodec) - config.set({"codecs.bytes": "your.module.NewBytesCodec"}) + register_codec("bytes", NewBytesCodec) + config.set({"codecs.bytes": "your.module.NewBytesCodec"}) + ``` Instead of setting the value programmatically with ``config.set``, you can also set the value with an environment variable. The environment variable ``ZARR_CODECS__BYTES`` can be set to ``your.module.NewBytesCodec``. The double underscore ``__`` is used to indicate nested access. - .. code-block:: bash - - export ZARR_CODECS__BYTES="your.module.NewBytesCodec" + ```bash + export ZARR_CODECS__BYTES="your.module.NewBytesCodec" + ``` For more information, see the Donfig documentation at https://github.com/pytroll/donfig. """ diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index 37371cd0cd..3e7f5b72f0 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -23,8 +23,8 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): """ A Zarr data type for arrays containing booleans. - Wraps the ``np.dtypes.BoolDType`` data type. Scalars for this data type are instances of - ``np.bool_``. + Wraps the [`np.dtypes.BoolDType`][numpy.dtypes.BoolDType] data type. Scalars for this data type are instances of + [`np.bool_`][numpy.bool_]. 
Attributes ---------- @@ -41,7 +41,7 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): ---------- This class implements the boolean data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ _zarr_v3_name: ClassVar[Literal["bool"]] = "bool" @@ -236,7 +236,7 @@ def cast_scalar(self, data: object) -> np.bool_: Returns ------- - ``np.bool_`` + bool : np.bool_ The numpy boolean scalar. Raises @@ -258,7 +258,7 @@ def default_scalar(self) -> np.bool_: Returns ------- - ``np.bool_`` + bool : np.bool_ The default value. """ return np.False_ @@ -294,7 +294,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: Returns ------- - ``np.bool_`` + bool : np.bool_ The numpy boolean scalar. Raises diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index b7c764dcd9..16c3326f63 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -36,11 +36,11 @@ class FixedLengthBytesConfig(TypedDict): Examples -------- - .. code-block:: python - - { - "length_bytes": 12 - } + ```python + { + "length_bytes": 12 + } + ``` """ length_bytes: int @@ -56,17 +56,17 @@ class NullterminatedBytesJSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. 
code-block:: python - - { - "name": "|S10", - "object_codec_id": None - } + ```python + { + "name": "|S10", + "object_codec_id": None + } + ``` """ @@ -83,14 +83,14 @@ class NullTerminatedBytesJSON_V3( Examples -------- - .. code-block:: python - - { - "name": "null_terminated_bytes", - "configuration": { - "length_bytes": 12 - } + ```python + { + "name": "null_terminated_bytes", + "configuration": { + "length_bytes": 12 } + } + ``` """ @@ -105,17 +105,18 @@ class RawBytesJSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python + ```python { "name": "|V10", "object_codec_id": None } + ``` """ @@ -130,12 +131,14 @@ class RawBytesJSON_V3(NamedConfig[Literal["raw_bytes"], FixedLengthBytesConfig]) Examples -------- - .. code-block:: python - - { - "name": "raw_bytes", - "configuration": { - "length_bytes": 12 + ```python + { + "name": "raw_bytes", + "configuration": { + "length_bytes": 12 + } + } + ``` """ @@ -149,16 +152,16 @@ class VariableLengthBytesJSON_V2(DTypeConfig_V2[Literal["|O"], Literal["vlen-byt References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "|O", - "object_codec_id": "vlen-bytes" - } + ```python + { + "name": "|O", + "object_codec_id": "vlen-bytes" + } + ``` """ @@ -167,8 +170,8 @@ class NullTerminatedBytes(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLengt """ A Zarr data type for arrays containing fixed-length null-terminated byte sequences. - Wraps the ``np.dtypes.BytesDType`` data type. Scalars for this data type are instances of - ``np.bytes_``. 
+ Wraps the [`np.dtypes.BytesDType`][numpy.dtypes.BytesDType] data type. Scalars for this data type are instances of + [`np.bytes_`][numpy.bytes_]. This data type is parametrized by an integral length which specifies size in bytes of each scalar. Because this data type uses null-terminated semantics, indexing into @@ -410,7 +413,7 @@ def _check_scalar(self, data: object) -> TypeGuard[BytesLike]: def _cast_scalar_unchecked(self, data: BytesLike) -> np.bytes_: """ - Cast the provided scalar data to ``np.bytes_``, truncating if necessary. + Cast the provided scalar data to [`np.bytes_`][numpy.bytes_], truncating if necessary. Parameters ---------- @@ -419,7 +422,7 @@ def _cast_scalar_unchecked(self, data: BytesLike) -> np.bytes_: Returns ------- - np.bytes_ + bytes : [`np.bytes_`][numpy.bytes_] The casted data as a NumPy bytes scalar. Notes @@ -447,7 +450,7 @@ def cast_scalar(self, data: object) -> np.bytes_: Returns ------- - ``np.bytes_`` + bytes : [`np.bytes_`][numpy.bytes_] The data cast as a NumPy bytes scalar. Raises @@ -470,7 +473,7 @@ def default_scalar(self) -> np.bytes_: Returns ------- - ``np.bytes_`` + bytes : [`np.bytes_`][numpy.bytes_] The default scalar value. """ return np.bytes_(b"") @@ -499,7 +502,7 @@ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: """ - Read a JSON-serializable value as ``np.bytes_``. + Read a JSON-serializable value as [`np.bytes_`][numpy.bytes_]. Parameters ---------- @@ -510,7 +513,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: Returns ------- - ``np.bytes_`` + bytes : [`np.bytes_`][numpy.bytes_] The NumPy bytes scalar obtained from decoding the base64 string. Raises @@ -543,7 +546,7 @@ class RawBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize """ A Zarr data type for arrays containing fixed-length sequences of raw bytes. - Wraps the NumPy ``void`` data type. 
Scalars for this data type are instances of ``np.void``. + Wraps the NumPy ``void`` data type. Scalars for this data type are instances of [`np.void`][numpy.void]. This data type is parametrized by an integral length which specifies size in bytes of each scalar belonging to this data type. diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index 2f432a9e0a..99abee5e24 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -353,8 +353,8 @@ class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): """ A Zarr data type for arrays containing 64 bit complex floats. - Wraps the ``np.dtypes.Complex64DType`` data type. Scalars for this data type - are instances of ``np.complex64``. + Wraps the [`np.dtypes.Complex64DType`][numpy.dtypes.Complex64DType] data type. Scalars for this data type + are instances of [`np.complex64`][numpy.complex64]. Attributes ---------- @@ -388,8 +388,8 @@ class Complex128(BaseComplex[np.dtypes.Complex128DType, np.complex128], HasEndia """ A Zarr data type for arrays containing 64 bit complex floats. - Wraps the ``np.dtypes.Complex128DType`` data type. Scalars for this data type - are instances of ``np.complex128``. + Wraps the [`np.dtypes.Complex128DType`][numpy.dtypes.Complex128DType] data type. Scalars for this data type + are instances of [`np.complex128`][numpy.complex128]. Attributes ---------- diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index 3113bc5b61..bedb44b52d 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -314,8 +314,8 @@ class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): """ A Zarr data type for arrays containing 16-bit floating point numbers. - Wraps the ``np.dtypes.Float16DType`` data type. Scalars for this data type are instances - of ``np.float16``. + Wraps the [`np.dtypes.Float16DType`][numpy.dtypes.Float16DType] data type. 
Scalars for this data type are instances + of [`np.float16`][numpy.float16]. Attributes ---------- @@ -326,7 +326,7 @@ class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): ---------- This class implements the float16 data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float16DType @@ -351,8 +351,8 @@ class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): """ A Zarr data type for arrays containing 32-bit floating point numbers. - Wraps the ``np.dtypes.Float32DType`` data type. Scalars for this data type are instances - of ``np.float32``. + Wraps the [`np.dtypes.Float32DType`][numpy.dtypes.Float32DType] data type. Scalars for this data type are instances + of [`np.float32`][numpy.float32]. Attributes ---------- @@ -363,7 +363,7 @@ class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): ---------- This class implements the float32 data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float32DType @@ -388,8 +388,8 @@ class Float64(BaseFloat[np.dtypes.Float64DType, np.float64]): """ A Zarr data type for arrays containing 64-bit floating point numbers. - Wraps the ``np.dtypes.Float64DType`` data type. Scalars for this data type are instances - of ``np.float64``. + Wraps the [`np.dtypes.Float64DType`][numpy.dtypes.Float64DType] data type. 
Scalars for this data type are instances + of [`np.float64`][numpy.float64]. Attributes ---------- @@ -400,7 +400,7 @@ class Float64(BaseFloat[np.dtypes.Float64DType, np.float64]): ---------- This class implements the float64 data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float64DType diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index ac04d4469a..6f7ebc2f55 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -236,8 +236,8 @@ class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): """ A Zarr data type for arrays containing 8-bit signed integers. - Wraps the ``np.dtypes.Int8DType`` data type. Scalars for this data type are - instances of ``np.int8``. + Wraps the [`np.dtypes.Int8DType`][numpy.dtypes.Int8DType] data type. Scalars for this data type are + instances of [`np.int8`][numpy.int8]. Attributes ---------- @@ -248,7 +248,7 @@ class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): ---------- This class implements the 8-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int8DType @@ -393,7 +393,7 @@ class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): """ A Zarr data type for arrays containing 8-bit unsigned integers. - Wraps the ``np.dtypes.UInt8DType`` data type. Scalars for this data type are instances of ``np.uint8``. 
+ Wraps the [`np.dtypes.UInt8DType`][numpy.dtypes.UInt8DType] data type. Scalars for this data type are instances of [`np.uint8`][numpy.uint8]. Attributes ---------- @@ -404,7 +404,7 @@ class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): ---------- This class implements the 8-bit unsigned integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt8DType @@ -539,8 +539,8 @@ class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): """ A Zarr data type for arrays containing 16-bit signed integers. - Wraps the ``np.dtypes.Int16DType`` data type. Scalars for this data type are instances of - ``np.int16``. + Wraps the [`np.dtypes.Int16DType`][numpy.dtypes.Int16DType] data type. Scalars for this data type are instances of + [`np.int16`][numpy.int16]. Attributes ---------- @@ -551,7 +551,7 @@ class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): ---------- This class implements the 16-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int16DType @@ -701,8 +701,8 @@ class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): """ A Zarr data type for arrays containing 16-bit unsigned integers. - Wraps the ``np.dtypes.UInt16DType`` data type. Scalars for this data type are instances of - ``np.uint16``. 
+ Wraps the [`np.dtypes.UInt16DType`][numpy.dtypes.UInt16DType] data type. Scalars for this data type are instances of + [`np.uint16`][numpy.uint16]. Attributes ---------- @@ -713,7 +713,7 @@ class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): ---------- This class implements the unsigned 16-bit unsigned integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt16DType @@ -863,8 +863,8 @@ class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): """ A Zarr data type for arrays containing 32-bit signed integers. - Wraps the ``np.dtypes.Int32DType`` data type. Scalars for this data type are instances of - ``np.int32``. + Wraps the [`np.dtypes.Int32DType`][numpy.dtypes.Int32DType] data type. Scalars for this data type are instances of + [`np.int32`][numpy.int32]. Attributes ---------- @@ -875,7 +875,7 @@ class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): ---------- This class implements the 32-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int32DType @@ -1046,8 +1046,8 @@ class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): """ A Zarr data type for arrays containing 32-bit unsigned integers. - Wraps the ``np.dtypes.UInt32DType`` data type. Scalars for this data type are instances of - ``np.uint32``. 
+ Wraps the [`np.dtypes.UInt32DType`][numpy.dtypes.UInt32DType] data type. Scalars for this data type are instances of + [`np.uint32`][numpy.uint32]. Attributes ---------- @@ -1058,7 +1058,7 @@ class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): ---------- This class implements the 32-bit unsigned integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt32DType @@ -1204,8 +1204,8 @@ class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): """ A Zarr data type for arrays containing 64-bit signed integers. - Wraps the ``np.dtypes.Int64DType`` data type. Scalars for this data type are instances of - ``np.int64``. + Wraps the [`np.dtypes.Int64DType`][numpy.dtypes.Int64DType] data type. Scalars for this data type are instances of + [`np.int64`][numpy.int64]. Attributes ---------- @@ -1216,7 +1216,7 @@ class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): ---------- This class implements the 64-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int64DType @@ -1362,8 +1362,8 @@ class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): """ A Zarr data type for arrays containing 64-bit unsigned integers. - Wraps the ``np.dtypes.UInt64DType`` data type. Scalars for this data type - are instances of ``np.uint64``. 
+ Wraps the [`np.dtypes.UInt64DType`][numpy.dtypes.UInt64DType] data type. Scalars for this data type + are instances of [`np.uint64`][numpy.uint64]. Attributes ---------- @@ -1374,7 +1374,7 @@ class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): ---------- This class implements the unsigned 64-bit integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt64DType diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 32375a1c71..ee8cc71aaf 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -70,17 +70,17 @@ class FixedLengthUTF32JSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "`__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "|O", - "object_codec_id": "vlen-utf8" - } + ```python + { + "name": "|O", + "object_codec_id": "vlen-utf8" + } + ``` """ @@ -467,7 +467,7 @@ class UTF8Base(ZDType[TDType_co, str], HasObjectCodec): ---------- This data type does not have a Zarr V3 specification. - The Zarr V2 data type specification can be found `here `__. + The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). 
""" _zarr_v3_name: ClassVar[Literal["string"]] = "string" diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index a0e3b0fbd4..7aa546ea9c 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -41,19 +41,19 @@ class StructuredJSON_V2(DTypeConfig_V2[StructuredName_V2, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": [ - ["f0", "`__. + The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). """ _zarr_v3_name: ClassVar[Literal["structured"]] = "structured" diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index d523e16940..402a140321 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -113,9 +113,9 @@ class TimeConfig(TypedDict): Examples -------- - .. code-block:: python - - {"unit": "ms", "scale_factor": 1} + ```python + {"unit": "ms", "scale_factor": 1} + ``` """ unit: ReadOnly[DateTimeUnit] @@ -129,19 +129,19 @@ class DateTime64JSON_V3(NamedConfig[Literal["numpy.datetime64"], TimeConfig]): References ---------- This representation is defined in the ``numpy.datetime64`` - `specification document `__. + [specification document](https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-datetime64). Examples -------- - .. 
code-block:: python - - { - "name": "numpy.datetime64", - "configuration": { - "unit": "ms", - "scale_factor": 1 - } - } + ```python + { + "name": "numpy.datetime64", + "configuration": { + "unit": "ms", + "scale_factor": 1 + } + } + ``` """ @@ -152,19 +152,19 @@ class TimeDelta64JSON_V3(NamedConfig[Literal["numpy.timedelta64"], TimeConfig]): References ---------- This representation is defined in the numpy.timedelta64 - `specification document `__. + [specification document](https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-timedelta64). Examples -------- - .. code-block:: python - - { - "name": "numpy.timedelta64", - "configuration": { - "unit": "ms", - "scale_factor": 1 - } - } + ```python + { + "name": "numpy.timedelta64", + "configuration": { + "unit": "ms", + "scale_factor": 1 + } + } + ``` """ @@ -178,17 +178,17 @@ class TimeDelta64JSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "`__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "`__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). The Zarr V3 representation of this data type is defined in the ``numpy.timedelta64`` - `specification document `__ + [specification document](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.timedelta64) """ # mypy infers the type of np.dtypes.TimeDelta64DType to be @@ -452,15 +452,15 @@ def _from_json_v3(cls, data: DTypeJSON) -> Self: For example: - .. 
code-block:: json - - { - "name": "numpy.timedelta64", - "configuration": { - "unit": "generic", - "scale_factor": 1 - } + ```json + { + "name": "numpy.timedelta64", + "configuration": { + "unit": "generic", + "scale_factor": 1 } + } + ``` """ if cls._check_json_v3(data): @@ -615,10 +615,10 @@ class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEnd References ---------- The Zarr V2 representation of this data type is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). The Zarr V3 representation of this data type is defined in the ``numpy.datetime64`` - `specification document `__ + [specification document](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.datetime64) """ dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 1c41a8a4a8..e71c55c10f 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -857,9 +857,9 @@ def info(self) -> Any: ------- GroupInfo - See Also - -------- - AsyncGroup.info_complete + Related + ------- + [zarr.AsyncGroup.info_complete][] All information about a group, including dynamic information """ @@ -881,9 +881,9 @@ async def info_complete(self) -> Any: ------- GroupInfo - See Also - -------- - AsyncGroup.info + Related + ------- + [zarr.AsyncGroup.info][] """ members = [x[1].metadata async for x in self.members(max_depth=None)] return self._info(members=members) @@ -1031,7 +1031,7 @@ async def create_array( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array within this group. - This method lightly wraps :func:`zarr.core.array.create_array`. + This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -1052,8 +1052,8 @@ async def create_array( chunk to bytes. 
For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -1062,7 +1062,7 @@ async def create_array( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -1074,13 +1074,13 @@ async def create_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor.
compressor : Codec, optional Deprecated in favor of ``compressors``. @@ -1089,7 +1089,7 @@ async def create_array( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -1099,7 +1099,7 @@ async def create_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional @@ -1159,18 +1159,19 @@ async def create_dataset( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead. + !!! warning "Deprecated" + `AsyncGroup.create_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. + Use `AsyncGroup.create_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.require_dataset` method. + with h5py, Zarr groups also implement the [zarr.AsyncGroup.require_dataset][] method. Parameters ---------- name : str Array name. **kwargs : dict - Additional arguments passed to :func:`zarr.AsyncGroup.create_array`. + Additional arguments passed to [zarr.AsyncGroup.create_array][]. 
Returns ------- @@ -1199,13 +1200,14 @@ async def require_dataset( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Obtain an array, creating if it doesn't exist. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.require_dataset` instead. + !!! warning "Deprecated" + `AsyncGroup.require_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. + Use `AsyncGroup.require_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.create_dataset` method. + with h5py, Zarr groups also implement the [zarr.AsyncGroup.create_dataset][] method. - Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + Other `kwargs` are as per [zarr.AsyncGroup.create_dataset][]. Parameters ---------- @@ -1236,7 +1238,7 @@ async def require_array( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Obtain an array, creating if it doesn't exist. - Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + Other `kwargs` are as per [zarr.AsyncGroup.create_dataset][]. Parameters ---------- @@ -1623,7 +1625,7 @@ async def empty( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Notes ----- @@ -1645,7 +1647,7 @@ async def zeros( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1666,7 +1668,7 @@ async def ones( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][].
Returns ------- @@ -1689,7 +1691,7 @@ async def full( fill_value : scalar Value to fill the array with. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1717,7 +1719,7 @@ async def empty_like( data : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1738,7 +1740,7 @@ async def zeros_like( data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1759,7 +1761,7 @@ async def ones_like( data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1780,7 +1782,7 @@ async def full_like( data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2067,9 +2069,9 @@ def info(self) -> Any: ------- GroupInfo - See Also - -------- - Group.info_complete + Related + ------- + [zarr.Group.info_complete][] All information about a group, including dynamic information like the children members. """ @@ -2086,9 +2088,9 @@ def info_complete(self) -> Any: ------- GroupInfo - See Also - -------- - Group.info + Related + ------- + [zarr.Group.info][] """ return self._sync(self._async_group.info_complete()) @@ -2459,7 +2461,7 @@ def create( ) -> Array: """Create an array within this group. - This method lightly wraps :func:`zarr.core.array.create_array`. + This method lightly wraps [`zarr.core.array.create_array`][]. 
Parameters ---------- @@ -2482,8 +2484,8 @@ def create( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -2492,7 +2494,7 @@ def create( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -2504,13 +2506,13 @@ def create( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][]. Use ``None`` to omit the default compressor.
compressor : Codec, optional Deprecated in favor of ``compressors``. @@ -2519,7 +2521,7 @@ def create( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -2529,7 +2531,7 @@ def create( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional @@ -2603,7 +2605,7 @@ def create_array( ) -> Array: """Create an array within this group. - This method lightly wraps :func:`zarr.core.array.create_array`. + This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -2626,8 +2628,8 @@ def create_array( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -2636,7 +2638,7 @@ def create_array( type of the array and the Zarr format specified. 
For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -2648,13 +2650,13 @@ def create_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. @@ -2663,7 +2665,7 @@ def create_array( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -2673,7 +2675,7 @@ def create_array( is a runtime parameter for Zarr format 3 arrays.
The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional @@ -2732,19 +2734,20 @@ def create_array( def create_dataset(self, name: str, **kwargs: Any) -> Array: """Create an array. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `Group.create_array` instead. + !!! warning "Deprecated" + `Group.create_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. + Use `Group.create_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the :func:`zarr.Group.require_dataset` method. + with h5py, Zarr groups also implement the [zarr.Group.require_dataset][] method. Parameters ---------- name : str Array name. **kwargs : dict - Additional arguments passed to :func:`zarr.Group.create_array` + Additional arguments passed to [zarr.Group.create_array][] Returns ------- @@ -2756,20 +2759,21 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array: def require_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead. + !!! warning "Deprecated" + `Group.require_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. + Use `Group.require_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the :func:`zarr.Group.create_dataset` method. 
+ with h5py, Zarr groups also implement the [zarr.Group.create_dataset][] method. - Other `kwargs` are as per :func:`zarr.Group.create_dataset`. + Other `kwargs` are as per [zarr.Group.create_dataset][]. Parameters ---------- name : str Array name. **kwargs : - See :func:`zarr.Group.create_dataset`. + See [zarr.Group.create_dataset][]. Returns ------- @@ -2780,14 +2784,14 @@ def require_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Arra def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. - Other `kwargs` are as per :func:`zarr.Group.create_array`. + Other `kwargs` are as per [zarr.Group.create_array][]. Parameters ---------- name : str Array name. **kwargs : - See :func:`zarr.Group.create_array`. + See [zarr.Group.create_array][]. Returns ------- @@ -2806,7 +2810,7 @@ def empty(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Notes ----- @@ -2826,7 +2830,7 @@ def zeros(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2845,7 +2849,7 @@ def ones(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2868,7 +2872,7 @@ def full( fill_value : scalar Value to fill the array with. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. 
+ Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2892,7 +2896,7 @@ def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> data : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2917,7 +2921,7 @@ def zeros_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2937,7 +2941,7 @@ def ones_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> A data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2956,7 +2960,7 @@ def full_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> A data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2999,10 +3003,11 @@ def array( ) -> Array: """Create an array within this group. - .. deprecated:: 3.0.0 + !!! warning "Deprecated" + `Group.array()` is deprecated since v3.0.0 and will be removed in a future release. Use `Group.create_array` instead. - This method lightly wraps :func:`zarr.core.array.create_array`. + This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -3023,8 +3028,8 @@ def array( chunk to bytes. 
For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a - dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a + dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. @@ -3033,7 +3038,7 @@ def array( type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like - :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. @@ -3045,13 +3050,13 @@ def array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``.
@@ -3060,7 +3065,7 @@ def array( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -3070,7 +3075,7 @@ def array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index be60f4208f..243096b029 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -183,7 +183,7 @@ def _iter_regions( The linear indexing order to use. Yields - ------- + ------ Iterator[tuple[slice, ...]] An iterator over tuples of slices, where each slice spans a separate contiguous region diff --git a/src/zarr/creation.py b/src/zarr/creation.py index 622406ed75..605b5af5de 100644 --- a/src/zarr/creation.py +++ b/src/zarr/creation.py @@ -1,10 +1,9 @@ """ Helpers for creating arrays. -.. warning:: +!!! warning "Deprecated" + This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. - This sub-module is deprecated. All functions here are defined - in the top level zarr namespace instead. 
""" import warnings diff --git a/src/zarr/storage/_memory.py b/src/zarr/storage/_memory.py index e6076d9669..a3fd058680 100644 --- a/src/zarr/storage/_memory.py +++ b/src/zarr/storage/_memory.py @@ -188,7 +188,7 @@ class GpuMemoryStore(MemoryStore): Parameters ---------- store_dict : MutableMapping, optional - A mutable mapping with string keys and :class:`zarr.core.buffer.gpu.Buffer` + A mutable mapping with string keys and [zarr.core.buffer.gpu.Buffer][] values. read_only : bool Whether to open the store in read-only mode. @@ -222,7 +222,7 @@ def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: ---------- store_dict : mapping A mapping of strings keys to arbitrary Buffers. The buffer data - will be moved into a :class:`gpu.Buffer`. + will be moved into a [`gpu.Buffer`][zarr.core.buffer.gpu.Buffer]. Returns -------