From d5f272f4e4e0ed87193f80b34d86861611357ca4 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:21:07 -0400 Subject: [PATCH 01/64] Use mkdocs-material for Zarr-Python documentation --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/labeler.yml | 2 +- .gitignore | 1 - .readthedocs.yaml | 16 +- changes/{2921.bugfix.rst => 2921.bugfix.md} | 0 changes/{3021.feature.rst => 3021.feature.md} | 0 changes/{3066.feature.rst => 3066.feature.md} | 0 changes/{3068.bugfix.rst => 3068.bugfix.md} | 0 changes/{3081.feature.rst => 3081.feature.md} | 0 changes/{3082.feature.rst => 3082.feature.md} | 0 changes/{3100.bugfix.rst => 3100.bugfix.md} | 0 changes/{3103.bugfix.rst => 3103.bugfix.md} | 0 changes/README.md | 2 +- docs/Makefile | 231 ------ docs/_static/custom.css | 110 --- docs/_static/custom.js | 17 - docs/_static/index_api.svg | 97 --- docs/_static/index_contribute.svg | 76 -- docs/_static/index_getting_started.svg | 66 -- docs/_static/index_user_guide.svg | 67 -- docs/about.rst | 24 - docs/api/abc/codec.md | 18 + docs/api/abc/metadata.md | 5 + docs/api/abc/store.md | 13 + docs/api/api_async.md | 32 + docs/api/api_sync.md | 32 + docs/api/array.md | 2 + docs/api/codecs.md | 17 + docs/api/config.md | 5 + docs/api/convenience.md | 10 + docs/api/create.md | 19 + docs/api/deprecated/convenience.md | 14 + docs/api/deprecated/creation.md | 15 + docs/api/errors.md | 10 + docs/api/group.md | 2 + docs/api/load.md | 5 + docs/api/open.md | 9 + docs/api/registry.md | 12 + docs/api/save.md | 7 + docs/api/storage.md | 19 + docs/api/testing.md | 56 ++ docs/conf.py | 379 ---------- docs/contributing.md | 257 +++++++ docs/developers/contributing.rst | 386 ---------- docs/developers/index.rst | 9 - docs/developers/roadmap.rst | 696 ------------------ docs/index.md | 230 ++++++ docs/index.rst | 113 --- docs/overrides/main.html | 9 + docs/overrides/stylesheets/extra.css | 52 ++ docs/quickstart.rst | 209 ------ docs/release-notes.md | 
210 ++++++ docs/release-notes.rst | 269 ------- docs/talks/scipy2019/submission.rst | 144 ---- docs/user-guide/arrays.md | 688 +++++++++++++++++ docs/user-guide/arrays.rst | 658 ----------------- docs/user-guide/attributes.md | 29 + docs/user-guide/attributes.rst | 30 - docs/user-guide/config.md | 92 +++ docs/user-guide/config.rst | 91 --- docs/user-guide/consolidated_metadata.md | 117 +++ docs/user-guide/consolidated_metadata.rst | 116 --- .../{extending.rst => extending.md} | 68 +- docs/user-guide/gpu.md | 32 + docs/user-guide/gpu.rst | 37 - docs/user-guide/groups.md | 172 +++++ docs/user-guide/groups.rst | 172 ----- docs/user-guide/index.rst | 30 - docs/user-guide/installation.md | 50 ++ docs/user-guide/installation.rst | 54 -- docs/user-guide/performance.md | 263 +++++++ docs/user-guide/performance.rst | 278 ------- docs/user-guide/storage.md | 140 ++++ docs/user-guide/storage.rst | 148 ---- docs/user-guide/v3_migration.md | 220 ++++++ docs/user-guide/v3_migration.rst | 238 ------ mkdocs.yml | 154 ++++ pyproject.toml | 26 +- 78 files changed, 3067 insertions(+), 4812 deletions(-) rename changes/{2921.bugfix.rst => 2921.bugfix.md} (100%) rename changes/{3021.feature.rst => 3021.feature.md} (100%) rename changes/{3066.feature.rst => 3066.feature.md} (100%) rename changes/{3068.bugfix.rst => 3068.bugfix.md} (100%) rename changes/{3081.feature.rst => 3081.feature.md} (100%) rename changes/{3082.feature.rst => 3082.feature.md} (100%) rename changes/{3100.bugfix.rst => 3100.bugfix.md} (100%) rename changes/{3103.bugfix.rst => 3103.bugfix.md} (100%) delete mode 100644 docs/Makefile delete mode 100644 docs/_static/custom.css delete mode 100644 docs/_static/custom.js delete mode 100644 docs/_static/index_api.svg delete mode 100644 docs/_static/index_contribute.svg delete mode 100644 docs/_static/index_getting_started.svg delete mode 100644 docs/_static/index_user_guide.svg delete mode 100644 docs/about.rst create mode 100644 docs/api/abc/codec.md create mode 100644 
docs/api/abc/metadata.md create mode 100644 docs/api/abc/store.md create mode 100644 docs/api/api_async.md create mode 100644 docs/api/api_sync.md create mode 100644 docs/api/array.md create mode 100644 docs/api/codecs.md create mode 100644 docs/api/config.md create mode 100644 docs/api/convenience.md create mode 100644 docs/api/create.md create mode 100644 docs/api/deprecated/convenience.md create mode 100644 docs/api/deprecated/creation.md create mode 100644 docs/api/errors.md create mode 100644 docs/api/group.md create mode 100644 docs/api/load.md create mode 100644 docs/api/open.md create mode 100644 docs/api/registry.md create mode 100644 docs/api/save.md create mode 100644 docs/api/storage.md create mode 100644 docs/api/testing.md delete mode 100644 docs/conf.py create mode 100644 docs/contributing.md delete mode 100644 docs/developers/contributing.rst delete mode 100644 docs/developers/index.rst delete mode 100644 docs/developers/roadmap.rst create mode 100644 docs/index.md delete mode 100644 docs/index.rst create mode 100644 docs/overrides/main.html create mode 100644 docs/overrides/stylesheets/extra.css delete mode 100644 docs/quickstart.rst create mode 100644 docs/release-notes.md delete mode 100644 docs/release-notes.rst delete mode 100644 docs/talks/scipy2019/submission.rst create mode 100644 docs/user-guide/arrays.md delete mode 100644 docs/user-guide/arrays.rst create mode 100644 docs/user-guide/attributes.md delete mode 100644 docs/user-guide/attributes.rst create mode 100644 docs/user-guide/config.md delete mode 100644 docs/user-guide/config.rst create mode 100644 docs/user-guide/consolidated_metadata.md delete mode 100644 docs/user-guide/consolidated_metadata.rst rename docs/user-guide/{extending.rst => extending.md} (52%) create mode 100644 docs/user-guide/gpu.md delete mode 100644 docs/user-guide/gpu.rst create mode 100644 docs/user-guide/groups.md delete mode 100644 docs/user-guide/groups.rst delete mode 100644 docs/user-guide/index.rst create 
mode 100644 docs/user-guide/installation.md delete mode 100644 docs/user-guide/installation.rst create mode 100644 docs/user-guide/performance.md delete mode 100644 docs/user-guide/performance.rst create mode 100644 docs/user-guide/storage.md delete mode 100644 docs/user-guide/storage.rst create mode 100644 docs/user-guide/v3_migration.md delete mode 100644 docs/user-guide/v3_migration.rst create mode 100644 mkdocs.yml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9b64c97d0a..c36428b300 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,7 +3,7 @@ TODO: * [ ] Add unit tests and/or doctests in docstrings * [ ] Add docstrings and API docs for any new/modified user-facing classes and functions -* [ ] New/modified features documented in `docs/user-guide/*.rst` +* [ ] New/modified features documented in `docs/user-guide/*.md` * [ ] Changes documented as a new file in `changes/` * [ ] GitHub Actions have all passed * [ ] Test coverage is 100% (Codecov passes) diff --git a/.github/labeler.yml b/.github/labeler.yml index ede89c9d35..7eb74211ea 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,4 +1,4 @@ needs release notes: - all: - changed-files: - - all-globs-to-all-files: '!changes/*.rst' + - all-globs-to-all-files: '!changes/*.md' diff --git a/.gitignore b/.gitignore index 1b2b63e651..cfc455b715 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,6 @@ coverage.xml # Sphinx documentation docs/_build/ -docs/api docs/data data data.zip diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 6253a7196f..05239456a0 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,16 +11,8 @@ build: then towncrier build --version Unreleased --yes; fi + commands: + - mamba install -c conda-forge -c nodefaults hatch + - hatch env run --env docs build + - hatch env run --env docs readthedocs -sphinx: - configuration: docs/conf.py - fail_on_warning: true - -formats: all - -python: - 
install: - - method: pip - path: . - extra_requirements: - - docs diff --git a/changes/2921.bugfix.rst b/changes/2921.bugfix.md similarity index 100% rename from changes/2921.bugfix.rst rename to changes/2921.bugfix.md diff --git a/changes/3021.feature.rst b/changes/3021.feature.md similarity index 100% rename from changes/3021.feature.rst rename to changes/3021.feature.md diff --git a/changes/3066.feature.rst b/changes/3066.feature.md similarity index 100% rename from changes/3066.feature.rst rename to changes/3066.feature.md diff --git a/changes/3068.bugfix.rst b/changes/3068.bugfix.md similarity index 100% rename from changes/3068.bugfix.rst rename to changes/3068.bugfix.md diff --git a/changes/3081.feature.rst b/changes/3081.feature.md similarity index 100% rename from changes/3081.feature.rst rename to changes/3081.feature.md diff --git a/changes/3082.feature.rst b/changes/3082.feature.md similarity index 100% rename from changes/3082.feature.rst rename to changes/3082.feature.md diff --git a/changes/3100.bugfix.rst b/changes/3100.bugfix.md similarity index 100% rename from changes/3100.bugfix.rst rename to changes/3100.bugfix.md diff --git a/changes/3103.bugfix.rst b/changes/3103.bugfix.md similarity index 100% rename from changes/3103.bugfix.rst rename to changes/3103.bugfix.md diff --git a/changes/README.md b/changes/README.md index 74ed9f94a9..889a52baa4 100644 --- a/changes/README.md +++ b/changes/README.md @@ -1,7 +1,7 @@ Writing a changelog entry ------------------------- -Please put a new file in this directory named `xxxx..rst`, where +Please put a new file in this directory named `xxxx..md`, where - `xxxx` is the pull request number associated with this entry - `` is one of: diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index f42ee840e9..0000000000 --- a/docs/Makefile +++ /dev/null @@ -1,231 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. 
-SPHINXOPTS = -W --keep-going -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from https://www.sphinx-doc.org/) -endif - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . - -.PHONY: help -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " epub3 to make an epub3" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all 
changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - @echo " dummy to check syntax errors of document sources" - -.PHONY: clean -clean: - rm -rf $(BUILDDIR)/* - rm -rf $(BUILDDIR)/../api - -.PHONY: html -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: dirhtml -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -.PHONY: singlehtml -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -.PHONY: pickle -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -.PHONY: json -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -.PHONY: htmlhelp -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." 
- -.PHONY: qthelp -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/zarr.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zarr.qhc" - -.PHONY: applehelp -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -.PHONY: devhelp -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/zarr" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/zarr" - @echo "# devhelp" - -.PHONY: epub -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -.PHONY: epub3 -epub3: - $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 - @echo - @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." - -.PHONY: latex -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -.PHONY: latexpdf -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
- -.PHONY: latexpdfja -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: text -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -.PHONY: man -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -.PHONY: texinfo -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -.PHONY: info -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -.PHONY: gettext -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -.PHONY: changes -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -.PHONY: linkcheck -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -.PHONY: doctest -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." 
- -.PHONY: coverage -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -.PHONY: xml -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -.PHONY: pseudoxml -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." - -.PHONY: dummy -dummy: - $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy - @echo - @echo "Build finished. Dummy builder generates no files." diff --git a/docs/_static/custom.css b/docs/_static/custom.css deleted file mode 100644 index 1d32606f9a..0000000000 --- a/docs/_static/custom.css +++ /dev/null @@ -1,110 +0,0 @@ -@import url('https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,400;0,700;0,900;1,400;1,700;1,900&family=Open+Sans:ital,wght@0,400;0,600;1,400;1,600&display=swap'); - -body { - font-family: 'Open Sans', sans-serif; -} - -pre, code { - font-size: 100%; - line-height: 155%; -} - -/* Style the active version button. - -- dev: orange -- stable: green -- old, PR: red - -Colors from: - -Wong, B. Points of view: Color blindness. -Nat Methods 8, 441 (2011). 
https://doi.org/10.1038/nmeth.1618 -*/ - -/* If the active version has the name "dev", style it orange */ -#version_switcher_button[data-active-version-name*="dev"] { - background-color: #E69F00; - border-color: #E69F00; - color:#000000; -} - -/* green for `stable` */ -#version_switcher_button[data-active-version-name*="stable"] { - background-color: #009E73; - border-color: #009E73; -} - -/* red for `old` */ -#version_switcher_button:not([data-active-version-name*="stable"], [data-active-version-name*="dev"], [data-active-version-name=""]) { - background-color: #980F0F; - border-color: #980F0F; -} - -/* Main page overview cards */ - -.sd-card { - background: #fff; - border-radius: 0; - padding: 30px 10px 20px 10px; - margin: 10px 0px; -} - -.sd-card .sd-card-header { - text-align: center; -} - -.sd-card .sd-card-header .sd-card-text { - margin: 0px; -} - -.sd-card .sd-card-img-top { - height: 52px; - width: 52px; - margin-left: auto; - margin-right: auto; -} - -.sd-card .sd-card-header { - border: none; - background-color: white; - font-size: var(--pst-font-size-h5); - font-weight: bold; - padding: 2.5rem 0rem 0.5rem 0rem; -} - -.sd-card .sd-card-footer { - border: none; - background-color: white; -} - -.sd-card .sd-card-footer .sd-card-text { - max-width: 220px; - margin-left: auto; - margin-right: auto; -} - -/* Dark theme tweaking */ -html[data-theme=dark] .sd-card img[src*='.svg'] { - filter: invert(0.82) brightness(0.8) contrast(1.2); -} - -/* Main index page overview cards */ -html[data-theme=dark] .sd-card { - background-color:var(--pst-color-background); -} - -html[data-theme=dark] .sd-shadow-sm { - box-shadow: 0 .1rem 1rem rgba(250, 250, 250, .6) !important -} - -html[data-theme=dark] .sd-card .sd-card-header { - background-color:var(--pst-color-background); -} - -html[data-theme=dark] .sd-card .sd-card-footer { - background-color:var(--pst-color-background); -} - -html[data-theme=dark] h1 { - color: var(--pst-color-primary); -} diff --git 
a/docs/_static/custom.js b/docs/_static/custom.js deleted file mode 100644 index 52f1cba9e0..0000000000 --- a/docs/_static/custom.js +++ /dev/null @@ -1,17 +0,0 @@ -// handle redirects -(() => { - let anchorMap = { - "installation": "installation.html", - "getting-started": "getting_started.html#getting-started", - "highlights": "getting_started.html#highlights", - "contributing": "contributing.html", - "projects-using-zarr": "getting_started.html#projects-using-zarr", - "contents": "getting_started.html#contents", - "indices-and-tables": "api.html#indices-and-tables" - } - - let hash = window.location.hash.substring(1); - if (hash && hash in anchorMap) { - window.location.replace(anchorMap[hash]); - } -})(); diff --git a/docs/_static/index_api.svg b/docs/_static/index_api.svg deleted file mode 100644 index 69f7ba1d2d..0000000000 --- a/docs/_static/index_api.svg +++ /dev/null @@ -1,97 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - diff --git a/docs/_static/index_contribute.svg b/docs/_static/index_contribute.svg deleted file mode 100644 index de3d902379..0000000000 --- a/docs/_static/index_contribute.svg +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - diff --git a/docs/_static/index_getting_started.svg b/docs/_static/index_getting_started.svg deleted file mode 100644 index 2d36622cb7..0000000000 --- a/docs/_static/index_getting_started.svg +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - diff --git a/docs/_static/index_user_guide.svg b/docs/_static/index_user_guide.svg deleted file mode 100644 index bd17053517..0000000000 --- a/docs/_static/index_user_guide.svg +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - diff --git a/docs/about.rst b/docs/about.rst deleted file mode 100644 index 7a0af998c0..0000000000 --- a/docs/about.rst +++ /dev/null @@ -1,24 +0,0 @@ -About -===== - -Zarr is a format for the storage of chunked, compressed, 
N-dimensional arrays -inspired by `HDF5 `_, `h5py -`_ and `bcolz `_. - -These documents describe the Zarr-Python implementation. More information -about the Zarr format can be found on the `main website `_. - -Projects using Zarr -------------------- - -If you are using Zarr-Python, we would `love to hear about it -`_. - -Funding -------- -The project is fiscally sponsored by `NumFOCUS `_, a US -501(c)(3) public charity, and development is supported by the -`MRC Centre for Genomics and Global Health `_ -and the `Chan Zuckerberg Initiative `_. - -.. _NumCodecs: https://numcodecs.readthedocs.io/ diff --git a/docs/api/abc/codec.md b/docs/api/abc/codec.md new file mode 100644 index 0000000000..88e35d01bf --- /dev/null +++ b/docs/api/abc/codec.md @@ -0,0 +1,18 @@ +--- +title: codec +--- + +## Attributes + +::: zarr.abc.codec.CodecInput +::: zarr.abc.codec.CodecOutput + +## Classes + +::: zarr.abc.codec.ArrayArrayCodec +::: zarr.abc.codec.ArrayBytesCodec +::: zarr.abc.codec.ArrayBytesCodecPartialDecodeMixin +::: zarr.abc.codec.ArrayBytesCodecPartialEncodeMixin +::: zarr.abc.codec.BaseCodec +::: zarr.abc.codec.BytesBytesCodec +::: zarr.abc.codec.CodecPipeline diff --git a/docs/api/abc/metadata.md b/docs/api/abc/metadata.md new file mode 100644 index 0000000000..00c670d3e4 --- /dev/null +++ b/docs/api/abc/metadata.md @@ -0,0 +1,5 @@ +--- +title: metadata +--- + +::: zarr.abc.metadata.Metadata diff --git a/docs/api/abc/store.md b/docs/api/abc/store.md new file mode 100644 index 0000000000..bb8ceebb91 --- /dev/null +++ b/docs/api/abc/store.md @@ -0,0 +1,13 @@ +--- +title: store +--- + +## Classes + +::: zarr.abc.store.ByteGetter +::: zarr.abc.store.ByteSetter +::: zarr.abc.store.Store + +## Functions + +::: zarr.abc.store.set_or_delete diff --git a/docs/api/api_async.md b/docs/api/api_async.md new file mode 100644 index 0000000000..1380469cff --- /dev/null +++ b/docs/api/api_async.md @@ -0,0 +1,32 @@ +--- +title: asynchronous +--- + +::: zarr.api.asynchronous.array +::: 
zarr.api.asynchronous.consolidate_metadata +::: zarr.api.asynchronous.copy +::: zarr.api.asynchronous.copy_all +::: zarr.api.asynchronous.copy_store +::: zarr.api.asynchronous.create +::: zarr.api.asynchronous.create_array +::: zarr.api.asynchronous.create_hierarchy +::: zarr.api.asynchronous.empty +::: zarr.api.asynchronous.empty_like +::: zarr.api.asynchronous.from_array +::: zarr.api.asynchronous.full +::: zarr.api.asynchronous.full_like +::: zarr.api.asynchronous.group +::: zarr.api.asynchronous.load +::: zarr.api.asynchronous.ones +::: zarr.api.asynchronous.ones_like +::: zarr.api.asynchronous.open +::: zarr.api.asynchronous.open_array +::: zarr.api.asynchronous.open_consolidated +::: zarr.api.asynchronous.open_group +::: zarr.api.asynchronous.open_like +::: zarr.api.asynchronous.save +::: zarr.api.asynchronous.save_array +::: zarr.api.asynchronous.save_group +::: zarr.api.asynchronous.tree +::: zarr.api.asynchronous.zeros +::: zarr.api.asynchronous.zeros_like diff --git a/docs/api/api_sync.md b/docs/api/api_sync.md new file mode 100644 index 0000000000..730be3449e --- /dev/null +++ b/docs/api/api_sync.md @@ -0,0 +1,32 @@ +--- +title: synchronous +--- + +::: zarr.api.synchronous.array +::: zarr.api.synchronous.consolidate_metadata +::: zarr.api.synchronous.copy +::: zarr.api.synchronous.copy_all +::: zarr.api.synchronous.copy_store +::: zarr.api.synchronous.create +::: zarr.api.synchronous.create_array +::: zarr.api.synchronous.create_hierarchy +::: zarr.api.synchronous.empty +::: zarr.api.synchronous.empty_like +::: zarr.api.synchronous.from_array +::: zarr.api.synchronous.full +::: zarr.api.synchronous.full_like +::: zarr.api.synchronous.group +::: zarr.api.synchronous.load +::: zarr.api.synchronous.ones +::: zarr.api.synchronous.ones_like +::: zarr.api.synchronous.open +::: zarr.api.synchronous.open_array +::: zarr.api.synchronous.open_consolidated +::: zarr.api.synchronous.open_group +::: zarr.api.synchronous.open_like +::: zarr.api.synchronous.save +::: 
zarr.api.synchronous.save_array +::: zarr.api.synchronous.save_group +::: zarr.api.synchronous.tree +::: zarr.api.synchronous.zeros +::: zarr.api.synchronous.zeros_like diff --git a/docs/api/array.md b/docs/api/array.md new file mode 100644 index 0000000000..ff61cb1fe2 --- /dev/null +++ b/docs/api/array.md @@ -0,0 +1,2 @@ +::: zarr.Array +::: zarr.AsyncArray diff --git a/docs/api/codecs.md b/docs/api/codecs.md new file mode 100644 index 0000000000..f2793875f4 --- /dev/null +++ b/docs/api/codecs.md @@ -0,0 +1,17 @@ +--- +title: codecs +--- + +::: zarr.codecs.BloscCname +::: zarr.codecs.BloscCodec +::: zarr.codecs.BloscShuffle +::: zarr.codecs.BytesCodec +::: zarr.codecs.Crc32cCodec +::: zarr.codecs.Endian +::: zarr.codecs.GzipCodec +::: zarr.codecs.ShardingCodec +::: zarr.codecs.ShardingCodecIndexLocation +::: zarr.codecs.TransposeCodec +::: zarr.codecs.VLenBytesCodec +::: zarr.codecs.VLenUTF8Codec +::: zarr.codecs.ZstdCodec diff --git a/docs/api/config.md b/docs/api/config.md new file mode 100644 index 0000000000..30803918f5 --- /dev/null +++ b/docs/api/config.md @@ -0,0 +1,5 @@ +--- +title: config +--- + +::: zarr.config diff --git a/docs/api/convenience.md b/docs/api/convenience.md new file mode 100644 index 0000000000..f2614e3724 --- /dev/null +++ b/docs/api/convenience.md @@ -0,0 +1,10 @@ +--- +title: convenience +--- + +::: zarr.consolidate_metadata +::: zarr.copy +::: zarr.copy_all +::: zarr.copy_store +::: zarr.print_debug_info +::: zarr.tree diff --git a/docs/api/create.md b/docs/api/create.md new file mode 100644 index 0000000000..971e9c293c --- /dev/null +++ b/docs/api/create.md @@ -0,0 +1,19 @@ +--- +title: create +--- + +::: zarr.array +::: zarr.create +::: zarr.create_array +::: zarr.create_group +::: zarr.create_hierarchy +::: zarr.empty +::: zarr.empty_like +::: zarr.full +::: zarr.full_like +::: zarr.from_array +::: zarr.group +::: zarr.ones +::: zarr.ones_like +::: zarr.zeros +::: zarr.zeros_like diff --git a/docs/api/deprecated/convenience.md 
b/docs/api/deprecated/convenience.md new file mode 100644 index 0000000000..f7629b7ac6 --- /dev/null +++ b/docs/api/deprecated/convenience.md @@ -0,0 +1,14 @@ +!!! warning "Deprecated" + This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. + +::: zarr.convenience.consolidate_metadata +::: zarr.convenience.copy +::: zarr.convenience.copy_all +::: zarr.convenience.copy_store +::: zarr.convenience.load +::: zarr.convenience.open +::: zarr.convenience.open_consolidated +::: zarr.convenience.save +::: zarr.convenience.save_array +::: zarr.convenience.save_group +::: zarr.convenience.tree diff --git a/docs/api/deprecated/creation.md b/docs/api/deprecated/creation.md new file mode 100644 index 0000000000..1f87c7cf2e --- /dev/null +++ b/docs/api/deprecated/creation.md @@ -0,0 +1,15 @@ +!!! warning "Deprecated" + This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. + +::: zarr.creation.array +::: zarr.creation.create +::: zarr.creation.empty +::: zarr.creation.empty_like +::: zarr.creation.full +::: zarr.creation.full_like +::: zarr.creation.ones +::: zarr.creation.ones_like +::: zarr.creation.open_array +::: zarr.creation.open_like +::: zarr.creation.zeros +::: zarr.creation.zeros_like diff --git a/docs/api/errors.md b/docs/api/errors.md new file mode 100644 index 0000000000..dfdd74c07a --- /dev/null +++ b/docs/api/errors.md @@ -0,0 +1,10 @@ +--- +title: errors +--- + +::: zarr.errors.BaseZarrError +::: zarr.errors.ContainsArrayAndGroupError +::: zarr.errors.ContainsArrayError +::: zarr.errors.ContainsGroupError +::: zarr.errors.MetadataValidationError +::: zarr.errors.NodeTypeValidationError diff --git a/docs/api/group.md b/docs/api/group.md new file mode 100644 index 0000000000..0cf9372de2 --- /dev/null +++ b/docs/api/group.md @@ -0,0 +1,2 @@ +::: zarr.Group +::: zarr.AsyncGroup diff --git a/docs/api/load.md b/docs/api/load.md new file mode 100644 index 
0000000000..d6463ca976 --- /dev/null +++ b/docs/api/load.md @@ -0,0 +1,5 @@ +--- +title: load +--- + +::: zarr.load diff --git a/docs/api/open.md b/docs/api/open.md new file mode 100644 index 0000000000..c59f896129 --- /dev/null +++ b/docs/api/open.md @@ -0,0 +1,9 @@ +--- +title: open +--- + +::: zarr.open +::: zarr.open_array +::: zarr.open_consolidated +::: zarr.open_group +::: zarr.open_like diff --git a/docs/api/registry.md b/docs/api/registry.md new file mode 100644 index 0000000000..97262f8920 --- /dev/null +++ b/docs/api/registry.md @@ -0,0 +1,12 @@ +--- +title: registry +--- + +::: zarr.registry.get_buffer_class +::: zarr.registry.get_codec_class +::: zarr.registry.get_ndbuffer_class +::: zarr.registry.get_pipeline_class +::: zarr.registry.register_buffer +::: zarr.registry.register_codec +::: zarr.registry.register_ndbuffer +::: zarr.registry.register_pipeline diff --git a/docs/api/save.md b/docs/api/save.md new file mode 100644 index 0000000000..c611d10a4c --- /dev/null +++ b/docs/api/save.md @@ -0,0 +1,7 @@ +--- +title: save +--- + +::: zarr.save +::: zarr.save_array +::: zarr.save_group diff --git a/docs/api/storage.md b/docs/api/storage.md new file mode 100644 index 0000000000..ac8e94158b --- /dev/null +++ b/docs/api/storage.md @@ -0,0 +1,19 @@ +--- +title: storage +--- + +## Attributes + +::: zarr.storage.StoreLike + +## Classes + +::: zarr.storage.FsspecStore +::: zarr.storage.GpuMemoryStore +::: zarr.storage.LocalStore +::: zarr.storage.LoggingStore +::: zarr.storage.MemoryStore +::: zarr.storage.ObjectStore +::: zarr.storage.StorePath +::: zarr.storage.WrapperStore +::: zarr.storage.ZipStore \ No newline at end of file diff --git a/docs/api/testing.md b/docs/api/testing.md new file mode 100644 index 0000000000..257d0ff59b --- /dev/null +++ b/docs/api/testing.md @@ -0,0 +1,56 @@ +--- +title: testing +--- + +## Buffer + +::: zarr.testing.buffer.NDBufferUsingTestNDArrayLike +::: zarr.testing.buffer.StoreExpectingTestBuffer +::: 
zarr.testing.buffer.TestBuffer + +## Stateful + +::: zarr.testing.stateful.MAX_BINARY_SIZE +::: zarr.testing.stateful.SyncStoreWrapper +::: zarr.testing.stateful.ZarrHierarchyStateMachine +::: zarr.testing.stateful.ZarrStoreStateMachine +::: zarr.testing.stateful.split_prefix_name + +## Store + +::: zarr.testing.store.StoreTests + +## Strategies + +::: zarr.testing.strategies.array_names +::: zarr.testing.strategies.array_shapes +::: zarr.testing.strategies.attrs +::: zarr.testing.strategies.compressors +::: zarr.testing.strategies.node_names +::: zarr.testing.strategies.short_node_names +::: zarr.testing.strategies.stores +::: zarr.testing.strategies.zarr_formats +::: zarr.testing.strategies.zarr_key_chars +::: zarr.testing.strategies.array_metadata +::: zarr.testing.strategies.arrays +::: zarr.testing.strategies.basic_indices +::: zarr.testing.strategies.chunk_shapes +::: zarr.testing.strategies.clear_store +::: zarr.testing.strategies.dimension_names +::: zarr.testing.strategies.end_slices +::: zarr.testing.strategies.is_negative_slice +::: zarr.testing.strategies.key_ranges +::: zarr.testing.strategies.keys +::: zarr.testing.strategies.np_array_and_chunks +::: zarr.testing.strategies.numpy_arrays +::: zarr.testing.strategies.orthogonal_indices +::: zarr.testing.strategies.paths +::: zarr.testing.strategies.safe_unicode_for_dtype +::: zarr.testing.strategies.shard_shapes +::: zarr.testing.strategies.simple_arrays +::: zarr.testing.strategies.v2_dtypes +::: zarr.testing.strategies.v3_dtypes + +## Utils + +::: zarr.testing.utils.assert_bytes_equal diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 9bb1c48901..0000000000 --- a/docs/conf.py +++ /dev/null @@ -1,379 +0,0 @@ -#!/usr/bin/env python3 -# -# zarr documentation build configuration file, created by -# sphinx-quickstart on Mon May 2 21:40:09 2016. -# -# This file is execfile()d with the current directory set to its -# containing dir. 
-# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - - -import os -import sys -from importlib.metadata import version as get_version -from typing import Any - -import sphinx -import sphinx.application - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.append(os.path.abspath("..")) - - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "sphinx.ext.viewcode", - "sphinx.ext.intersphinx", - 'autoapi.extension', - "numpydoc", - "sphinx_issues", - "sphinx_copybutton", - "sphinx_design", - 'sphinx_reredirects', -] - -issues_github_path = "zarr-developers/zarr-python" - -autoapi_dirs = ['../src/zarr'] -autoapi_add_toctree_entry = False -autoapi_generate_api_docs = True -autoapi_member_order = "groupwise" -autoapi_root = "api" -autoapi_keep_files = True -autoapi_options = [ 'members', 'undoc-members', 'show-inheritance', 'show-module-summary', 'imported-members', ] - -def skip_submodules( - app: sphinx.application.Sphinx, - what: str, - name: str, - obj: object, - skip: bool, - options: dict[str, Any] - ) -> bool: - # Skip documenting zarr.codecs submodules - # codecs are documented in the main zarr.codecs namespace - if what == "module" and name.startswith("zarr.codecs.") or name.startswith("zarr.core"): - skip = True - return skip - -# Add any paths that contain templates here, 
relative to this directory. -templates_path = ["_templates"] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The main toctree document. -main_doc = "index" - -# General information about the project. -project = "zarr" -copyright = "2025, Zarr Developers" -author = "Zarr Developers" - -version = get_version("zarr") -release = get_version("zarr") - -redirects = { - "spec": "https://zarr-specs.readthedocs.io", - "spec/v1": 'https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html', - "spec/v2": "https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html", - "spec/v3": "https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html", - "license": "https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt", - "tutorial": "user-guide", - "getting-started": "quickstart", - "roadmap": "developers/roadmap.html", - "installation": "user-guide/installation.html", - "api": "api/zarr/index", - "release": "release-notes" -} - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = "en" - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "talks"] - -# The reST default role (used for this markup: `text`) to use for all -# documents. 
-# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = "pydata_sphinx_theme" - -html_favicon = "_static/logo1.png" - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -html_theme_options = { - "github_url": "https://github.com/zarr-developers/zarr-python", - "twitter_url": "https://twitter.com/zarr_dev", - "icon_links": [ - { - "name": "Zarr Dev", - "url": "https://zarr.dev/", - "icon": "_static/logo1.png", - "type": "local", - }, - ], - "collapse_navigation": True, - "navigation_with_keys": False, - "announcement": "Zarr-Python 3 is here! Check out the release announcement here.", -} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. -# " v documentation" by default. -# html_title = 'zarr v@@' - -# A shorter title for the navigation bar. Default is the same as html_title. 
-# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -html_logo = "_static/logo_horizontal.svg" - - -def setup(app: sphinx.application.Sphinx) -> None: - app.add_css_file("custom.css") - app.connect("autoapi-skip-member", skip_submodules) - - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] -html_js_files = [ - "custom.js", -] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -# html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -html_sidebars = {"tutorial": []} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -# html_domain_indices = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. 
-# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -# html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = "zarrdoc" - -maximum_signature_line_length = 80 - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - #'preamble': '', - # Latex figure (float) alignment - #'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). 
-latex_documents = [ - (main_doc, "zarr.tex", "Zarr-Python", author, "manual"), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False - -# If true, show page references after internal links. -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [(main_doc, "zarr", "Zarr-Python", [author], 1)] - -# If true, show URL addresses after external links. -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - main_doc, - "zarr", - "Zarr-Python", - author, - "zarr", - "One line description of project.", - "Miscellaneous", - ), -] - -# Documents to append as an appendix to all manuals. -# texinfo_appendices = [] - -# If false, no module index is generated. -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -# texinfo_no_detailmenu = False - - -# Example configuration for intersphinx: refer to the Python standard library. 
-# use in refs e.g: -# :ref:`comparison manual ` -intersphinx_mapping = { - "python": ("https://docs.python.org/3/", None), - "numpy": ("https://numpy.org/doc/stable/", None), - "numcodecs": ("https://numcodecs.readthedocs.io/en/stable/", None), - "obstore": ("https://developmentseed.org/obstore/latest/", None), -} - - -# sphinx-copybutton configuration -copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " -copybutton_line_continuation_character = "\\" -copybutton_prompt_is_regexp = True diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000000..b53bffe8a0 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,257 @@ +# Contributing + +Zarr is a community maintained project. We welcome contributions in the form of bug reports, bug fixes, documentation, enhancement proposals and more. This page provides information on how best to contribute. + +## Asking for help + +If you have a question about how to use Zarr, please post your question on StackOverflow using the ["zarr" tag](https://stackoverflow.com/questions/tagged/zarr). If you don't get a response within a day or two, feel free to raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) including a link to your StackOverflow question. We will try to respond to questions as quickly as possible, but please bear in mind that there may be periods where we have limited time to answer questions due to other commitments. + +## Bug reports + +If you find a bug, please raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new). Please include the following items in a bug report: + +1. A minimal, self-contained snippet of Python code reproducing the problem. You can format the code nicely using markdown, e.g.: + +```python +import zarr +g = zarr.group() +# etc. +``` + +2. An explanation of why the current behaviour is wrong/not desired, and what you expect instead. + +3. 
Information about the version of Zarr, along with versions of dependencies and the Python interpreter, and installation information. The version of Zarr can be obtained from the `zarr.__version__` property. Please also state how Zarr was installed, e.g., "installed via pip into a virtual environment", or "installed using conda". Information about other packages installed can be obtained by executing `pip freeze` (if using pip to install packages) or `conda env export` (if using conda to install packages) from the operating system command prompt. The version of the Python interpreter can be obtained by running a Python interactive session, e.g.: + +``` +$ python + Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin +``` + +## Enhancement proposals + +If you have an idea about a new feature or some other improvement to Zarr, please raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) first to discuss. + +We very much welcome ideas and suggestions for how to improve Zarr, but please bear in mind that we are likely to be conservative in accepting proposals for new features. The reasons for this are that we would like to keep the Zarr code base lean and focused on a core set of functionalities, and available time for development, review and maintenance of new features is limited. But if you have a great idea, please don't let that stop you from posting it on GitHub, just please don't be offended if we respond cautiously. + +## Contributing code and/or documentation + +### Forking the repository + +The Zarr source code is hosted on GitHub at the following location: + +* [https://github.com/zarr-developers/zarr-python](https://github.com/zarr-developers/zarr-python) + +You will need your own fork to work on the code. Go to the link above and hit the ["Fork"](https://github.com/zarr-developers/zarr-python/fork) button. 
Then clone your fork to your local machine: + +```bash +$ git clone git@github.com:your-user-name/zarr-python.git +$ cd zarr-python +$ git remote add upstream git@github.com:zarr-developers/zarr-python.git +``` + +### Creating a development environment + +To work with the Zarr source code, it is recommended to use [hatch](https://hatch.pypa.io/latest/index.html) to create and manage development environments. Hatch will automatically install all Zarr dependencies using the same versions as are used by the core developers and continuous integration services. Assuming you have a Python 3 interpreter already installed, and you have cloned the Zarr source code and your current working directory is the root of the repository, you can do something like the following: + +```bash +$ pip install hatch +$ hatch env show # list all available environments +``` + +To verify that your development environment is working, you can run the unit tests for one of the test environments, e.g.: + +```bash +$ hatch env run --env test.py3.12-2.1-optional run-pytest +``` + +### Creating a branch + +Before you do any new work or submit a pull request, please open an issue on GitHub to report the bug or propose the feature you'd like to add. + +It's best to synchronize your fork with the upstream repository, then create a new, separate branch for each piece of work you want to do. E.g.: + +```bash +git checkout main +git fetch upstream +git checkout -b shiny-new-feature upstream/main +git push -u origin shiny-new-feature +``` + +This changes your working directory to the 'shiny-new-feature' branch. Keep any changes in this branch specific to one bug or feature so it is clear what the branch brings to Zarr. + +To update this branch with latest code from Zarr, you can retrieve the changes from the main branch and perform a rebase: + +```bash +git fetch upstream +git rebase upstream/main +``` + +This will replay your commits on top of the latest Zarr git main. 
If this leads to merge conflicts, these need to be resolved before submitting a pull request. Alternatively, you can merge the changes in from upstream/main instead of rebasing, which can be simpler: + +```bash +git pull upstream main +``` + +Again, any conflicts need to be resolved before submitting a pull request. + +### Running the test suite + +Zarr includes a suite of unit tests. The simplest way to run the unit tests is to activate your development environment (see [creating a development environment](#creating-a-development-environment) above) and invoke: + +```bash +$ hatch env run --env test.py3.12-2.1-optional run-pytest +``` + +All tests are automatically run via GitHub Actions for every pull request and must pass before code can be accepted. Test coverage is also collected automatically via the Codecov service. + +> **Note:** Previous versions of Zarr-Python made extensive use of doctests. These tests were not maintained during the 3.0 refactor but may be brought back in the future. See issue #2614 for more details. + +### Code standards - using pre-commit + +All code must conform to the PEP8 standard. Regarding line length, lines up to 100 characters are allowed, although please try to keep under 90 wherever possible. + +`Zarr` uses a set of `pre-commit` hooks and the `pre-commit` bot to format, type-check, and prettify the codebase. `pre-commit` can be installed locally by running: + +```bash +$ python -m pip install pre-commit +``` + +The hooks can be installed locally by running: + +```bash +$ pre-commit install +``` + +This would run the checks every time a commit is created locally. These checks will also run on every commit pushed to an open PR, resulting in some automatic styling fixes by the `pre-commit` bot. 
The checks will by default only run on the files modified by a commit, but the checks can be triggered for all the files by running: + +```bash +$ pre-commit run --all-files +``` + +If you would like to skip the failing checks and push the code for further discussion, use the `--no-verify` option with `git commit`. + +### Test coverage + +> **Note:** Test coverage for Zarr-Python 3 is currently not at 100%. This is a known issue and help is welcome to bring test coverage back to 100%. See issue #2613 for more details. + +Zarr strives to maintain 100% test coverage under the latest Python stable release. Both unit tests and docstring doctests are included when computing coverage. Running: + +```bash +$ hatch env run --env test.py3.12-2.1-optional run-coverage +``` + +will automatically run the test suite with coverage and produce an XML coverage report. This should be 100% before code can be accepted into the main code base. + +You can also generate an HTML coverage report by running: + +```bash +$ hatch env run --env test.py3.12-2.1-optional run-coverage-html +``` + +When submitting a pull request, coverage will also be collected across all supported Python versions via the Codecov service, and will be reported back within the pull request. Codecov coverage must also be 100% before code can be accepted. + +### Documentation + +Docstrings for user-facing classes and functions should follow the [numpydoc](https://numpydoc.readthedocs.io/en/stable/format.html#docstring-standard) standard, including sections for Parameters and Examples. All examples should run and pass as doctests under Python 3.11. + +Zarr uses mkdocs for documentation, hosted on readthedocs.org. Documentation is written in the Markdown markup language (.md files) in the `docs` folder. The documentation consists of both prose and API documentation. 
All user-facing classes and functions are included in the API documentation, under the `docs/api` folder using the [mkdocstrings](https://mkdocstrings.github.io/) extension. Add any new public functions or classes to the relevant markdown file in `docs/api/*.md`. Any new features or important usage information should be included in the user-guide (`docs/user-guide`). Any changes should also be included as a new file in the `changes` directory. + +The documentation can be built locally by running: + +```bash +$ hatch --env docs run build +``` + +The resulting built documentation will be available in the `docs/_build/html` folder. + +Hatch can also be used to serve a continuously updating version of the documentation during development at [http://0.0.0.0:8000/](http://0.0.0.0:8000/). This can be done by running: + +```bash +$ hatch --env docs run serve +``` + +### Changelog + +zarr-python uses [towncrier](https://towncrier.readthedocs.io/en/stable/tutorial.html) to manage release notes. Most pull requests should include at least one news fragment describing the changes. To add a release note, you'll need the GitHub issue or pull request number and the type of your change (`feature`, `bugfix`, `doc`, `removal`, `misc`). With that, run `towncrier create` with your development environment, which will prompt you for the issue number, change type, and the news text: + +```bash +towncrier create +``` + +Alternatively, you can manually create the files in the `changes` directory using the naming convention `{issue-number}.{change-type}.md`. + +See the [towncrier](https://towncrier.readthedocs.io/en/stable/tutorial.html) docs for more. + +## Development best practices, policies and procedures + +The following information is mainly for core developers, but may also be of interest to contributors. + +### Merging pull requests + +Pull requests submitted by an external contributor should be reviewed and approved by at least one core developer before being merged. 
Ideally, pull requests submitted by a core developer should be reviewed and approved by at least one other core developer before being merged. + +Pull requests should not be merged until all CI checks have passed (GitHub Actions, Codecov) against code that has had the latest main merged in. + +### Compatibility and versioning policies + +#### Versioning + +Versions of this library are identified by a triplet of integers with the form `<major>.<minor>.<patch>`, for example `3.0.4`. A release of `zarr-python` is associated with a new version identifier. That new identifier is generated by incrementing exactly one of the components of the previous version identifier by 1. When incrementing the `major` component of the version identifier, the `minor` and `patch` components are reset to 0. When incrementing the minor component, the patch component is reset to 0. + +Releases are classified by the library changes contained in that release. This classification determines which component of the version identifier is incremented on release. + +* **major** releases (for example, `2.18.0` -> `3.0.0`) are for changes that will require extensive adaptation efforts from many users and downstream projects. For example, breaking changes to widely-used user-facing APIs should only be applied in a major release. + + Users and downstream projects should carefully consider the impact of a major release before adopting it. In advance of a major release, developers should communicate the scope of the upcoming changes, and help users prepare for them. + +* **minor** releases (for example, `3.0.0` -> `3.1.0`) are for changes that do not require significant effort from most users or downstream projects to respond to. API changes are possible in minor releases if the burden on users imposed by those changes is sufficiently small. + + For example, a recently released API may need fixes or refinements that are breaking, but low impact due to the recency of the feature. 
Such API changes are permitted in a minor release. + + Minor releases are safe for most users and downstream projects to adopt. + +* **patch** releases (for example, `3.1.0` -> `3.1.1`) are for changes that contain no breaking or behaviour changes for downstream projects or users. Examples of changes suitable for a patch release are bugfixes and documentation improvements. + + Users should always feel safe upgrading to the latest patch release. + +Note that this versioning scheme is not consistent with [Semantic Versioning](https://semver.org/). Contrary to SemVer, the Zarr library may release breaking changes in `minor` releases, or even `patch` releases under exceptional circumstances. But we should strive to avoid doing so. + +A better model for our versioning scheme is [Intended Effort Versioning](https://jacobtomlinson.dev/effver/), or "EffVer". The guiding principle of EffVer is to categorize releases based on the *expected effort required to upgrade to that release*. + +Zarr developers should make changes as smooth as possible for users. This means making backwards-compatible changes wherever possible. When a backwards-incompatible change is necessary, users should be notified well in advance, e.g. via informative deprecation warnings. + +#### Data format compatibility + +The Zarr library is an implementation of a file format standard defined externally -- see the [Zarr specifications website](https://zarr-specs.readthedocs.io) for the list of Zarr file format specifications. + +If an existing Zarr format version changes, or a new version of the Zarr format is released, then the Zarr library will generally require changes. It is very likely that a new Zarr format will require extensive breaking changes to the Zarr library, and so support for a new Zarr format in the Zarr library will almost certainly come in a new `major` release. 
When the Zarr library adds support for a new Zarr format, there may be a period of accelerated changes as developers refine newly added APIs and deprecate old APIs. In such a transitional phase breaking changes may be more frequent than usual. + +### Release procedure + +> **Note:** Most of the release process is now handled by a GitHub workflow, which should automatically push a release to PyPI if a tag is pushed. + +#### Pre-release + +1. Make sure that all pull requests which will be included in the release have been properly documented as changelog files in `changes`. +2. Run `towncrier build --version x.y.z` to create the changelog. + +#### Releasing + +To make a new release, go to https://github.com/zarr-developers/zarr-python/releases and click "Draft a new release". Choose a version number prefixed with a `v` (e.g. `v0.0.0`). For pre-releases, include the appropriate suffix (e.g. `v0.0.0a1` or `v0.0.0rc2`). + +Set the description of the release to: + +``` +See release notes https://zarr.readthedocs.io/en/stable/release-notes.html#release-0-0-0 +``` + +substituting the correct version numbers. For pre-release versions, the URL should omit the pre-release suffix, e.g. "a1" or "rc1". + +Click on "Generate release notes" to auto-fill the description. + +After creating the release, the documentation will be built on https://readthedocs.io. Full releases will be available under [/stable](https://zarr.readthedocs.io/en/stable) while pre-releases will be available under [/latest](https://zarr.readthedocs.io/en/latest). + +#### Post-release + +- Review and merge the pull request on the [conda-forge feedstock](https://github.com/conda-forge/zarr-feedstock) that will be automatically generated. +- Create a new "Unreleased" section in the release notes diff --git a/docs/developers/contributing.rst b/docs/developers/contributing.rst deleted file mode 100644 index fa65f71d48..0000000000 --- a/docs/developers/contributing.rst +++ /dev/null @@ -1,386 +0,0 @@ -.. 
_dev-guide-contributing: - -Contributing to Zarr -==================== - -Zarr is a community maintained project. We welcome contributions in the form of bug -reports, bug fixes, documentation, enhancement proposals and more. This page provides -information on how best to contribute. - -Asking for help ---------------- - -If you have a question about how to use Zarr, please post your question on -StackOverflow using the `"zarr" tag `_. -If you don't get a response within a day or two, feel free to raise a `GitHub issue -`_ including a link to your StackOverflow -question. We will try to respond to questions as quickly as possible, but please bear -in mind that there may be periods where we have limited time to answer questions -due to other commitments. - -Bug reports ------------ - -If you find a bug, please raise a `GitHub issue -`_. Please include the following items in -a bug report: - -1. A minimal, self-contained snippet of Python code reproducing the problem. You can - format the code nicely using markdown, e.g.:: - - - ```python - import zarr - g = zarr.group() - # etc. - ``` - -2. An explanation of why the current behaviour is wrong/not desired, and what you - expect instead. - -3. Information about the version of Zarr, along with versions of dependencies and the - Python interpreter, and installation information. The version of Zarr can be obtained - from the ``zarr.__version__`` property. Please also state how Zarr was installed, - e.g., "installed via pip into a virtual environment", or "installed using conda". - Information about other packages installed can be obtained by executing ``pip freeze`` - (if using pip to install packages) or ``conda env export`` (if using conda to install - packages) from the operating system command prompt. 
The version of the Python - interpreter can be obtained by running a Python interactive session, e.g.:: - - $ python - Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin - -Enhancement proposals ---------------------- - -If you have an idea about a new feature or some other improvement to Zarr, please raise a -`GitHub issue `_ first to discuss. - -We very much welcome ideas and suggestions for how to improve Zarr, but please bear in -mind that we are likely to be conservative in accepting proposals for new features. The -reasons for this are that we would like to keep the Zarr code base lean and focused on -a core set of functionalities, and available time for development, review and maintenance -of new features is limited. But if you have a great idea, please don't let that stop -you from posting it on GitHub, just please don't be offended if we respond cautiously. - -Contributing code and/or documentation --------------------------------------- - -Forking the repository -~~~~~~~~~~~~~~~~~~~~~~ - -The Zarr source code is hosted on GitHub at the following location: - -* `https://github.com/zarr-developers/zarr-python `_ - -You will need your own fork to work on the code. Go to the link above and hit -the `"Fork" `_ button. -Then clone your fork to your local machine:: - - $ git clone git@github.com:your-user-name/zarr-python.git - $ cd zarr-python - $ git remote add upstream git@github.com:zarr-developers/zarr-python.git - -Creating a development environment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To work with the Zarr source code, it is recommended to use -`hatch `_ to create and manage development -environments. Hatch will automatically install all Zarr dependencies using the same -versions as are used by the core developers and continuous integration services. 
-Assuming you have a Python 3 interpreter already installed, and you have cloned the -Zarr source code and your current working directory is the root of the repository, -you can do something like the following:: - - $ pip install hatch - $ hatch env show # list all available environments - -To verify that your development environment is working, you can run the unit tests -for one of the test environments, e.g.:: - - $ hatch env run --env test.py3.12-2.1-optional run-pytest - -Creating a branch -~~~~~~~~~~~~~~~~~ - -Before you do any new work or submit a pull request, please open an issue on GitHub to -report the bug or propose the feature you'd like to add. - -It's best to synchronize your fork with the upstream repository, then create a -new, separate branch for each piece of work you want to do. E.g.:: - - git checkout main - git fetch upstream - git checkout -b shiny-new-feature upstream/main - git push -u origin shiny-new-feature - -This changes your working directory to the 'shiny-new-feature' branch. Keep any changes in -this branch specific to one bug or feature so it is clear what the branch brings to -Zarr. - -To update this branch with latest code from Zarr, you can retrieve the changes from -the main branch and perform a rebase:: - - git fetch upstream - git rebase upstream/main - -This will replay your commits on top of the latest Zarr git main. If this leads to -merge conflicts, these need to be resolved before submitting a pull request. -Alternatively, you can merge the changes in from upstream/main instead of rebasing, -which can be simpler:: - - git pull upstream main - -Again, any conflicts need to be resolved before submitting a pull request. - -Running the test suite -~~~~~~~~~~~~~~~~~~~~~~ - -Zarr includes a suite of unit tests. 
The simplest way to run the unit tests -is to activate your development environment -(see `creating a development environment`_ above) and invoke:: - - $ hatch env run --env test.py3.12-2.1-optional run-pytest - -All tests are automatically run via GitHub Actions for every pull -request and must pass before code can be accepted. Test coverage is -also collected automatically via the Codecov service. - -.. note:: - Previous versions of Zarr-Python made extensive use of doctests. These tests were - not maintained during the 3.0 refactor but may be brought back in the future. - See :issue:`2614` for more details. - -Code standards - using pre-commit -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -All code must conform to the PEP8 standard. Regarding line length, lines up to 100 -characters are allowed, although please try to keep under 90 wherever possible. - -``Zarr`` uses a set of ``pre-commit`` hooks and the ``pre-commit`` bot to format, -type-check, and prettify the codebase. ``pre-commit`` can be installed locally by -running:: - - $ python -m pip install pre-commit - -The hooks can be installed locally by running:: - - $ pre-commit install - -This would run the checks every time a commit is created locally. These checks will also run -on every commit pushed to an open PR, resulting in some automatic styling fixes by the -``pre-commit`` bot. The checks will by default only run on the files modified by a commit, -but the checks can be triggered for all the files by running:: - - $ pre-commit run --all-files - -If you would like to skip the failing checks and push the code for further discussion, use -the ``--no-verify`` option with ``git commit``. - - -Test coverage -~~~~~~~~~~~~~ - -.. note:: - Test coverage for Zarr-Python 3 is currently not at 100%. This is a known issue and help - is welcome to bring test coverage back to 100%. See :issue:`2613` for more details. 
- -Zarr strives to maintain 100% test coverage under the latest Python stable release -Both unit tests and docstring doctests are included when computing coverage. Running:: - - $ hatch env run --env test.py3.12-2.1-optional run-coverage - -will automatically run the test suite with coverage and produce a XML coverage report. -This should be 100% before code can be accepted into the main code base. - -You can also generate an HTML coverage report by running:: - - $ hatch env run --env test.py3.12-2.1-optional run-coverage-html - -When submitting a pull request, coverage will also be collected across all supported -Python versions via the Codecov service, and will be reported back within the pull -request. Codecov coverage must also be 100% before code can be accepted. - -Documentation -~~~~~~~~~~~~~ - -Docstrings for user-facing classes and functions should follow the -`numpydoc -`_ -standard, including sections for Parameters and Examples. All examples -should run and pass as doctests under Python 3.11. - -Zarr uses Sphinx for documentation, hosted on readthedocs.org. Documentation is -written in the RestructuredText markup language (.rst files) in the ``docs`` folder. -The documentation consists both of prose and API documentation. All user-facing classes -and functions are included in the API documentation, under the ``docs/api`` folder -using the `autodoc `_ -extension to sphinx. Any new features or important usage information should be included in the -user-guide (``docs/user-guide``). Any changes should also be included as a new file in the -:file:`changes` directory. - -The documentation can be built locally by running:: - - $ hatch --env docs run build - -The resulting built documentation will be available in the ``docs/_build/html`` folder. - -Hatch can also be used to serve continuously updating version of the documentation -during development at `http://0.0.0.0:8000/ `_. This can be done by running:: - - $ hatch --env docs run serve - -.. 
_changelog: - -Changelog -~~~~~~~~~ - -zarr-python uses `towncrier`_ to manage release notes. Most pull requests should -include at least one news fragment describing the changes. To add a release -note, you'll need the GitHub issue or pull request number and the type of your -change (``feature``, ``bugfix``, ``doc``, ``removal``, ``misc``). With that, run -```towncrier create``` with your development environment, which will prompt you -for the issue number, change type, and the news text:: - - towncrier create - -Alternatively, you can manually create the files in the ``changes`` directory -using the naming convention ``{issue-number}.{change-type}.rst``. - -See the `towncrier`_ docs for more. - -.. _towncrier: https://towncrier.readthedocs.io/en/stable/tutorial.html - -Development best practices, policies and procedures ---------------------------------------------------- - -The following information is mainly for core developers, but may also be of interest to -contributors. - -Merging pull requests -~~~~~~~~~~~~~~~~~~~~~ - -Pull requests submitted by an external contributor should be reviewed and approved by at least -one core developer before being merged. Ideally, pull requests submitted by a core developer -should be reviewed and approved by at least one other core developer before being merged. - -Pull requests should not be merged until all CI checks have passed (GitHub Actions -Codecov) against code that has had the latest main merged in. - -Compatibility and versioning policies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Versioning -"""""""""" -Versions of this library are identified by a triplet of integers with the form -``..``, for example ``3.0.4``. A release of ``zarr-python`` is associated with a new -version identifier. That new identifier is generated by incrementing exactly one of the components of -the previous version identifier by 1. 
When incrementing the ``major`` component of the version identifier, -the ``minor`` and ``patch`` components is reset to 0. When incrementing the minor component, -the patch component is reset to 0. - -Releases are classified by the library changes contained in that release. This classification -determines which component of the version identifier is incremented on release. - -* ``major`` releases (for example, ``2.18.0`` -> ``3.0.0``) are for changes that will - require extensive adaptation efforts from many users and downstream projects. - For example, breaking changes to widely-used user-facing APIs should only be applied in a major release. - - - Users and downstream projects should carefully consider the impact of a major release before - adopting it. - In advance of a major release, developers should communicate the scope of the upcoming changes, - and help users prepare for them. - -* ``minor`` releases (or example, ``3.0.0`` -> ``3.1.0``) are for changes that do not require - significant effort from most users or downstream downstream projects to respond to. API changes - are possible in minor releases if the burden on users imposed by those changes is sufficiently small. - - For example, a recently released API may need fixes or refinements that are breaking, but low impact - due to the recency of the feature. Such API changes are permitted in a minor release. - - - Minor releases are safe for most users and downstream projects to adopt. - - -* ``patch`` releases (for example, ``3.1.0`` -> ``3.1.1``) are for changes that contain no breaking - or behaviour changes for downstream projects or users. Examples of changes suitable for a patch release are - bugfixes and documentation improvements. - - - Users should always feel safe upgrading to a the latest patch release. - -Note that this versioning scheme is not consistent with `Semantic Versioning `_. 
-Contrary to SemVer, the Zarr library may release breaking changes in ``minor`` releases, or even -``patch`` releases under exceptional circumstances. But we should strive to avoid doing so. - -A better model for our versioning scheme is `Intended Effort Versioning `_, -or "EffVer". The guiding principle off EffVer is to categorize releases based on the *expected effort -required to upgrade to that release*. - -Zarr developers should make changes as smooth as possible for users. This means making -backwards-compatible changes wherever possible. When a backwards-incompatible change is necessary, -users should be notified well in advance, e.g. via informative deprecation warnings. - -Data format compatibility -^^^^^^^^^^^^^^^^^^^^^^^^^ - -The Zarr library is an implementation of a file format standard defined externally -- -see the `Zarr specifications website `_ for the list of -Zarr file format specifications. - - -If an existing Zarr format version changes, or a new version of the Zarr format is released, then -the Zarr library will generally require changes. It is very likely that a new Zarr format will -require extensive breaking changes to the Zarr library, and so support for a new Zarr format in the -Zarr library will almost certainly come in new ``major`` release. -When the Zarr library adds support for a new Zarr format, there may be a period of accelerated -changes as developers refine newly added APIs and deprecate old APIs. In such a transitional phase -breaking changes may be more frequent than usual. - - -Release procedure -~~~~~~~~~~~~~~~~~ - -.. note:: - - Most of the release process is now handled by GitHub workflow which should - automatically push a release to PyPI if a tag is pushed. - -Pre-release -""""""""""" -1. Make sure that all pull requests which will be included in the release - have been properly documented as changelog files in :file:`changes`. -2. Run ``towncrier build --version x.y.z`` to create the changelog. 
- -Releasing -""""""""" -To make a new release, go to -https://github.com/zarr-developers/zarr-python/releases and -click "Draft a new release". Choose a version number prefixed -with a `v` (e.g. `v0.0.0`). For pre-releases, include the -appropriate suffix (e.g. `v0.0.0a1` or `v0.0.0rc2`). - - -Set the description of the release to:: - - See release notes https://zarr.readthedocs.io/en/stable/release-notes.html#release-0-0-0 - -replacing the correct version numbers. For pre-release versions, -the URL should omit the pre-release suffix, e.g. "a1" or "rc1". - -Click on "Generate release notes" to auto-file the description. - -After creating the release, the documentation will be built on -https://readthedocs.io. Full releases will be available under -`/stable `_ while -pre-releases will be available under -`/latest `_. - -Post-release -"""""""""""" - -- Review and merge the pull request on the - `conda-forge feedstock `_ that will be - automatically generated. -- Create a new "Unreleased" section in the release notes diff --git a/docs/developers/index.rst b/docs/developers/index.rst deleted file mode 100644 index 4bccb3a469..0000000000 --- a/docs/developers/index.rst +++ /dev/null @@ -1,9 +0,0 @@ - -Developer's Guide ------------------ - -.. toctree:: - :maxdepth: 1 - - contributing - roadmap diff --git a/docs/developers/roadmap.rst b/docs/developers/roadmap.rst deleted file mode 100644 index d9fc32b775..0000000000 --- a/docs/developers/roadmap.rst +++ /dev/null @@ -1,696 +0,0 @@ -Roadmap -======= - -- Status: active -- Author: Joe Hamman -- Created On: October 31, 2023 -- Input from: - - - Davis Bennett / @d-v-b - - Norman Rzepka / @normanrz - - Deepak Cherian @dcherian - - Brian Davis / @monodeldiablo - - Oliver McCormack / @olimcc - - Ryan Abernathey / @rabernat - - Jack Kelly / @JackKelly - - Martin Durrant / @martindurant - -.. note:: - - This document was written in the early stages of the 3.0 refactor. 
Some - aspects of the design have changed since this was originally written. - Questions and discussion about the contents of this document should be directed to - `this GitHub Discussion `__. - -Introduction ------------- - -This document lays out a design proposal for version 3.0 of the -`Zarr-Python `__ package. A -specific focus of the design is to bring Zarr-Python’s API up to date -with the `Zarr V3 -specification `__, -with the hope of enabling the development of the many features and -extensions that motivated the V3 Spec. The ideas presented here are -expected to result in a major release of Zarr-Python (version 3.0) -including significant a number of breaking API changes. For clarity, -“V3” will be used to describe the version of the Zarr specification and -“3.0” will be used to describe the release tag of the Zarr-Python -project. - -Current status of V3 in Zarr-Python -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -During the development of the V3 Specification, a `prototype -implementation `__ -was added to the Zarr-Python library. Since that implementation, the V3 -spec evolved in significant ways and as a result, the Zarr-Python -library is now out of sync with the approved spec. Downstream libraries -(e.g. `Xarray `__) have added support -for this implementation and will need to migrate to the accepted spec -when its available in Zarr-Python. - -Goals ------ - -- Provide a complete implementation of Zarr V3 through the Zarr-Python - API -- Clear the way for exciting extensions / ZEPs - (i.e. `sharding `__, - `variable chunking `__, - etc.) -- Provide a developer API that can be used to implement and register V3 - extensions -- Improve the performance of Zarr-Python by streamlining the interface - between the Store layer and higher level APIs (e.g. Groups and - Arrays) -- Clean up the internal and user facing APIs -- Improve code quality and robustness (e.g. 
achieve 100% type hint - coverage) -- Align the Zarr-Python array API with the `array API - Standard `__ - -Examples of what 3.0 will enable? ---------------------------------- - -1. Reading and writing V3 spec-compliant groups and arrays -2. V3 extensions including sharding and variable chunking. -3. Improved performance by leveraging concurrency when - creating/reading/writing to stores (imagine a - ``create_hierarchy(zarr_objects)`` function). -4. User-developed extensions (e.g. storage-transformers) can be - registered with Zarr-Python at runtime - -Non-goals (of this document) ----------------------------- - -- Implementation of any unaccepted Zarr V3 extensions -- Major revisions to the Zarr V3 spec - -Requirements ------------- - -1. Read and write spec compliant V2 and V3 data -2. Limit unnecessary traffic to/from the store -3. Cleanly define the Array/Group/Store abstractions -4. Cleanly define how V2 will be supported going forward -5. Provide a clear roadmap to help users upgrade to 3.0 -6. Developer tools / hooks for registering extensions - -Design ------- - -Async API -~~~~~~~~~ - -Zarr-Python is an IO library. As such, supporting concurrent action -against the storage layer is critical to achieving acceptable -performance. The Zarr-Python 2 was not designed with asynchronous -computation in mind and as a result has struggled to effectively -leverage the benefits of concurrency. At one point, ``getitems`` and -``setitems`` support was added to the Zarr store model but that is only -used for operating on a set of chunks in a single variable. - -With Zarr-Python 3.0, we have the opportunity to revisit this design. -The proposal here is as follows: - -1. The ``Store`` interface will be entirely async. -2. On top of the async ``Store`` interface, we will provide an - ``AsyncArray`` and ``AsyncGroup`` interface. -3. Finally, the primary user facing API will be synchronous ``Array`` - and ``Group`` classes that wrap the async equivalents. 
- -**Examples** - -- **Store** - - .. code:: python - - class Store: - ... - async def get(self, key: str) -> bytes: - ... - async def get_partial_values(self, key_ranges: List[Tuple[str, Tuple[int, Optional[int]]]]) -> bytes: - ... - # (no sync interface here) - -- **Array** - - .. code:: python - - class AsyncArray: - ... - - async def getitem(self, selection: Selection) -> np.ndarray: - # the core logic for getitem goes here - - class Array: - _async_array: AsyncArray - - def __getitem__(self, selection: Selection) -> np.ndarray: - return sync(self._async_array.getitem(selection)) - -- **Group** - - .. code:: python - - class AsyncGroup: - ... - - async def create_group(self, path: str, **kwargs) -> AsyncGroup: - # the core logic for create_group goes here - - class Group: - _async_group: AsyncGroup - - def create_group(self, path: str, **kwargs) -> Group: - return sync(self._async_group.create_group(path, **kwargs)) - - **Internal Synchronization API** - -With the ``Store`` and core ``AsyncArray``/ ``AsyncGroup`` classes being -predominantly async, Zarr-Python will need an internal API to provide a -synchronous API. The proposal here is to use the approach in -`fsspec `__ -to provide a high-level ``sync`` function that takes an ``awaitable`` -and runs it in its managed IO Loop / thread. - -| **FAQ** 1. Why two levels of Arrays/groups? a. First, this is an - intentional decision and departure from the current Zarrita - implementation b. The idea is that users rarely want to mix - interfaces. Either they are working within an async context (currently - quite rare) or they are in a typical synchronous context. c. Splitting - the two will allow us to clearly define behavior on the ``AsyncObj`` - and simply wrap it in the ``SyncObj``. 2. What if a store is only has - a synchronous backend? a. First off, this is expected to be a fairly - rare occurrence. Most storage backends have async interfaces. b. 
But - in the event a storage backend doesn’t have a async interface, there - is nothing wrong with putting synchronous code in ``async`` methods. - There are approaches to enabling concurrent action through wrappers - like AsyncIO’s ``loop.run_in_executor`` (`ref - 1 `__, - `ref 2 `__, `ref - 3 `__, - `ref - 4 `__. -| 3. Will Zarr help manage the async contexts encouraged by some - libraries - (e.g. `AioBotoCore `__)? - a. Many async IO libraries require entering an async context before - interacting with the API. We expect some experimentation to be needed - here but the initial design will follow something close to what fsspec - does (`example in - s3fs `__). - 4. Why not provide a synchronous Store interface? a. We could but this - design is simpler. It would mean supporting it in the ``AsyncGroup`` - and ``AsyncArray`` classes which, may be more trouble than its worth. - Storage backends that do not have an async API will be encouraged to - wrap blocking calls in an async wrapper - (e.g. ``loop.run_in_executor``). - -Store API -~~~~~~~~~ - -The ``Store`` API is specified directly in the V3 specification. All V3 -stores should implement this abstract API, omitting Write and List -support as needed. As described above, all stores will be expected to -expose the required methods as async methods. - -**Example** - -.. code:: python - - class ReadWriteStore: - ... - async def get(self, key: str) -> bytes: - ... - - async def get_partial_values(self, key_ranges: List[Tuple[str, int, int]) -> bytes: - ... - - async def set(self, key: str, value: Union[bytes, bytearray, memoryview]) -> None: - ... # required for writable stores - - async def set_partial_values(self, key_start_values: List[Tuple[str, int, Union[bytes, bytearray, memoryview]]]) -> None: - ... # required for writable stores - - async def list(self) -> List[str]: - ... # required for listable stores - - async def list_prefix(self, prefix: str) -> List[str]: - ... 
# required for listable stores - - async def list_dir(self, prefix: str) -> List[str]: - ... # required for listable stores - - # additional (optional methods) - async def getsize(self, prefix: str) -> int: - ... - - async def rename(self, src: str, dest: str) -> None - ... - - -Recognizing that there are many Zarr applications today that rely on the -``MutableMapping`` interface supported by Zarr-Python 2, a wrapper store -will be developed to allow existing stores to plug directly into this -API. - -Array API -~~~~~~~~~ - -The user facing array interface will implement a subset of the `Array -API Standard `__. Most of the -computational parts of the Array API Standard don’t fit into Zarr right -now. That’s okay. What matters most is that we ensure we can give -downstream applications a compliant API. - -*Note, Zarr already does most of this so this is more about formalizing -the relationship than a substantial change in API.* - -+------------------------+------------------------+-------------------------+-------------------------+ -| | Included | Not Included | Unknown / Maybe Possible| -+========================+========================+=========================+=========================+ -| **Attributes** | ``dtype`` | ``mT`` | ``device`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``ndim`` | ``T`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``shape`` | | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``size`` | | | -+------------------------+------------------------+-------------------------+-------------------------+ -| **Methods** | ``__getitem__`` | ``__array_namespace__`` | ``to_device`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``__setitem__`` | ``__abs__`` | ``__bool__`` | 
-+------------------------+------------------------+-------------------------+-------------------------+ -| | ``__eq__`` | ``__add__`` | ``__complex__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``__bool__`` | ``__and__`` | ``__dlpack__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__floordiv__`` | ``__dlpack_device__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__ge__`` | ``__float__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__gt__`` | ``__index__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__invert__`` | ``__int__`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__le__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__lshift__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__lt__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__matmul__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__mod__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__mul__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__ne__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__neg__`` | | 
-+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__or__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__pos__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__pow__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__rshift__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__sub__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__truediv__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | | ``__xor__`` | | -+------------------------+------------------------+-------------------------+-------------------------+ -| **Creation functions** | ``zeros`` | | ``arange`` | -| (``zarr.creation``) | | | | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``zeros_like`` | | ``asarray`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``ones`` | | ``eye`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``ones_like`` | | ``from_dlpack`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``full`` | | ``linspace`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``full_like`` | | ``meshgrid`` | -+------------------------+------------------------+-------------------------+-------------------------+ -| | ``empty`` | | ``tril`` | 
-+------------------------+------------------------+-------------------------+-------------------------+ -| | ``empty_like`` | | ``triu`` | -+------------------------+------------------------+-------------------------+-------------------------+ - -In addition to the core array API defined above, the Array class should -have the following Zarr specific properties: - -- ``.metadata`` (see Metadata Interface below) -- ``.attrs`` - (pulled from metadata object) -- ``.info`` - (repolicated from existing property †) - -*† In Zarr-Python 2, the info property listed the store to identify -initialized chunks. By default this will be turned off in 3.0 but will -be configurable.* - -**Indexing** - -Zarr-Python currently supports ``__getitem__`` style indexing and the -special ``oindex`` and ``vindex`` indexers. These are not part of the -current Array API standard (see -`data-apis/array-api#669 `__) -but they have been `proposed as a -NEP `__. -Zarr-Python will maintain these in 3.0. - -We are also exploring a new high-level indexing API that will enabled -optimized batch/concurrent loading of many chunks. We expect this to be -important to enable performant loading of data in the context of -sharding. See `this -discussion `__ -for more detail. - -Concurrent indexing across multiple arrays will be possible using the -AsyncArray API. - -**Async and Sync Array APIs** - -Most the logic to support Zarr Arrays will live in the ``AsyncArray`` -class. There are a few notable differences that should be called out. - -=============== ============ -Sync Method Async Method -=============== ============ -``__getitem__`` ``getitem`` -``__setitem__`` ``setitem`` -``__eq__`` ``equals`` -=============== ============ - -**Metadata interface** - -Zarr-Python 2.\* closely mirrors the V2 spec metadata schema in the -Array and Group classes. In 3.0, we plan to move the underlying metadata -representation to a separate interface (e.g. ``Array.metadata``). 
This -interface will return either a ``V2ArrayMetadata`` or -``V3ArrayMetadata`` object (both will inherit from a parent -``ArrayMetadataABC`` class. The ``V2ArrayMetadata`` and -``V3ArrayMetadata`` classes will be responsible for producing valid JSON -representations of their metadata, and yielding a consistent view to the -``Array`` or ``Group`` class. - -Group API -~~~~~~~~~ - -The main question is how closely we should follow the existing -Zarr-Python implementation / ``MutableMapping`` interface. The table -below shows the primary ``Group`` methods in Zarr-Python 2 and attempts -to identify if and how they would be implemented in 3.0. - -+---------------------+------------------+------------------+-----------------------+ -| V2 Group Methods | ``AsyncGroup`` | ``Group`` | ``h5py_compat.Group`` | -+=====================+==================+==================+=======================+ -| ``__len__`` | ``length`` | ``__len__`` | ``__len__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__iter__`` | ``__aiter__`` | ``__iter__`` | ``__iter__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__contains__`` | ``contains`` | ``__contains__`` | ``__contains__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__getitem__`` | ``getitem`` | ``__getitem__`` | ``__getitem__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__enter__`` | N/A | N/A | ``__enter__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``__exit__`` | N/A | N/A | ``__exit__`` | -+---------------------+------------------+------------------+-----------------------+ -| ``group_keys`` | ``group_keys`` | ``group_keys`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``groups`` | ``groups`` | ``groups`` | N/A | 
-+---------------------+------------------+------------------+-----------------------+ -| ``array_keys`` | ``array_key`` | ``array_keys`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``arrays`` | ``arrays`` | ``arrays`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``visit`` | ? | ? | ``visit`` | -+---------------------+------------------+------------------+-----------------------+ -| ``visitkeys`` | ? | ? | ? | -+---------------------+------------------+------------------+-----------------------+ -| ``visitvalues`` | ? | ? | ? | -+---------------------+------------------+------------------+-----------------------+ -| ``visititems`` | ? | ? | ``visititems`` | -+---------------------+------------------+------------------+-----------------------+ -| ``tree`` | ``tree`` | ``tree`` | ``Both`` | -+---------------------+------------------+------------------+-----------------------+ -| ``create_group`` | ``create_group`` | ``create_group`` | ``create_group`` | -+---------------------+------------------+------------------+-----------------------+ -| ``require_group`` | N/A | N/A | ``require_group`` | -+---------------------+------------------+------------------+-----------------------+ -| ``create_groups`` | ? | ? | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``require_groups`` | ? | ? | ? 
| -+---------------------+------------------+------------------+-----------------------+ -| ``create_dataset`` | N/A | N/A | ``create_dataset`` | -+---------------------+------------------+------------------+-----------------------+ -| ``require_dataset`` | N/A | N/A | ``require_dataset`` | -+---------------------+------------------+------------------+-----------------------+ -| ``create`` | ``create_array`` | ``create_array`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``empty`` | ``empty`` | ``empty`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``zeros`` | ``zeros`` | ``zeros`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``ones`` | ``ones`` | ``ones`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``full`` | ``full`` | ``full`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``array`` | ``create_array`` | ``create_array`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``empty_like`` | ``empty_like`` | ``empty_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``zeros_like`` | ``zeros_like`` | ``zeros_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``ones_like`` | ``ones_like`` | ``ones_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``full_like`` | ``full_like`` | ``full_like`` | N/A | -+---------------------+------------------+------------------+-----------------------+ -| ``move`` | ``move`` | ``move`` | ``move`` | -+---------------------+------------------+------------------+-----------------------+ - -**``zarr.h5compat.Group``** --- -Zarr-Python 2.\* made an attempt to align its API 
with that of -`h5py `__. With 3.0, we will -relax this alignment in favor of providing an explicit compatibility -module (``zarr.h5py_compat``). This module will expose the ``Group`` and -``Dataset`` APIs that map to Zarr-Python’s ``Group`` and ``Array`` -objects. - -Creation API -~~~~~~~~~~~~ - -Zarr-Python 2.\* bundles together the creation and serialization of Zarr -objects. Zarr-Python 3.\* will make it possible to create objects in -memory separate from serializing them. This will specifically enable -writing hierarchies of Zarr objects in a single batch step. For example: - -.. code:: python - - - arr1 = Array(shape=(10, 10), path="foo/bar", dtype="i4", store=store) - arr2 = Array(shape=(10, 10), path="foo/spam", dtype="f8", store=store) - - arr1.save() - arr2.save() - - # or equivalently - - zarr.save_many([arr1 ,arr2]) - -*Note: this batch creation API likely needs additional design effort -prior to implementation.* - -Plugin API -~~~~~~~~~~ - -Zarr V3 was designed to be extensible at multiple layers. Zarr-Python -will support these extensions through a combination of `Abstract Base -Classes `__ (ABCs) and -`Entrypoints `__. - -**ABCs** - -Zarr V3 will expose Abstract base classes for the following objects: - -- ``Store``, ``ReadStore``, ``ReadWriteStore``, ``ReadListStore``, and - ``ReadWriteListStore`` -- ``BaseArray``, ``SynchronousArray``, and ``AsynchronousArray`` -- ``BaseGroup``, ``SynchronousGroup``, and ``AsynchronousGroup`` -- ``Codec``, ``ArrayArrayCodec``, ``ArrayBytesCodec``, - ``BytesBytesCodec`` - -**Entrypoints** - -Lots more thinking here but the idea here is to provide entrypoints for -``data type``, ``chunk grid``, ``chunk key encoding``, ``codecs``, -``storage_transformers`` and ``stores``. 
These might look something -like: - -:: - - entry_points=""" - [zarr.codecs] - blosc_codec=codec_plugin:make_blosc_codec - zlib_codec=codec_plugin:make_zlib_codec - """ - -Python type hints and static analysis -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Target 100% Mypy coverage in 3.0 source. - -Observability -~~~~~~~~~~~~~ - -A persistent problem in Zarr-Python is diagnosing problems that span -many parts of the stack. To address this in 3.0, we will add a basic -logging framework that can be used to debug behavior at various levels -of the stack. We propose to add the separate loggers for the following -namespaces: - -- ``array`` -- ``group`` -- ``store`` -- ``codec`` - -These should be documented such that users know how to activate them and -developers know how to use them when developing extensions. - -Dependencies -~~~~~~~~~~~~ - -Today, Zarr-Python has the following required dependencies: - -.. code:: python - - dependencies = [ - 'asciitree', - 'numpy>=1.20,!=1.21.0', - 'fasteners', - 'numcodecs>=0.10.0', - ] - -What other dependencies should be considered? - -1. Attrs - Zarrita makes extensive use of the Attrs library -2. Fsspec - Zarrita has a hard dependency on Fsspec. This could be - easily relaxed though. - -Breaking changes relative to Zarr-Python 2.\* ---------------------------------------------- - -1. H5py compat moved to a stand alone module? -2. ``Group.__getitem__`` support moved to ``Group.members.__getitem__``? -3. Others? - -Open questions --------------- - -1. How to treat V2 - - a. Note: Zarrita currently implements a separate ``V2Array`` and - ``V3Array`` classes. This feels less than ideal. - b. We could easily convert metadata from v2 to the V3 Array, but what - about writing? - c. Ideally, we don’t have completely separate code paths. But if its - too complicated to support both within one interface, its probably - better. - -2. How and when to remove the current implementation of V3. - - a. 
It’s hidden behind a hard-to-use feature flag so we probably don’t - need to do anything. - -3. How to model runtime configuration? -4. Which extensions belong in Zarr-Python and which belong in separate - packages? - - a. We don’t need to take a strong position on this here. It’s likely - that someone will want to put Sharding in. That will be useful to - develop in parallel because it will give us a good test case for - the plugin interface. - -Testing -------- - -Zarr-python 3.0 adds a major new dimension to Zarr: Async support. This -also comes with a compatibility risk, we will need to thoroughly test -support in key execution environments. Testing plan: - Reuse the -existing test suite for testing the ``v3`` API. - ``xfail`` tests that -expose breaking changes with ``3.0 - breaking change`` description. This -will help identify additional and/or unintentional breaking changes - -Rework tests that were only testing internal APIs. - Add a set of -functional / integration tests targeting real-world workflows in various -contexts (e.g. w/ Dask) - -Development process -------------------- - -Zarr-Python 3.0 will introduce a number of new APIs and breaking changes -to existing APIs. In order to facilitate ongoing support for Zarr-Python -2.*, we will take on the following development process: - -- Create a ``v3`` branch that can be use for developing the core - functionality apart from the ``main`` branch. This will allow us to - support ongoing work and bug fixes on the ``main`` branch. -- Put the ``3.0`` APIs inside a ``zarr.v3`` module. Imports from this - namespace will all be new APIs that users can develop and test - against once the ``v3`` branch is merged to ``main``. -- Kickstart the process by pulling in the current state of ``zarrita`` - - which has many of the features described in this design. 
-- Release a series of 2.\* releases with the ``v3`` namespace -- When ``v3`` is complete, move contents of ``v3`` to the package root - -**Milestones** - -Below are a set of specific milestones leading toward the completion of -this process. As work begins, we expect this list to grow in -specificity. - -1. Port current version of Zarrita to Zarr-Python -2. Formalize Async interface by splitting ``Array`` and ``Group`` - objects into Sync and Async versions -3. Implement “fancy” indexing operations on the ``AsyncArray`` -4. Implement an abstract base class for the ``Store`` interface and a - wrapper ``Store`` to make use of existing ``MutableMapping`` stores. -5. Rework the existing unit test suite to use the ``v3`` namespace. -6. Develop a plugin interface for extensions -7. Develop a set of functional and integration tests -8. Work with downstream libraries (Xarray, Dask, etc.) to test new APIs - -TODOs ------ - -The following subjects are not covered in detail above but perhaps -should be. Including them here so they are not forgotten. - -1. [Store] Should Zarr provide an API for caching objects after first - read/list/etc. Read only stores? -2. [Array] buffer protocol support -3. [Array] ``meta_array`` support -4. [Extensions] Define how Zarr-Python will consume the various plugin - types -5. [Misc] H5py compatibility requires a bit more work and a champion to - drive it forward. -6. [Misc] Define ``chunk_store`` API in 3.0 -7. [Misc] Define ``synchronizer`` API in 3.0 - -References ----------- - -1. `Zarr-Python - repository `__ -2. `Zarr core specification (version 3.0) — Zarr specs - documentation `__ -3. `Zarrita repository `__ -4. `Async-Zarr `__ -5. 
`Zarr-Python Discussion - Topic `__ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000000..240f939d90 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,230 @@ +# Zarr-Python + +**Useful links**: +[Source Repository](https://github.com/zarr-developers/zarr-python) | +[Issue Tracker](https://github.com/zarr-developers/zarr-python/issues) | +[Developer Chat](https://ossci.zulipchat.com/) | +[Zarr specifications](https://zarr-specs.readthedocs.io) + + +Zarr is a powerful library for storage of n-dimensional arrays, supporting chunking, +compression, and various backends, making it a versatile choice for scientific and +large-scale data. + +Zarr-Python is a Python library for reading and writing Zarr groups and arrays. Highlights include: + +* Specification support for both Zarr format 2 and 3. +* Create and read from N-dimensional arrays using NumPy-like semantics. +* Flexible storage enables reading and writing from local, cloud and in-memory stores. +* High performance: Enables fast I/O with support for asynchronous I/O and multi-threading. +* Extensible: Customizable with user-defined codecs and stores. + +## Installation + +Zarr requires Python 3.11 or higher. You can install it via `pip`: + +```bash +pip install zarr +``` + +or `conda`: + +```bash +conda install --channel conda-forge zarr +``` + +## Quick Start 🚀 + +This section will help you get up and running with +the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. 
+ +### Creating an Array + +To get started, you can create a simple Zarr array: + +```python +import zarr +import numpy as np + +# Create a 2D Zarr array +z = zarr.create_array( + store="data/example-1.zarr", + shape=(100, 100), + chunks=(10, 10), + dtype="f4" +) + +# Assign data to the array +z[:, :] = np.random.random((100, 100)) +z.info + +# Type : Array +# Zarr format : 3 +# Data type : DataType.float32 +# Shape : (100, 100) +# Chunk shape : (10, 10) +# Order : C +# Read-only : False +# Store type : LocalStore +# Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}] +# No. bytes : 40000 (39.1K) +``` + +Here, we created a 2D array of shape `(100, 100)`, chunked into blocks of +`(10, 10)`, and filled it with random floating-point data. This array was +written to a `LocalStore` in the `data/example-1.zarr` directory. + +#### Compression and Filters + +Zarr supports data compression and filters. For example, to use Blosc compression: + +```python +z = zarr.create_array( + "data/example-3.zarr", + mode="w", shape=(100, 100), + chunks=(10, 10), dtype="f4", + compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) +) +z[:, :] = np.random.random((100, 100)) +z.info +# Type : Array +# Zarr format : 3 +# Data type : DataType.float32 +# Shape : (100, 100) +# Chunk shape : (10, 10) +# Order : C +# Read-only : False +# Store type : LocalStore +# Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}] +# No. bytes : 40000 (39.1K) +``` + +This compresses the data using the Zstandard codec with shuffle enabled for better compression. 
+ +### Hierarchical Groups + +Zarr allows you to create hierarchical groups, similar to directories: + +```python +# Create nested groups and add arrays +root = zarr.group("data/example-2.zarr") +foo = root.create_group(name="foo") +bar = root.create_array( + name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" +) +spam = foo.create_array(name="spam", shape=(10,), dtype="i4") + +# Assign values +bar[:, :] = np.random.random((100, 10)) +spam[:] = np.arange(10) + +# print the hierarchy +root.tree() +# / +# ├── bar (100, 10) float32 +# └── foo +# └── spam (10,) int32 +``` + +This creates a root group with two members: a subgroup `foo` (which contains the array `spam`) and an array `bar`. + +#### Batch Hierarchy Creation + +Zarr provides tools for creating a collection of arrays and groups with a single function call. +Suppose we want to copy existing groups and arrays into a new storage backend: + +```python +# Create nested groups and add arrays +root = zarr.group("data/example-3.zarr", attributes={'name': 'root'}) +foo = root.create_group(name="foo") +bar = root.create_array( + name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" +) +nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} +print(nodes) +from zarr.storage import MemoryStore +new_nodes = dict(zarr.create_hierarchy(store=MemoryStore(), nodes=nodes)) +new_root = new_nodes[''] +assert new_root.attrs == root.attrs +``` + +Note that `zarr.create_hierarchy` will only initialize arrays and groups -- copying array data must +be done in a separate step. + +### Persistent Storage + +Zarr supports persistent storage to disk or cloud-compatible backends. While examples above +utilized a `zarr.storage.LocalStore`, a number of other storage options are available. 
+ +Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage +using external libraries like [s3fs](https://s3fs.readthedocs.io) or +[gcsfs](https://gcsfs.readthedocs.io): + +```python +import s3fs + +z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") +z[:, :] = np.random.random((100, 100)) +``` + +A single-file store can also be created using the `zarr.storage.ZipStore`: + +```python +# Store the array in a ZIP file +store = zarr.storage.ZipStore("data/example-3.zip", mode='w') + +z = zarr.create_array( + store=store, + mode="w", + shape=(100, 100), + chunks=(10, 10), + dtype="f4" +) + +# write to the array +z[:, :] = np.random.random((100, 100)) + +# the ZipStore must be explicitly closed +store.close() +``` + +To open an existing array from a ZIP file: + +```python +# Open the ZipStore in read-only mode +store = zarr.storage.ZipStore("data/example-3.zip", read_only=True) + +z = zarr.open_array(store, mode='r') + +# read the data as a NumPy Array +z[:] +# array([[0.66734236, 0.15667458, 0.98720884, ..., 0.36229587, 0.67443246, +# 0.34315267], +# [0.65787303, 0.9544212 , 0.4830079 , ..., 0.33097172, 0.60423803, +# 0.45621237], +# [0.27632037, 0.9947008 , 0.42434934, ..., 0.94860053, 0.6226942 , +# 0.6386924 ], +# ..., +# [0.12854576, 0.934397 , 0.19524333, ..., 0.11838563, 0.4967675 , +# 0.43074256], +# [0.82029045, 0.4671437 , 0.8090906 , ..., 0.7814118 , 0.42650765, +# 0.95929915], +# [0.4335856 , 0.7565437 , 0.7828931 , ..., 0.48119593, 0.66220033, +# 0.6652362 ]], shape=(100, 100), dtype=float32) +``` + +Read more about Zarr's storage options in the [User Guide](user-guide/storage.md). + +## Project Status + +More information about the Zarr format can be found on the [main website](https://zarr.dev). + +If you are using Zarr-Python, we would [love to hear about it](https://github.com/zarr-developers/community/issues/19). 
+ +### Funding and Support +The project is fiscally sponsored by [NumFOCUS](https://numfocus.org/), a US +501(c)(3) public charity, and development has been supported by the +[MRC Centre for Genomics and Global Health](https://www.cggh.org) +and the [Chan Zuckerberg Initiative](https://chanzuckerberg.com/). + +[Donate to Zarr](https://numfocus.org/donate-to-zarr) to support the project! diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 83d427e290..0000000000 --- a/docs/index.rst +++ /dev/null @@ -1,113 +0,0 @@ -.. _zarr_docs_mainpage: - -*********** -Zarr-Python -*********** - -.. toctree:: - :maxdepth: 1 - :hidden: - - quickstart - user-guide/index - API reference - release-notes - developers/index - about - -**Version**: |version| - -**Useful links**: -`Source Repository `_ | -`Issue Tracker `_ | -`Developer Chat `_ | -`Zarr specifications `_ - -Zarr-Python is a Python library for reading and writing Zarr groups and arrays. Highlights include: - -* Specification support for both Zarr format 2 and 3. -* Create and read from N-dimensional arrays using NumPy-like semantics. -* Flexible storage enables reading and writing from local, cloud and in-memory stores. -* High performance: Enables fast I/O with support for asynchronous I/O and multi-threading. -* Extensible: Customizable with user-defined codecs and stores. - -.. grid:: 2 - - .. grid-item-card:: - :img-top: _static/index_getting_started.svg - - Quick Start - ^^^^^^^^^^^ - - New to Zarr? Check out the quick start guide. It contains a brief - introduction to Zarr's main concepts and links to additional tutorials. - - +++ - - .. button-ref:: quickstart - :expand: - :color: dark - :click-parent: - - To the Quick Start - - .. grid-item-card:: - :img-top: _static/index_user_guide.svg - - Guide - ^^^^^ - - A detailed guide for how to use Zarr-Python. - - +++ - - .. button-ref:: user-guide/index - :expand: - :color: dark - :click-parent: - - To the user guide - - .. 
grid-item-card:: - :img-top: _static/index_api.svg - - API Reference - ^^^^^^^^^^^^^ - - The reference guide contains a detailed description of the functions, - modules, and objects included in Zarr. The reference describes how the - methods work and which parameters can be used. It assumes that you have an - understanding of the key concepts. - - +++ - - .. button-ref:: api/zarr/index - :expand: - :color: dark - :click-parent: - - To the API reference - - .. grid-item-card:: - :img-top: _static/index_contribute.svg - - Contributor's Guide - ^^^^^^^^^^^^^^^^^^^ - - Want to contribute to Zarr? We welcome contributions in the form of bug reports, - bug fixes, documentation, enhancement proposals and more. The contributing guidelines - will guide you through the process of improving Zarr. - - +++ - - .. button-ref:: developers/contributing - :expand: - :color: dark - :click-parent: - - To the contributor's guide - - -**Download documentation**: `PDF/Zipped HTML `_ - -.. _NumCodecs: https://numcodecs.readthedocs.io diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 0000000000..d61a1f54dc --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,9 @@ + +{% extends "base.html" %} + +{% block outdated %} + You're not viewing the latest version. + + Click here to go to latest. 
+ +{% endblock %} diff --git a/docs/overrides/stylesheets/extra.css b/docs/overrides/stylesheets/extra.css new file mode 100644 index 0000000000..3e2ef3d330 --- /dev/null +++ b/docs/overrides/stylesheets/extra.css @@ -0,0 +1,52 @@ +:root, +[data-md-color-scheme="default"] { + /* --md-primary-fg-color: #cf3f02; + --md-default-fg-color: #443f3f; */ + --boxShadowD: 0px 12px 24px 0px rgba(68, 63, 63, 0.08), + 0px 0px 4px 0px rgba(68, 63, 63, 0.08); +} +body { + margin: 0; + padding: 0; + /* font-size: 16px; */ +} +h1, +h2, +h3, +h4, +h5, +h6 { + font-family: var(--md-heading-font); + font-weight: bold; +} +.md-typeset h1, +.md-typeset h2 { + font-weight: normal; + color: var(--md-default-fg-color); +} +.md-typeset h3, +.md-typeset h4 { + font-weight: bold; + color: var(--md-default-fg-color); +} +.md-button, +.md-typeset .md-button { + font-family: var(--md-heading-font); +} +.md-content .supheading { + font-family: var(--md-heading-font); + text-transform: uppercase; + color: var(--md-primary-fg-color); + font-size: 0.75rem; + font-weight: bold; +} + +.md-header__button.md-logo img, +.md-header__button.md-logo svg { + height: 2rem; + width: auto; +} + +.md-header { + padding: 0.2rem 0; +} diff --git a/docs/quickstart.rst b/docs/quickstart.rst deleted file mode 100644 index 66bdae2a2e..0000000000 --- a/docs/quickstart.rst +++ /dev/null @@ -1,209 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - >>> - >>> import numpy as np - >>> np.random.seed(0) - -Quickstart -========== - -Welcome to the Zarr-Python Quickstart guide! This page will help you get up and running with -the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. - -Zarr is a powerful library for storage of n-dimensional arrays, supporting chunking, -compression, and various backends, making it a versatile choice for scientific and -large-scale data. - -Installation ------------- - -Zarr requires Python 3.11 or higher. 
You can install it via `pip`: - -.. code-block:: bash - - pip install zarr - -or `conda`: - -.. code-block:: bash - - conda install --channel conda-forge zarr - -Creating an Array ------------------ - -To get started, you can create a simple Zarr array:: - - >>> import zarr - >>> import numpy as np - >>> - >>> # Create a 2D Zarr array - >>> z = zarr.create_array( - ... store="data/example-1.zarr", - ... shape=(100, 100), - ... chunks=(10, 10), - ... dtype="f4" - ... ) - >>> - >>> # Assign data to the array - >>> z[:, :] = np.random.random((100, 100)) - >>> z.info - Type : Array - Zarr format : 3 - Data type : DataType.float32 - Shape : (100, 100) - Chunk shape : (10, 10) - Order : C - Read-only : False - Store type : LocalStore - Codecs : [{'endian': }, {'level': 0, 'checksum': False}] - No. bytes : 40000 (39.1K) - -Here, we created a 2D array of shape ``(100, 100)``, chunked into blocks of -``(10, 10)``, and filled it with random floating-point data. This array was -written to a ``LocalStore`` in the ``data/example-1.zarr`` directory. - -Compression and Filters -~~~~~~~~~~~~~~~~~~~~~~~ - -Zarr supports data compression and filters. For example, to use Blosc compression:: - - >>> z = zarr.create_array( - ... "data/example-3.zarr", - ... mode="w", shape=(100, 100), - ... chunks=(10, 10), dtype="f4", - ... compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) - ... ) - >>> z[:, :] = np.random.random((100, 100)) - >>> - >>> z.info - Type : Array - Zarr format : 3 - Data type : DataType.float32 - Shape : (100, 100) - Chunk shape : (10, 10) - Order : C - Read-only : False - Store type : LocalStore - Codecs : [{'endian': }, {'level': 0, 'checksum': False}] - No. bytes : 40000 (39.1K) - -This compresses the data using the Zstandard codec with shuffle enabled for better compression. 
- -Hierarchical Groups -------------------- - -Zarr allows you to create hierarchical groups, similar to directories:: - - >>> # Create nested groups and add arrays - >>> root = zarr.group("data/example-2.zarr") - >>> foo = root.create_group(name="foo") - >>> bar = root.create_array( - ... name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" - ... ) - >>> spam = foo.create_array(name="spam", shape=(10,), dtype="i4") - >>> - >>> # Assign values - >>> bar[:, :] = np.random.random((100, 10)) - >>> spam[:] = np.arange(10) - >>> - >>> # print the hierarchy - >>> root.tree() - / - ├── bar (100, 10) float32 - └── foo - └── spam (10,) int32 - - -This creates a group with two datasets: ``foo`` and ``bar``. - -Batch Hierarchy Creation -~~~~~~~~~~~~~~~~~~~~~~~~ - -Zarr provides tools for creating a collection of arrays and groups with a single function call. -Suppose we want to copy existing groups and arrays into a new storage backend: - - >>> # Create nested groups and add arrays - >>> root = zarr.group("data/example-3.zarr", attributes={'name': 'root'}) - >>> foo = root.create_group(name="foo") - >>> bar = root.create_array( - ... name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" - ... ) - >>> nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} - >>> print(nodes) - >>> from zarr.storage import MemoryStore - >>> new_nodes = dict(zarr.create_hierarchy(store=MemoryStore(), nodes=nodes)) - >>> new_root = new_nodes[''] - >>> assert new_root.attrs == root.attrs - -Note that :func:`zarr.create_hierarchy` will only initialize arrays and groups -- copying array data must -be done in a separate step. - -Persistent Storage ------------------- - -Zarr supports persistent storage to disk or cloud-compatible backends. While examples above -utilized a :class:`zarr.storage.LocalStore`, a number of other storage options are available. 
- -Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage -using external libraries like `s3fs `_ or -`gcsfs `_:: - - >>> import s3fs # doctest: +SKIP - >>> - >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") # doctest: +SKIP - >>> z[:, :] = np.random.random((100, 100)) # doctest: +SKIP - -A single-file store can also be created using the the :class:`zarr.storage.ZipStore`:: - - >>> # Store the array in a ZIP file - >>> store = zarr.storage.ZipStore("data/example-3.zip", mode='w') - >>> - >>> z = zarr.create_array( - ... store=store, - ... mode="w", - ... shape=(100, 100), - ... chunks=(10, 10), - ... dtype="f4" - ... ) - >>> - >>> # write to the array - >>> z[:, :] = np.random.random((100, 100)) - >>> - >>> # the ZipStore must be explicitly closed - >>> store.close() - -To open an existing array from a ZIP file:: - - >>> # Open the ZipStore in read-only mode - >>> store = zarr.storage.ZipStore("data/example-3.zip", read_only=True) - >>> - >>> z = zarr.open_array(store, mode='r') - >>> - >>> # read the data as a NumPy Array - >>> z[:] - array([[0.66734236, 0.15667458, 0.98720884, ..., 0.36229587, 0.67443246, - 0.34315267], - [0.65787303, 0.9544212 , 0.4830079 , ..., 0.33097172, 0.60423803, - 0.45621237], - [0.27632037, 0.9947008 , 0.42434934, ..., 0.94860053, 0.6226942 , - 0.6386924 ], - ..., - [0.12854576, 0.934397 , 0.19524333, ..., 0.11838563, 0.4967675 , - 0.43074256], - [0.82029045, 0.4671437 , 0.8090906 , ..., 0.7814118 , 0.42650765, - 0.95929915], - [0.4335856 , 0.7565437 , 0.7828931 , ..., 0.48119593, 0.66220033, - 0.6652362 ]], shape=(100, 100), dtype=float32) - -Read more about Zarr's storage options in the :ref:`User Guide `. 
- -Next Steps ----------- - -Now that you're familiar with the basics, explore the following resources: - -- `User Guide `_ -- `API Reference `_ diff --git a/docs/release-notes.md b/docs/release-notes.md new file mode 100644 index 0000000000..0f576b4f4e --- /dev/null +++ b/docs/release-notes.md @@ -0,0 +1,210 @@ +# Release notes + +## 3.0.8 (2025-05-19) + +> **Warning** +> In versions 3.0.0 to 3.0.7 opening arrays or groups with `mode='a'` (the default for many builtin functions) +> would cause any existing paths in the store to be deleted. This is fixed in 3.0.8, and +> we recommend all users upgrade to avoid this bug that could cause unintentional data loss. + +### Features + +- Added a `print_debug_info` function for bug reports. ([#2913](https://github.com/zarr-developers/zarr-python/issues/2913)) + +### Bugfixes + +- Fix a bug that prevented the number of initialized chunks being counted properly. ([#2862](https://github.com/zarr-developers/zarr-python/issues/2862)) +- Fixed sharding with GPU buffers. ([#2978](https://github.com/zarr-developers/zarr-python/issues/2978)) +- Fix structured `dtype` fill value serialization for consolidated metadata ([#2998](https://github.com/zarr-developers/zarr-python/issues/2998)) +- It is now possible to specify no compressor when creating a zarr format 2 array. + This can be done by passing `compressor=None` to the various array creation routines. + + The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given. + To reproduce the behaviour in previous zarr-python versions when `compressor=None` was passed, pass `compressor='auto'` instead. ([#3039](https://github.com/zarr-developers/zarr-python/issues/3039)) +- Fixed the typing of `dimension_names` arguments throughout so that it now accepts iterables that contain `None` alongside `str`. 
([#3045](https://github.com/zarr-developers/zarr-python/issues/3045)) +- Using various functions to open data with `mode='a'` no longer deletes existing data in the store. ([#3062](https://github.com/zarr-developers/zarr-python/issues/3062)) +- Internally use `typesize` constructor parameter for `numcodecs.blosc.Blosc` to improve compression ratios back to the v2-package levels. ([#2962](https://github.com/zarr-developers/zarr-python/issues/2962)) +- Specifying the memory order of Zarr format 2 arrays using the `order` keyword argument has been fixed. ([#2950](https://github.com/zarr-developers/zarr-python/issues/2950)) + +### Misc + +- [#2972](https://github.com/zarr-developers/zarr-python/issues/2972), [#3027](https://github.com/zarr-developers/zarr-python/issues/3027), [#3049](https://github.com/zarr-developers/zarr-python/issues/3049) + +## 3.0.7 (2025-04-22) + +### Features + +- Add experimental ObjectStore storage class based on obstore. ([#1661](https://github.com/zarr-developers/zarr-python/issues/1661)) +- Add `zarr.from_array` using concurrent streaming of source data ([#2622](https://github.com/zarr-developers/zarr-python/issues/2622)) + +### Bugfixes + +- 0-dimensional arrays are now returning a scalar. Therefore, the return type of `__getitem__` changed + to NDArrayLikeOrScalar. This change is to make the behavior of 0-dimensional arrays consistent with + `numpy` scalars. ([#2718](https://github.com/zarr-developers/zarr-python/issues/2718)) +- Fix `fill_value` serialization for `NaN` in `ArrayV2Metadata` and add property-based testing of round-trip serialization ([#2802](https://github.com/zarr-developers/zarr-python/issues/2802)) +- Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be + consistent with the behavior of `ArrayMetadata`. ([#2996](https://github.com/zarr-developers/zarr-python/issues/2996)) + +### Improved Documentation + +- Updated the 3.0 migration guide to include the removal of "." 
syntax for getting group members. ([#2991](https://github.com/zarr-developers/zarr-python/issues/2991), [#2997](https://github.com/zarr-developers/zarr-python/issues/2997)) + +### Misc + +- Define a new versioning policy based on Effective Effort Versioning. This replaces the old Semantic + Versioning-based policy. ([#2924](https://github.com/zarr-developers/zarr-python/issues/2924), [#2910](https://github.com/zarr-developers/zarr-python/issues/2910)) +- Make warning filters in the tests more specific, so warnings emitted by tests added in the future + are more likely to be caught instead of ignored. ([#2714](https://github.com/zarr-developers/zarr-python/issues/2714)) +- Avoid an unnecessary memory copy when writing Zarr to a local file ([#2944](https://github.com/zarr-developers/zarr-python/issues/2944)) + +## 3.0.6 (2025-03-20) + +### Bugfixes + +- Restore functionality of `del z.attrs['key']` to actually delete the key. ([#2908](https://github.com/zarr-developers/zarr-python/issues/2908)) + +## 3.0.5 (2025-03-07) + +### Bugfixes + +- Fixed a bug where `StorePath` creation would not apply standard path normalization to the `path` parameter, + which led to the creation of arrays and groups with invalid keys. ([#2850](https://github.com/zarr-developers/zarr-python/issues/2850)) +- Prevent update_attributes calls from deleting old attributes ([#2870](https://github.com/zarr-developers/zarr-python/issues/2870)) + +### Misc + +- [#2796](https://github.com/zarr-developers/zarr-python/issues/2796) + +## 3.0.4 (2025-02-23) + +### Features + +- Adds functions for concurrently creating multiple arrays and groups. ([#2665](https://github.com/zarr-developers/zarr-python/issues/2665)) + +### Bugfixes + +- Fixed a bug where `ArrayV2Metadata` could save `filters` as an empty array. ([#2847](https://github.com/zarr-developers/zarr-python/issues/2847)) +- Fix a bug when setting values of a smaller last chunk. 
([#2851](https://github.com/zarr-developers/zarr-python/issues/2851)) + +### Misc + +- [#2828](https://github.com/zarr-developers/zarr-python/issues/2828) + +## 3.0.3 (2025-02-14) + +### Features + +- Improves performance of FsspecStore.delete_dir for remote filesystems supporting concurrent/batched deletes, e.g., s3fs. ([#2661](https://github.com/zarr-developers/zarr-python/issues/2661)) +- Added `zarr.config.enable_gpu` to update Zarr's configuration to use GPUs. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) +- Avoid reading chunks during writes where possible. [#757](https://github.com/zarr-developers/zarr-python/issues/757) ([#2784](https://github.com/zarr-developers/zarr-python/issues/2784)) +- `LocalStore` learned to `delete_dir`. This makes array and group deletes more efficient. ([#2804](https://github.com/zarr-developers/zarr-python/issues/2804)) +- Add `zarr.testing.strategies.array_metadata` to generate ArrayV2Metadata and ArrayV3Metadata instances. ([#2813](https://github.com/zarr-developers/zarr-python/issues/2813)) +- Add arbitrary `shards` to Hypothesis strategy for generating arrays. ([#2822](https://github.com/zarr-developers/zarr-python/issues/2822)) + +### Bugfixes + +- Fixed bug with Zarr using device memory, instead of host memory, for storing metadata when using GPUs. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) +- The array returned by `zarr.empty` and an empty `zarr.core.buffer.cpu.NDBuffer` will now be filled with the + specified fill value, or with zeros if no fill value is provided. + This fixes a bug where Zarr format 2 data with no fill value was written with un-predictable chunk sizes. ([#2755](https://github.com/zarr-developers/zarr-python/issues/2755)) +- Fix zip-store path checking for stores with directories listed as files. 
([#2758](https://github.com/zarr-developers/zarr-python/issues/2758)) +- Use removeprefix rather than replace when removing filename prefixes in `FsspecStore.list` ([#2778](https://github.com/zarr-developers/zarr-python/issues/2778)) +- Enable automatic removal of `needs release notes` with labeler action ([#2781](https://github.com/zarr-developers/zarr-python/issues/2781)) +- Use the proper label config ([#2785](https://github.com/zarr-developers/zarr-python/issues/2785)) +- Alters the behavior of `create_array` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object. ([#2795](https://github.com/zarr-developers/zarr-python/issues/2795)) +- Initialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types ([#2799](https://github.com/zarr-developers/zarr-python/issues/2799)) +- Ensure utf8 compliant strings are used to construct numpy arrays in property-based tests ([#2801](https://github.com/zarr-developers/zarr-python/issues/2801)) +- Fix pickling for ZipStore ([#2807](https://github.com/zarr-developers/zarr-python/issues/2807)) +- Update numcodecs to not overwrite codec configuration ever. Closes [#2800](https://github.com/zarr-developers/zarr-python/issues/2800). ([#2811](https://github.com/zarr-developers/zarr-python/issues/2811)) +- Fix fancy indexing (e.g. arr[5, [0, 1]]) with the sharding codec ([#2817](https://github.com/zarr-developers/zarr-python/issues/2817)) + +### Improved Documentation + +- Added new user guide on GPU. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) + +## 3.0.2 (2025-01-31) + +### Features + +- Test `getsize()` and `getsize_prefix()` in `StoreTests`.
([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Test that a `ValueError` is raised for invalid byte range syntax in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Separate instantiating and opening a store in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Add a test for using Stores as context managers in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Implemented `LoggingStore.open()`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- `LoggingStore` is now a generic class. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Change StoreTest's `test_store_repr`, `test_store_supports_writes`, + `test_store_supports_partial_writes`, and `test_store_supports_listing` + to be implemented using `@abstractmethod`, rather than raising `NotImplementedError`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Test the error raised for invalid buffer arguments in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Test that data can be written to a store that's not yet open using the store.set method in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Adds a new function `init_array` for initializing an array in storage, and refactors `create_array` + to use `init_array`. `create_array` takes two new parameters: `data`, an optional array-like object, and `write_data`, a bool which defaults to `True`. + If `data` is given to `create_array`, then the `dtype` and `shape` attributes of `data` are used to define the + corresponding attributes of the resulting Zarr array. Additionally, if `data` is given and `write_data` is `True`, + then the values in `data` will be written to the newly created array.
([#2761](https://github.com/zarr-developers/zarr-python/issues/2761)) + +### Bugfixes + +- Wrap sync fsspec filesystems with `AsyncFileSystemWrapper`. ([#2533](https://github.com/zarr-developers/zarr-python/issues/2533)) +- Added backwards compatibility for Zarr format 2 structured arrays. ([#2681](https://github.com/zarr-developers/zarr-python/issues/2681)) +- Update equality for `LoggingStore` and `WrapperStore` such that 'other' must also be a `LoggingStore` or `WrapperStore` respectively, rather than only checking the types of the stores they wrap. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Ensure that `ZipStore` is open before getting or setting any values. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Use stdout rather than stderr as the default stream for `LoggingStore`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Match the errors raised by read only stores in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) +- Fixed `ZipStore` to make sure the correct attributes are saved when instances are pickled. + This fixes a previous bug that prevented using `ZipStore` with a `ProcessPoolExecutor`. ([#2762](https://github.com/zarr-developers/zarr-python/issues/2762)) +- Updated the optional test dependencies to include `botocore` and `fsspec`. ([#2768](https://github.com/zarr-developers/zarr-python/issues/2768)) +- Fixed the fsspec tests to skip if `botocore` is not installed. + Previously they would have failed with an import error. ([#2768](https://github.com/zarr-developers/zarr-python/issues/2768)) +- Optimize full chunk writes. ([#2782](https://github.com/zarr-developers/zarr-python/issues/2782)) + +### Improved Documentation + +- Changed the machinery for creating changelog entries. + Now individual entries should be added as files to the `changes` directory in the `zarr-python` repository, instead of directly to the changelog file.
([#2736](https://github.com/zarr-developers/zarr-python/issues/2736)) + +### Other + +- Created a type alias `ChunkKeyEncodingLike` to model the union of `ChunkKeyEncoding` instances and the dict form of the + parameters of those instances. `ChunkKeyEncodingLike` should be used by high-level functions to provide a convenient + way for creating `ChunkKeyEncoding` objects. ([#2763](https://github.com/zarr-developers/zarr-python/issues/2763)) + +## 3.0.1 (Jan. 17, 2025) + +* Implement `zarr.from_array` using concurrent streaming ([#2622](https://github.com/zarr-developers/zarr-python/issues/2622)). + +### Bug fixes + +* Fixes `order` argument for Zarr format 2 arrays ([#2679](https://github.com/zarr-developers/zarr-python/issues/2679)). +* Fixes a bug that prevented reading Zarr format 2 data with consolidated + metadata written using `zarr-python` version 2 ([#2694](https://github.com/zarr-developers/zarr-python/issues/2694)). +* Ensure that compressor=None results in no compression when writing Zarr + format 2 data ([#2708](https://github.com/zarr-developers/zarr-python/issues/2708)). +* Fix for empty consolidated metadata dataset: backwards compatibility with + Zarr-Python 2 ([#2695](https://github.com/zarr-developers/zarr-python/issues/2695)). + +### Documentation + +* Add v3.0.0 release announcement banner ([#2677](https://github.com/zarr-developers/zarr-python/issues/2677)). +* Quickstart guide alignment with V3 API ([#2697](https://github.com/zarr-developers/zarr-python/issues/2697)). +* Fix doctest failures related to numcodecs 0.15 ([#2727](https://github.com/zarr-developers/zarr-python/issues/2727)). + +### Other + +* Removed some unnecessary files from the source distribution + to reduce its size. ([#2686](https://github.com/zarr-developers/zarr-python/issues/2686)). +* Enable codecov in GitHub actions ([#2682](https://github.com/zarr-developers/zarr-python/issues/2682)). 
+* Speed up hypothesis tests ([#2650](https://github.com/zarr-developers/zarr-python/issues/2650)). +* Remove multiple imports for an import name ([#2723](https://github.com/zarr-developers/zarr-python/issues/2723)). + +## 3.0.0 (Jan. 9, 2025) + +3.0.0 is a new major release of Zarr-Python, with many breaking changes. +See the [v3 migration guide](user-guide/v3_migration.md) for a listing of what's changed. + +Normal release note service will resume with further releases in the 3.0.0 +series. + +Release notes for the zarr-python 2.x and 1.x releases can be found here: +https://zarr.readthedocs.io/en/support-v2/release.html diff --git a/docs/release-notes.rst b/docs/release-notes.rst deleted file mode 100644 index a89046dd6d..0000000000 --- a/docs/release-notes.rst +++ /dev/null @@ -1,269 +0,0 @@ -Release notes -============= - -.. towncrier release notes start - -3.0.8 (2025-05-19) ------------------- - -.. warning:: - - In versions 3.0.0 to 3.0.7 opening arrays or groups with ``mode='a'`` (the default for many builtin functions) - would cause any existing paths in the store to be deleted. This is fixed in 3.0.8, and - we recommend all users upgrade to avoid this bug that could cause unintentional data loss. - -Features -~~~~~~~~ - -- Added a `print_debug_info` function for bug reports. (:issue:`2913`) - - -Bugfixes -~~~~~~~~ - -- Fix a bug that prevented the number of initialized chunks being counted properly. (:issue:`2862`) -- Fixed sharding with GPU buffers. (:issue:`2978`) -- Fix structured `dtype` fill value serialization for consolidated metadata (:issue:`2998`) -- It is now possible to specify no compressor when creating a zarr format 2 array. - This can be done by passing ``compressor=None`` to the various array creation routines. - - The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given. 
- To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead. (:issue:`3039`) -- Fixed the typing of ``dimension_names`` arguments throughout so that it now accepts iterables that contain `None` alongside `str`. (:issue:`3045`) -- Using various functions to open data with ``mode='a'`` no longer deletes existing data in the store. (:issue:`3062`) -- Internally use `typesize` constructor parameter for :class:`numcodecs.blosc.Blosc` to improve compression ratios back to the v2-package levels. (:issue:`2962`) -- Specifying the memory order of Zarr format 2 arrays using the ``order`` keyword argument has been fixed. (:issue:`2950`) - - -Misc -~~~~ - -- :issue:`2972`, :issue:`3027`, :issue:`3049` - - -3.0.7 (2025-04-22) ------------------- - -Features -~~~~~~~~ - -- Add experimental ObjectStore storage class based on obstore. (:issue:`1661`) -- Add ``zarr.from_array`` using concurrent streaming of source data (:issue:`2622`) - - -Bugfixes -~~~~~~~~ - -- 0-dimensional arrays are now returning a scalar. Therefore, the return type of ``__getitem__`` changed - to NDArrayLikeOrScalar. This change is to make the behavior of 0-dimensional arrays consistent with - ``numpy`` scalars. (:issue:`2718`) -- Fix `fill_value` serialization for `NaN` in `ArrayV2Metadata` and add property-based testing of round-trip serialization (:issue:`2802`) -- Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be - consistent with the behavior of `ArrayMetadata`. (:issue:`2996`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Updated the 3.0 migration guide to include the removal of "." syntax for getting group members. (:issue:`2991`, :issue:`2997`) - - -Misc -~~~~ -- Define a new versioning policy based on Effective Effort Versioning. This replaces the old Semantic - Versioning-based policy. 
(:issue:`2924`, :issue:`2910`) -- Make warning filters in the tests more specific, so warnings emitted by tests added in the future - are more likely to be caught instead of ignored. (:issue:`2714`) -- Avoid an unnecessary memory copy when writing Zarr to a local file (:issue:`2944`) - - -3.0.6 (2025-03-20) ------------------- - -Bugfixes -~~~~~~~~ - -- Restore functionality of `del z.attrs['key']` to actually delete the key. (:issue:`2908`) - - -3.0.5 (2025-03-07) ------------------- - -Bugfixes -~~~~~~~~ - -- Fixed a bug where ``StorePath`` creation would not apply standard path normalization to the ``path`` parameter, - which led to the creation of arrays and groups with invalid keys. (:issue:`2850`) -- Prevent update_attributes calls from deleting old attributes (:issue:`2870`) - - -Misc -~~~~ - -- :issue:`2796` - -3.0.4 (2025-02-23) ------------------- - -Features -~~~~~~~~ - -- Adds functions for concurrently creating multiple arrays and groups. (:issue:`2665`) - -Bugfixes -~~~~~~~~ - -- Fixed a bug where ``ArrayV2Metadata`` could save ``filters`` as an empty array. (:issue:`2847`) -- Fix a bug when setting values of a smaller last chunk. (:issue:`2851`) - -Misc -~~~~ - -- :issue:`2828` - - -3.0.3 (2025-02-14) ------------------- - -Features -~~~~~~~~ - -- Improves performance of FsspecStore.delete_dir for remote filesystems supporting concurrent/batched deletes, e.g., s3fs. (:issue:`2661`) -- Added :meth:`zarr.config.enable_gpu` to update Zarr's configuration to use GPUs. (:issue:`2751`) -- Avoid reading chunks during writes where possible. :issue:`757` (:issue:`2784`) -- :py:class:`LocalStore` learned to ``delete_dir``. This makes array and group deletes more efficient. (:issue:`2804`) -- Add `zarr.testing.strategies.array_metadata` to generate ArrayV2Metadata and ArrayV3Metadata instances. (:issue:`2813`) -- Add arbitrary `shards` to Hypothesis strategy for generating arrays. 
(:issue:`2822`) - - -Bugfixes -~~~~~~~~ - -- Fixed bug with Zarr using device memory, instead of host memory, for storing metadata when using GPUs. (:issue:`2751`) -- The array returned by ``zarr.empty`` and an empty ``zarr.core.buffer.cpu.NDBuffer`` will now be filled with the - specified fill value, or with zeros if no fill value is provided. - This fixes a bug where Zarr format 2 data with no fill value was written with un-predictable chunk sizes. (:issue:`2755`) -- Fix zip-store path checking for stores with directories listed as files. (:issue:`2758`) -- Use removeprefix rather than replace when removing filename prefixes in `FsspecStore.list` (:issue:`2778`) -- Enable automatic removal of `needs release notes` with labeler action (:issue:`2781`) -- Use the proper label config (:issue:`2785`) -- Alters the behavior of ``create_array`` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object. (:issue:`2795`) -- Enitialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types (:issue:`2799`) -- Ensure utf8 compliant strings are used to construct numpy arrays in property-based tests (:issue:`2801`) -- Fix pickling for ZipStore (:issue:`2807`) -- Update numcodecs to not overwrite codec configuration ever. Closes :issue:`2800`. (:issue:`2811`) -- Fix fancy indexing (e.g. arr[5, [0, 1]]) with the sharding codec (:issue:`2817`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Added new user guide on :ref:`user-guide-gpu`. (:issue:`2751`) - - -3.0.2 (2025-01-31) ------------------- - -Features -~~~~~~~~ - -- Test ``getsize()`` and ``getsize_prefix()`` in ``StoreTests``. (:issue:`2693`) -- Test that a ``ValueError`` is raised for invalid byte range syntax in ``StoreTests``. 
(:issue:`2693`) -- Separate instantiating and opening a store in ``StoreTests``. (:issue:`2693`) -- Add a test for using Stores as a context managers in ``StoreTests``. (:issue:`2693`) -- Implemented ``LogingStore.open()``. (:issue:`2693`) -- ``LoggingStore`` is now a generic class. (:issue:`2693`) -- Change StoreTest's ``test_store_repr``, ``test_store_supports_writes``, - ``test_store_supports_partial_writes``, and ``test_store_supports_listing`` - to to be implemented using ``@abstractmethod``, rather raising ``NotImplementedError``. (:issue:`2693`) -- Test the error raised for invalid buffer arguments in ``StoreTests``. (:issue:`2693`) -- Test that data can be written to a store that's not yet open using the store.set method in ``StoreTests``. (:issue:`2693`) -- Adds a new function ``init_array`` for initializing an array in storage, and refactors ``create_array`` - to use ``init_array``. ``create_array`` takes two new parameters: ``data``, an optional array-like object, and ``write_data``, a bool which defaults to ``True``. - If ``data`` is given to ``create_array``, then the ``dtype`` and ``shape`` attributes of ``data`` are used to define the - corresponding attributes of the resulting Zarr array. Additionally, if ``data`` given and ``write_data`` is ``True``, - then the values in ``data`` will be written to the newly created array. (:issue:`2761`) - - -Bugfixes -~~~~~~~~ - -- Wrap sync fsspec filesystems with ``AsyncFileSystemWrapper``. (:issue:`2533`) -- Added backwards compatibility for Zarr format 2 structured arrays. (:issue:`2681`) -- Update equality for ``LoggingStore`` and ``WrapperStore`` such that 'other' must also be a ``LoggingStore`` or ``WrapperStore`` respectively, rather than only checking the types of the stores they wrap. (:issue:`2693`) -- Ensure that ``ZipStore`` is open before getting or setting any values. (:issue:`2693`) -- Use stdout rather than stderr as the default stream for ``LoggingStore``. 
(:issue:`2693`) -- Match the errors raised by read only stores in ``StoreTests``. (:issue:`2693`) -- Fixed ``ZipStore`` to make sure the correct attributes are saved when instances are pickled. - This fixes a previous bug that prevent using ``ZipStore`` with a ``ProcessPoolExecutor``. (:issue:`2762`) -- Updated the optional test dependencies to include ``botocore`` and ``fsspec``. (:issue:`2768`) -- Fixed the fsspec tests to skip if ``botocore`` is not installed. - Previously they would have failed with an import error. (:issue:`2768`) -- Optimize full chunk writes. (:issue:`2782`) - - -Improved Documentation -~~~~~~~~~~~~~~~~~~~~~~ - -- Changed the machinery for creating changelog entries. - Now individual entries should be added as files to the `changes` directory in the `zarr-python` repository, instead of directly to the changelog file. (:issue:`2736`) - -Other -~~~~~ - -- Created a type alias ``ChunkKeyEncodingLike`` to model the union of ``ChunkKeyEncoding`` instances and the dict form of the - parameters of those instances. ``ChunkKeyEncodingLike`` should be used by high-level functions to provide a convenient - way for creating ``ChunkKeyEncoding`` objects. (:issue:`2763`) - - -3.0.1 (Jan. 17, 2025) ---------------------- - -* Implement ``zarr.from_array`` using concurrent streaming (:issue:`2622`). - -Bug fixes -~~~~~~~~~ -* Fixes ``order`` argument for Zarr format 2 arrays (:issue:`2679`). - -* Fixes a bug that prevented reading Zarr format 2 data with consolidated - metadata written using ``zarr-python`` version 2 (:issue:`2694`). - -* Ensure that compressor=None results in no compression when writing Zarr - format 2 data (:issue:`2708`). - -* Fix for empty consolidated metadata dataset: backwards compatibility with - Zarr-Python 2 (:issue:`2695`). - -Documentation -~~~~~~~~~~~~~ -* Add v3.0.0 release announcement banner (:issue:`2677`). - -* Quickstart guide alignment with V3 API (:issue:`2697`). 
- -* Fix doctest failures related to numcodecs 0.15 (:issue:`2727`). - -Other -~~~~~ -* Removed some unnecessary files from the source distribution - to reduce its size. (:issue:`2686`). - -* Enable codecov in GitHub actions (:issue:`2682`). - -* Speed up hypothesis tests (:issue:`2650`). - -* Remove multiple imports for an import name (:issue:`2723`). - - -.. _release_3.0.0: - -3.0.0 (Jan. 9, 2025) --------------------- - -3.0.0 is a new major release of Zarr-Python, with many breaking changes. -See the :ref:`v3 migration guide` for a listing of what's changed. - -Normal release note service will resume with further releases in the 3.0.0 -series. - -Release notes for the zarr-python 2.x and 1.x releases can be found here: -https://zarr.readthedocs.io/en/support-v2/release.html diff --git a/docs/talks/scipy2019/submission.rst b/docs/talks/scipy2019/submission.rst deleted file mode 100644 index 57fd925b1f..0000000000 --- a/docs/talks/scipy2019/submission.rst +++ /dev/null @@ -1,144 +0,0 @@ -Zarr - scalable storage of tensor data for use in parallel and distributed computing -==================================================================================== - -SciPy 2019 submission. - - -Short summary -------------- - -Many scientific problems involve computing over large N-dimensional -typed arrays of data, and reading or writing data is often the major -bottleneck limiting speed or scalability. The Zarr project is -developing a simple, scalable approach to storage of such data in a -way that is compatible with a range of approaches to distributed and -parallel computing. We describe the Zarr protocol and data storage -format, and the current state of implementations for various -programming languages including Python. We also describe current uses -of Zarr in malaria genomics, the Human Cell Atlas, and the Pangeo -project. 
- - -Abstract --------- - -Background -~~~~~~~~~~ - -Across a broad range of scientific disciplines, data are naturally -represented and stored as N-dimensional typed arrays, also known as -tensors. The volume of data being generated is outstripping our -ability to analyse it, and scientific communities are looking for ways -to leverage modern multi-core CPUs and distributed computing -platforms, including cloud computing. Retrieval and storage of data is -often the major bottleneck, and new approaches to data storage are -needed to accelerate distributed computations and enable them to scale -on a variety of platforms. - -Methods -~~~~~~~ - -We have designed a new storage format and protocol for tensor data -[1_], and have released an open source Python implementation [2_, -3_]. Our approach builds on data storage concepts from HDF5 [4_], -particularly chunking and compression, and hierarchical organisation -of datasets. Key design goals include: a simple protocol and format -that can be implemented in other programming languages; support for -multiple concurrent readers or writers; support for a variety of -parallel computing environments, from multi-threaded execution on a -single CPU to multi-process execution across a multi-node cluster; -pluggable storage subsystem with support for file systems, key-value -databases and cloud object stores; pluggable encoding subsystem with -support for a variety of modern compressors. - -Results -~~~~~~~ - -We illustrate the use of Zarr with examples from several scientific -domains. Zarr is being used within the Pangeo project [5_], which is -building a community platform for big data geoscience. The Pangeo -community have converted a number of existing climate modelling and -satellite observation datasets to Zarr [6_], and have demonstrated -their use in computations using HPC and cloud computing -environments. 
Within the MalariaGEN project [7_], Zarr is used to -store genome variation data from next-generation sequencing of natural -populations of malaria parasites and mosquitoes [8_] and these data -are used as input to analyses of the evolution of these organisms in -response to selective pressure from anti-malarial drugs and -insecticides. Zarr is being used within the Human Cell Atlas (HCA) -project [9_], which is building a reference atlas of healthy human -cell types. This project hopes to leverage this information to better -understand the dysregulation of cellular states that underly human -disease. The Human Cell Atlas uses Zarr as the output data format -because it enables the project to easily generate matrices containing -user-selected subsets of cells. - -Conclusions -~~~~~~~~~~~ - -Zarr is generating interest across a range of scientific domains, and -work is ongoing to establish a community process to support further -development of the specifications and implementations in other -programming languages [10_, 11_, 12_] and building interoperability -with a similar project called N5 [13_]. Other packages within the -PyData ecosystem, notably Dask [14_], Xarray [15_] and Intake [16_], -have added capability to read and write Zarr, and together these -packages provide a compelling solution for large scale data science -using Python [17_]. Zarr has recently been presented in several -venues, including a webinar for the ESIP Federation tech dive series -[18_], and a talk at the AGU Fall Meeting 2018 [19_]. - - -References -~~~~~~~~~~ - -.. _1: https://zarr.readthedocs.io/en/stable/spec/v2.html -.. _2: https://github.com/zarr-developers/zarr-python -.. _3: https://github.com/zarr-developers/numcodecs -.. _4: https://www.hdfgroup.org/solutions/hdf5/ -.. _5: https://pangeo.io/ -.. _6: https://pangeo.io/catalog.html -.. _7: https://www.malariagen.net/ -.. _8: http://alimanfoo.github.io/2016/09/21/genotype-compression-benchmark.html -.. 
_9: https://www.humancellatlas.org/ -.. _10: https://github.com/constantinpape/z5 -.. _11: https://github.com/lasersonlab/ndarray.scala -.. _12: https://github.com/meggart/ZarrNative.jl -.. _13: https://github.com/saalfeldlab/n5 -.. _14: http://docs.dask.org/en/latest/array-creation.html -.. _15: http://xarray.pydata.org/en/stable/io.html -.. _16: https://github.com/ContinuumIO/intake-xarray -.. _17: http://matthewrocklin.com/blog/work/2018/01/22/pangeo-2 -.. _18: http://wiki.esipfed.org/index.php/Interoperability_and_Technology/Tech_Dive_Webinar_Series#8_March.2C_2018:_.22Zarr:_A_simple.2C_open.2C_scalable_solution_for_big_NetCDF.2FHDF_data_on_the_Cloud.22:_Alistair_Miles.2C_University_of_Oxford. -.. _19: https://agu.confex.com/agu/fm18/meetingapp.cgi/Paper/390015 - - -Authors -------- - -Project contributors are listed in alphabetical order by surname. - -* `Ryan Abernathey `_, Columbia University -* `Stephan Balmer `_, Meteotest -* `Ambrose Carr `_, Chan Zuckerberg Initiative -* `Tim Crone `_, Columbia University -* `Martin Durant `_, Anaconda, inc. 
-* `Jan Funke `_, HHMI Janelia -* `Darren Gallagher `_, Satavia -* `Fabian Gans `_, Max Planck Institute for Biogeochemistry -* `Shikhar Goenka `_, Satavia -* `Joe Hamman `_, NCAR -* `Stephan Hoyer `_, Google -* `Jerome Kelleher `_, University of Oxford -* `John Kirkham `_, HHMI Janelia -* `Alistair Miles `_, University of Oxford -* `Josh Moore `_, University of Dundee -* `Charles Noyes `_, University of Southern California -* `Tarik Onalan `_ -* `Constantin Pape `_, University of Heidelberg -* `Zain Patel `_, University of Cambridge -* `Matthew Rocklin `_, NVIDIA -* `Stephan Saafeld `_, HHMI Janelia -* `Vincent Schut `_, Satelligence -* `Justin Swaney `_, MIT -* `Ryan Williams `_, Chan Zuckerberg Initiative diff --git a/docs/user-guide/arrays.md b/docs/user-guide/arrays.md new file mode 100644 index 0000000000..b91f79a008 --- /dev/null +++ b/docs/user-guide/arrays.md @@ -0,0 +1,688 @@ +# Working with arrays + +## Creating an array + +Zarr has several functions for creating arrays. For example: + +```python +import zarr +store = zarr.storage.MemoryStore() +z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +z +# +``` + +The code above creates a 2-dimensional array of 32-bit integers with 10000 rows +and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000 +columns (and so there will be 100 chunks in total). The data is written to a +`zarr.storage.MemoryStore` (e.g. an in-memory dict). See +[Persistent arrays](#persistent-arrays) for details on storing arrays in other stores. + +For a complete list of array creation routines see the `zarr` +module documentation. + +## Reading and writing data + +Zarr arrays support a similar interface to [NumPy](https://numpy.org/doc/stable/) +arrays for reading and writing data. 
For example, the entire array can be filled +with a scalar value: + +```python +z[:] = 42 +``` + +Regions of the array can also be written to, e.g.: + +```python +import numpy as np + +z[0, :] = np.arange(10000) +z[:, 0] = np.arange(10000) +``` + +The contents of the array can be retrieved by slicing, which will load the +requested region into memory as a NumPy array, e.g.: + +```python +z[0, 0] +# array(0, dtype=int32) +z[-1, -1] +# array(42, dtype=int32) +z[0, :] +# array([ 0, 1, 2, ..., 9997, 9998, 9999], +# shape=(10000,), dtype=int32) +z[:, 0] +# array([ 0, 1, 2, ..., 9997, 9998, 9999], +# shape=(10000,), dtype=int32) +z[:] +# array([[ 0, 1, 2, ..., 9997, 9998, 9999], +# [ 1, 42, 42, ..., 42, 42, 42], +# [ 2, 42, 42, ..., 42, 42, 42], +# ..., +# [9997, 42, 42, ..., 42, 42, 42], +# [9998, 42, 42, ..., 42, 42, 42], +# [9999, 42, 42, ..., 42, 42, 42]], +# shape=(10000, 10000), dtype=int32) +``` + +More information about NumPy-style indexing can be found in the +[NumPy documentation](https://numpy.org/doc/stable/user/basics.indexing.html). + +## Persistent arrays + +In the examples above, compressed data for each chunk of the array was stored in +main memory. Zarr arrays can also be stored on a file system, enabling +persistence of data between sessions. To do this, we can change the store +argument to point to a filesystem path: + +```python +z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +``` + +The array above will store its configuration metadata and all compressed chunk +data in a directory called `'data/example-1.zarr'` relative to the current working +directory. The `zarr.create_array` function provides a convenient way +to create a new persistent array or continue working with an existing +array. Note, there is no need to close an array: data are automatically +flushed to disk, and files are automatically closed whenever an array is modified.
+ +Persistent arrays support the same interface for reading and writing data, +e.g.: + +```python +z1[:] = 42 +z1[0, :] = np.arange(10000) +z1[:, 0] = np.arange(10000) +``` + +Check that the data have been written and can be read again: + +```python +z2 = zarr.open_array('data/example-1.zarr', mode='r') +np.all(z1[:] == z2[:]) +# np.True_ +``` + +If you are just looking for a fast and convenient way to save NumPy arrays to +disk then load back into memory later, the functions +`zarr.save` and `zarr.load` may be +useful. E.g.: + +```python +a = np.arange(10) +zarr.save('data/example-2.zarr', a) +zarr.load('data/example-2.zarr') +# array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) +``` + +Please note that there are a number of other options for persistent array +storage, see the Storage Guide for more details. + +## Resizing and appending + +A Zarr array can be resized, which means that any of its dimensions can be +increased or decreased in length. For example: + +```python +z = zarr.create_array(store='data/example-3.zarr', shape=(10000, 10000), dtype='int32',chunks=(1000, 1000)) +z[:] = 42 +z.shape +# (10000, 10000) +z.resize((20000, 10000)) +z.shape +# (20000, 10000) +``` + +Note that when an array is resized, the underlying data are not rearranged in +any way. If one or more dimensions are shrunk, any chunks falling outside the +new array shape will be deleted from the underlying store. + +`zarr.Array.append` is provided as a convenience function, which can be +used to append data to any axis. E.g.: + +```python +a = np.arange(10000000, dtype='int32').reshape(10000, 1000) +z = zarr.create_array(store='data/example-4.zarr', shape=a.shape, dtype=a.dtype, chunks=(1000, 100)) +z[:] = a +z.shape +# (10000, 1000) +z.append(a) +# (20000, 1000) +z.append(np.vstack([a, a]), axis=1) +# (20000, 2000) +z.shape +# (20000, 2000) +``` + +## Compressors + +A number of different compressors can be used with Zarr. Zarr includes Blosc, +Zstandard and Gzip compressors. 
Additional compressors are available through +a separate package called [NumCodecs](https://numcodecs.readthedocs.io/) which provides various +compressor libraries including LZ4, Zlib, BZ2 and LZMA. +Different compressors can be provided via the `compressors` keyword +argument accepted by all array creation functions. For example: + +```python +compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=3, shuffle=zarr.codecs.BloscShuffle.bitshuffle) +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) +z[:] = data +z.compressors +# (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) +``` + +The array above will use Blosc as the primary compressor, using the Zstandard +algorithm (compression level 3) internally within Blosc, and with the +bit-shuffle filter applied. + +When using a compressor, it can be useful to get some diagnostics on the +compression ratio. Zarr arrays provide the `zarr.Array.info` property +which can be used to print useful diagnostics, e.g.: + +```python +z.info +# Type : Array +# Zarr format : 3 +# Data type : DataType.int32 +# Fill value : 0 +# Shape : (10000, 10000) +# Chunk shape : (1000, 1000) +# Order : C +# Read-only : False +# Store type : LocalStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) +# No.
bytes : 400000000 (381.5M) +``` + +The `zarr.Array.info_complete` method inspects the underlying store and +prints additional diagnostics, e.g.: + +```python +z.info_complete() +# Type : Array +# Zarr format : 3 +# Data type : DataType.int32 +# Fill value : 0 +# Shape : (10000, 10000) +# Chunk shape : (1000, 1000) +# Order : C +# Read-only : False +# Store type : LocalStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) +# No. bytes : 400000000 (381.5M) +# No. bytes stored : 3558573 +# Storage ratio : 112.4 +# Chunks Initialized : 100 +``` + +!!! note + `zarr.Array.info_complete` will inspect the underlying store and may + be slow for large arrays. Use `zarr.Array.info` if detailed storage + statistics are not needed. + +If you don't specify a compressor, by default Zarr uses the Zstandard +compressor. + +In addition to Blosc and Zstandard, other compression libraries can also be used. For example, +here is an array using Gzip compression, level 1: + +```python +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1)) +z[:] = data +z.compressors +# (GzipCodec(level=1),) +``` + +Here is an example using LZMA from [NumCodecs](https://numcodecs.readthedocs.io/) with a custom filter pipeline including LZMA's +built-in delta filter: + +```python +import lzma +from numcodecs.zarr3 import LZMA + +lzma_filters = [dict(id=lzma.FILTER_DELTA, dist=4), dict(id=lzma.FILTER_LZMA2, preset=1)] +compressors = LZMA(filters=lzma_filters) +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) +z.compressors +# (LZMA(codec_name='numcodecs.lzma', codec_config={'filters': [{'id': 3, 'dist': 4}, 
{'id': 33, 'preset': 1}]}),) +``` + +The default compressor can be changed using Zarr's +configuration system, e.g.: + +```python +with zarr.config.set({'array.v2_default_compressor.numeric': {'id': 'blosc'}}): + z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2) +z.filters +# () +z.compressors +# (Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),) +``` + +To disable compression, set `compressors=None` when creating an array, e.g.: + +```python +z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None) +z.compressors +# () +``` + +## Filters + +In some cases, compression can be improved by transforming the data in some +way. For example, if nearby values tend to be correlated, then shuffling the +bytes within each numerical value or storing the difference between adjacent +values may increase compression ratio. Some compressors provide built-in filters +that apply transformations to the data prior to compression. For example, the +Blosc compressor has built-in implementations of byte- and bit-shuffle filters, +and the LZMA compressor has a built-in implementation of a delta +filter. However, to provide additional flexibility for implementing and using +filters in combination with different compressors, Zarr also provides a +mechanism for configuring filters outside of the primary compressor.
+ +Here is an example using a delta filter with the Blosc compressor: + +```python +from numcodecs.zarr3 import Delta + +filters = [Delta(dtype='int32')] +compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=1, shuffle=zarr.codecs.BloscShuffle.shuffle) +data = np.arange(100000000, dtype='int32').reshape(10000, 10000) +z = zarr.create_array(store='data/example-9.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), filters=filters, compressors=compressors) +z.info +# Type : Array +# Zarr format : 3 +# Data type : DataType.int32 +# Fill value : 0 +# Shape : (10000, 10000) +# Chunk shape : (1000, 1000) +# Order : C +# Read-only : False +# Store type : LocalStore +# Filters : (Delta(codec_name='numcodecs.delta', codec_config={'dtype': 'int32'}),) +# Serializer : BytesCodec(endian=) +# Compressors : (BloscCodec(typesize=4, cname=, clevel=1, shuffle=, blocksize=0),) +# No. bytes : 400000000 (381.5M) +``` + +For more information about available filter codecs, see the [Numcodecs](https://numcodecs.readthedocs.io/) documentation. + +## Advanced indexing + +Zarr arrays support several methods for advanced or "fancy" +indexing, which enable a subset of data items to be extracted or updated in an +array without loading the entire array into memory. + +Note that although this functionality is similar to some of the advanced +indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr +API for advanced indexing is different from both NumPy and h5py**, so please +read this section carefully. For a complete description of the indexing API, +see the documentation for the `zarr.Array` class. + +### Indexing with coordinate arrays + +Items from a Zarr array can be extracted by providing an integer array of +coordinates. 
E.g.: + +```python +data = np.arange(10) ** 2 +z = zarr.create_array(store='data/example-10.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +z[:] +# array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) +z.get_coordinate_selection([2, 5]) +# array([ 4, 25]) +``` + +Coordinate arrays can also be used to update data, e.g.: + +```python +z.set_coordinate_selection([2, 5], [-1, -2]) +z[:] +# array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) +``` + +For multidimensional arrays, coordinates must be provided for each dimension, +e.g.: + +```python +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +z[:] +# array([[ 0, 1, 2, 3, 4], +# [ 5, 6, 7, 8, 9], +# [10, 11, 12, 13, 14]]) +z.get_coordinate_selection(([0, 2], [1, 3])) +# array([ 1, 13]) +z.set_coordinate_selection(([0, 2], [1, 3]), [-1, -2]) +z[:] +# array([[ 0, -1, 2, 3, 4], +# [ 5, 6, 7, 8, 9], +# [10, 11, 12, -2, 14]]) +``` + +For convenience, coordinate indexing is also available via the `vindex` +property, as well as the square bracket operator, e.g.: + +```python +z.vindex[[0, 2], [1, 3]] +# array([-1, -2]) +z.vindex[[0, 2], [1, 3]] = [-3, -4] +z[:] +# array([[ 0, -3, 2, 3, 4], +# [ 5, 6, 7, 8, 9], +# [10, 11, 12, -4, 14]]) +z[[0, 2], [1, 3]] +# array([-3, -4]) +``` + +When the indexing arrays have different shapes, they are broadcast together. +That is, the following two calls are equivalent: + +```python +z[1, [1, 3]] +# array([6, 8]) +z[[1, 1], [1, 3]] +# array([6, 8]) +``` + +### Indexing with a mask array + +Items can also be extracted by providing a Boolean mask. 
E.g.: + +```python +data = np.arange(10) ** 2 +z = zarr.create_array(store='data/example-12.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +z[:] +# array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) +sel = np.zeros_like(z, dtype=bool) +sel[2] = True +sel[5] = True +z.get_mask_selection(sel) +# array([ 4, 25]) +z.set_mask_selection(sel, [-1, -2]) +z[:] +# array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) +``` + +Here's a multidimensional example: + +```python +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-13.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +z[:] +# array([[ 0, 1, 2, 3, 4], +# [ 5, 6, 7, 8, 9], +# [10, 11, 12, 13, 14]]) +sel = np.zeros_like(z, dtype=bool) +sel[0, 1] = True +sel[2, 3] = True +z.get_mask_selection(sel) +# array([ 1, 13]) +z.set_mask_selection(sel, [-1, -2]) +z[:] +# array([[ 0, -1, 2, 3, 4], +# [ 5, 6, 7, 8, 9], +# [10, 11, 12, -2, 14]]) +``` + +For convenience, mask indexing is also available via the `vindex` property, +e.g.: + +```python +z.vindex[sel] +# array([-1, -2]) +z.vindex[sel] = [-3, -4] +z[:] +# array([[ 0, -3, 2, 3, 4], +# [ 5, 6, 7, 8, 9], +# [10, 11, 12, -4, 14]]) +``` + +Mask indexing is conceptually the same as coordinate indexing, and is +implemented internally via the same machinery. Both styles of indexing allow +selecting arbitrary items from an array, also known as point selection. + +### Orthogonal indexing + +Zarr arrays also support methods for orthogonal indexing, which allows +selections to be made along each dimension of an array independently. For +example, this allows selecting a subset of rows and/or columns from a +2-dimensional array. 
E.g.: + +```python +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-14.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +z[:] +# array([[ 0, 1, 2, 3, 4], +# [ 5, 6, 7, 8, 9], +# [10, 11, 12, 13, 14]]) +z.get_orthogonal_selection(([0, 2], slice(None))) # select first and third rows +# array([[ 0, 1, 2, 3, 4], +# [10, 11, 12, 13, 14]]) +z.get_orthogonal_selection((slice(None), [1, 3])) # select second and fourth columns +# array([[ 1, 3], +# [ 6, 8], +# [11, 13]]) +z.get_orthogonal_selection(([0, 2], [1, 3])) # select rows [0, 2] and columns [1, 3] +# array([[ 1, 3], +# [11, 13]]) +``` + +Data can also be modified, e.g.: + +```python +z.set_orthogonal_selection(([0, 2], [1, 3]), [[-1, -2], [-3, -4]]) +``` + +For convenience, the orthogonal indexing functionality is also available via the +`oindex` property, e.g.: + +```python +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-15.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +z.oindex[[0, 2], :] # select first and third rows +# array([[ 0, 1, 2, 3, 4], +# [10, 11, 12, 13, 14]]) +z.oindex[:, [1, 3]] # select second and fourth columns +# array([[ 1, 3], +# [ 6, 8], +# [11, 13]]) +z.oindex[[0, 2], [1, 3]] # select rows [0, 2] and columns [1, 3] +# array([[ 1, 3], +# [11, 13]]) +z.oindex[[0, 2], [1, 3]] = [[-1, -2], [-3, -4]] +z[:] +# array([[ 0, -1, 2, -2, 4], +# [ 5, 6, 7, 8, 9], +# [10, -3, 12, -4, 14]]) +``` + +Any combination of integer, slice, 1D integer array and/or 1D Boolean array can +be used for orthogonal indexing.
+ +If the index contains at most one iterable, and otherwise contains only slices and integers, +orthogonal indexing is also available directly on the array: + +```python +data = np.arange(15).reshape(3, 5) +z = zarr.create_array(store='data/example-16.zarr', shape=data.shape, dtype=data.dtype) +z[:] = data +np.all(z.oindex[[0, 2], :] == z[[0, 2], :]) +# np.True_ +``` + +### Block Indexing + +Zarr also supports block indexing, which allows selections of whole chunks based on their +logical indices along each dimension of an array. For example, this allows selecting +a subset of chunk-aligned rows and/or columns from a 2-dimensional array. E.g.: + +```python +data = np.arange(100).reshape(10, 10) +z = zarr.create_array(store='data/example-17.zarr', shape=data.shape, dtype=data.dtype, chunks=(3, 3)) +z[:] = data +``` + +Retrieve items by specifying their block coordinates: + +```python +z.get_block_selection(1) +# array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], +# [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], +# [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) +``` + +Equivalent slicing: + +```python +z[3:6] +# array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], +# [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], +# [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) +``` + +For convenience, the block selection functionality is also available via the +`blocks` property, e.g.: + +```python +z.blocks[1] +# array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], +# [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], +# [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) +``` + +Block index arrays may be multidimensional to index multidimensional arrays. +For example: + +```python +z.blocks[0, 1:3] +# array([[ 3, 4, 5, 6, 7, 8], +# [13, 14, 15, 16, 17, 18], +# [23, 24, 25, 26, 27, 28]]) +``` + +Data can also be modified.
Let's start with a simple 2D array: + +```python +z = zarr.create_array(store='data/example-18.zarr', shape=(6, 6), dtype=int, chunks=(2, 2)) +``` + +Set data for a selection of items: + +```python +z.set_block_selection((1, 0), 1) +z[...] +# array([[0, 0, 0, 0, 0, 0], +# [0, 0, 0, 0, 0, 0], +# [1, 1, 0, 0, 0, 0], +# [1, 1, 0, 0, 0, 0], +# [0, 0, 0, 0, 0, 0], +# [0, 0, 0, 0, 0, 0]]) +``` + +For convenience, this functionality is also available via the `blocks` property. +E.g.: + +```python +z.blocks[:, 2] = 7 +z[...] +# array([[0, 0, 0, 0, 7, 7], +# [0, 0, 0, 0, 7, 7], +# [1, 1, 0, 0, 7, 7], +# [1, 1, 0, 0, 7, 7], +# [0, 0, 0, 0, 7, 7], +# [0, 0, 0, 0, 7, 7]]) +``` + +Any combination of integer and slice can be used for block indexing: + +```python +z.blocks[2, 1:3] +# array([[0, 0, 7, 7], +# [0, 0, 7, 7]]) + +root = zarr.create_group('data/example-19.zarr') +foo = root.create_array(name='foo', shape=(1000, 100), chunks=(10, 10), dtype='float32') +bar = root.create_array(name='foo/bar', shape=(100,), dtype='int32') +foo[:, :] = np.random.random((1000, 100)) +bar[:] = np.arange(100) +root.tree() +# / +# └── foo (1000, 100) float32 +``` + +## Sharding + +Using small chunk shapes in very large arrays can lead to a very large number of chunks. +This can become a performance issue for file systems and object storage. +With Zarr format 3, a new sharding feature has been added to address this issue. + +With sharding, multiple chunks can be stored in a single storage object (e.g. a file). +Within a shard, chunks are compressed and serialized separately. +This allows individual chunks to be read independently. +However, when writing data, a full shard must be written in one go for optimal +performance and to avoid concurrency issues. +That means that shards are the units of writing and chunks are the units of reading. +Users need to configure the chunk and shard shapes accordingly. + +Sharded arrays can be created by providing the `shards` parameter to `zarr.create_array`.
+ +```python +a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') +a[:] = (np.arange(10000 * 10000) % 256).astype('uint8').reshape(10000, 10000) +a.info_complete() +# Type : Array +# Zarr format : 3 +# Data type : DataType.uint8 +# Fill value : 0 +# Shape : (10000, 10000) +# Shard shape : (1000, 1000) +# Chunk shape : (100, 100) +# Order : C +# Read-only : False +# Store type : LocalStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (ZstdCodec(level=0, checksum=False),) +# No. bytes : 100000000 (95.4M) +# No. bytes stored : 3981552 +# Storage ratio : 25.1 +# Shards Initialized : 100 +``` + +In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) are used. +This means that 10*10 chunks are stored in each shard, and there are 10*10 shards in total. +Without the `shards` argument, there would be 10,000 chunks stored as individual files. + +## Missing features in 3.0 + +The following features have not been ported to 3.0 yet. + +### Object arrays + +See the Zarr-Python 2 documentation on [Object arrays](https://zarr.readthedocs.io/en/support-v2/tutorial.html#object-arrays) for more details. + +### Fixed-length string arrays + +See the Zarr-Python 2 documentation on [Fixed-length string arrays](https://zarr.readthedocs.io/en/support-v2/tutorial.html#string-arrays) for more details. + +### Datetime and Timedelta arrays + +See the Zarr-Python 2 documentation on [Datetime and Timedelta](https://zarr.readthedocs.io/en/support-v2/tutorial.html#datetimes-and-timedeltas) for more details. + +### Copying and migrating data + +See the Zarr-Python 2 documentation on [Copying and migrating data](https://zarr.readthedocs.io/en/support-v2/tutorial.html#copying-migrating-data) for more details.
diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst deleted file mode 100644 index 5bd6b1500f..0000000000 --- a/docs/user-guide/arrays.rst +++ /dev/null @@ -1,658 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. _user-guide-arrays: - -Working with arrays -=================== - -Creating an array ------------------ - -Zarr has several functions for creating arrays. For example:: - - >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z - - -The code above creates a 2-dimensional array of 32-bit integers with 10000 rows -and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000 -columns (and so there will be 100 chunks in total). The data is written to a -:class:`zarr.storage.MemoryStore` (e.g. an in-memory dict). See -:ref:`user-guide-persist` for details on storing arrays in other stores. - -For a complete list of array creation routines see the :mod:`zarr` -module documentation. - -.. _user-guide-array: - -Reading and writing data ------------------------- - -Zarr arrays support a similar interface to `NumPy `_ -arrays for reading and writing data. 
For example, the entire array can be filled -with a scalar value:: - - >>> z[:] = 42 - -Regions of the array can also be written to, e.g.:: - - >>> import numpy as np - >>> - >>> z[0, :] = np.arange(10000) - >>> z[:, 0] = np.arange(10000) - -The contents of the array can be retrieved by slicing, which will load the -requested region into memory as a NumPy array, e.g.:: - - >>> z[0, 0] - array(0, dtype=int32) - >>> z[-1, -1] - array(42, dtype=int32) - >>> z[0, :] - array([ 0, 1, 2, ..., 9997, 9998, 9999], - shape=(10000,), dtype=int32) - >>> z[:, 0] - array([ 0, 1, 2, ..., 9997, 9998, 9999], - shape=(10000,), dtype=int32) - >>> z[:] - array([[ 0, 1, 2, ..., 9997, 9998, 9999], - [ 1, 42, 42, ..., 42, 42, 42], - [ 2, 42, 42, ..., 42, 42, 42], - ..., - [9997, 42, 42, ..., 42, 42, 42], - [9998, 42, 42, ..., 42, 42, 42], - [9999, 42, 42, ..., 42, 42, 42]], - shape=(10000, 10000), dtype=int32) - -Read more about NumPy-style indexing can be found in the -`NumPy documentation `_. - -.. _user-guide-persist: - -Persistent arrays ------------------ - -In the examples above, compressed data for each chunk of the array was stored in -main memory. Zarr arrays can also be stored on a file system, enabling -persistence of data between sessions. To do this, we can change the store -argument to point to a filesystem path:: - - >>> z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - -The array above will store its configuration metadata and all compressed chunk -data in a directory called ``'data/example-1.zarr'`` relative to the current working -directory. The :func:`zarr.create_array` function provides a convenient way -to create a new persistent array or continue working with an existing -array. Note, there is no need to close an array: data are automatically -flushed to disk, and files are automatically closed whenever an array is modified. 
- -Persistent arrays support the same interface for reading and writing data, -e.g.:: - - >>> z1[:] = 42 - >>> z1[0, :] = np.arange(10000) - >>> z1[:, 0] = np.arange(10000) - -Check that the data have been written and can be read again:: - - >>> z2 = zarr.open_array('data/example-1.zarr', mode='r') - >>> np.all(z1[:] == z2[:]) - np.True_ - -If you are just looking for a fast and convenient way to save NumPy arrays to -disk then load back into memory later, the functions -:func:`zarr.save` and :func:`zarr.load` may be -useful. E.g.:: - - >>> a = np.arange(10) - >>> zarr.save('data/example-2.zarr', a) - >>> zarr.load('data/example-2.zarr') - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - -Please note that there are a number of other options for persistent array -storage, see the :ref:`Storage Guide ` guide for more details. - -.. _user-guide-resize: - -Resizing and appending ----------------------- - -A Zarr array can be resized, which means that any of its dimensions can be -increased or decreased in length. For example:: - - >>> z = zarr.create_array(store='data/example-3.zarr', shape=(10000, 10000), dtype='int32',chunks=(1000, 1000)) - >>> z[:] = 42 - >>> z.shape - (10000, 10000) - >>> z.resize((20000, 10000)) - >>> z.shape - (20000, 10000) - -Note that when an array is resized, the underlying data are not rearranged in -any way. If one or more dimensions are shrunk, any chunks falling outside the -new array shape will be deleted from the underlying store. - -:func:`zarr.Array.append` is provided as a convenience function, which can be -used to append data to any axis. E.g.:: - - >>> a = np.arange(10000000, dtype='int32').reshape(10000, 1000) - >>> z = zarr.create_array(store='data/example-4.zarr', shape=a.shape, dtype=a.dtype, chunks=(1000, 100)) - >>> z[:] = a - >>> z.shape - (10000, 1000) - >>> z.append(a) - (20000, 1000) - >>> z.append(np.vstack([a, a]), axis=1) - (20000, 2000) - >>> z.shape - (20000, 2000) - -.. 
_user-guide-compress: - -Compressors ------------ - -A number of different compressors can be used with Zarr. Zarr includes Blosc, -Zstandard and Gzip compressors. Additional compressors are available through -a separate package called NumCodecs_ which provides various -compressor libraries including LZ4, Zlib, BZ2 and LZMA. -Different compressors can be provided via the ``compressors`` keyword -argument accepted by all array creation functions. For example:: - - >>> compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=3, shuffle=zarr.codecs.BloscShuffle.bitshuffle) - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) - >>> z[:] = data - >>> z.compressors - (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) - -This array above will use Blosc as the primary compressor, using the Zstandard -algorithm (compression level 3) internally within Blosc, and with the -bit-shuffle filter applied. - -When using a compressor, it can be useful to get some diagnostics on the -compression ratio. Zarr arrays provide the :attr:`zarr.Array.info` property -which can be used to print useful diagnostics, e.g.:: - - >>> z.info - Type : Array - Zarr format : 3 - Data type : DataType.int32 - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : LocalStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) - No. 
bytes : 400000000 (381.5M) - -The :func:`zarr.Array.info_complete` method inspects the underlying store and -prints additional diagnostics, e.g.:: - - >>> z.info_complete() - Type : Array - Zarr format : 3 - Data type : DataType.int32 - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : LocalStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) - No. bytes : 400000000 (381.5M) - No. bytes stored : 3558573 - Storage ratio : 112.4 - Chunks Initialized : 100 - -.. note:: - :func:`zarr.Array.info_complete` will inspect the underlying store and may - be slow for large arrays. Use :attr:`zarr.Array.info` if detailed storage - statistics are not needed. - -If you don't specify a compressor, by default Zarr uses the Zstandard -compressor. - -In addition to Blosc and Zstandard, other compression libraries can also be used. For example, -here is an array using Gzip compression, level 1:: - - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1)) - >>> z[:] = data - >>> z.compressors - (GzipCodec(level=1),) - -Here is an example using LZMA from NumCodecs_ with a custom filter pipeline including LZMA's -built-in delta filter:: - - >>> import lzma - >>> from numcodecs.zarr3 import LZMA - >>> - >>> lzma_filters = [dict(id=lzma.FILTER_DELTA, dist=4), dict(id=lzma.FILTER_LZMA2, preset=1)] - >>> compressors = LZMA(filters=lzma_filters) - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) - >>> z.compressors - (LZMA(codec_name='numcodecs.lzma', codec_config={'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 
1}]}),) - -The default compressor can be changed by setting the value of the using Zarr's -:ref:`user-guide-config`, e.g.:: - - >>> with zarr.config.set({'array.v2_default_compressor.numeric': {'id': 'blosc'}}): - ... z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2) - >>> z.filters - () - >>> z.compressors - (Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),) - -To disable compression, set ``compressors=None`` when creating an array, e.g.:: - - >>> z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None) - >>> z.compressors - () - -.. _user-guide-filters: - -Filters -------- - -In some cases, compression can be improved by transforming the data in some -way. For example, if nearby values tend to be correlated, then shuffling the -bytes within each numerical value or storing the difference between adjacent -values may increase compression ratio. Some compressors provide built-in filters -that apply transformations to the data prior to compression. For example, the -Blosc compressor has built-in implementations of byte- and bit-shuffle filters, -and the LZMA compressor has a built-in implementation of a delta -filter. However, to provide additional flexibility for implementing and using -filters in combination with different compressors, Zarr also provides a -mechanism for configuring filters outside of the primary compressor. 
- -Here is an example using a delta filter with the Blosc compressor:: - - >>> from numcodecs.zarr3 import Delta - >>> - >>> filters = [Delta(dtype='int32')] - >>> compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=1, shuffle=zarr.codecs.BloscShuffle.shuffle) - >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000) - >>> z = zarr.create_array(store='data/example-9.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), filters=filters, compressors=compressors) - >>> z.info - Type : Array - Zarr format : 3 - Data type : DataType.int32 - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : LocalStore - Filters : (Delta(codec_name='numcodecs.delta', codec_config={'dtype': 'int32'}),) - Serializer : BytesCodec(endian=) - Compressors : (BloscCodec(typesize=4, cname=, clevel=1, shuffle=, blocksize=0),) - No. bytes : 400000000 (381.5M) - -For more information about available filter codecs, see the `Numcodecs -`_ documentation. - -.. _user-guide-indexing: - -Advanced indexing ------------------ - -Zarr arrays support several methods for advanced or "fancy" -indexing, which enable a subset of data items to be extracted or updated in an -array without loading the entire array into memory. - -Note that although this functionality is similar to some of the advanced -indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr -API for advanced indexing is different from both NumPy and h5py**, so please -read this section carefully. For a complete description of the indexing API, -see the documentation for the :class:`zarr.Array` class. - -Indexing with coordinate arrays -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Items from a Zarr array can be extracted by providing an integer array of -coordinates. 
E.g.:: - - >>> data = np.arange(10) ** 2 - >>> z = zarr.create_array(store='data/example-10.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) - >>> z.get_coordinate_selection([2, 5]) - array([ 4, 25]) - -Coordinate arrays can also be used to update data, e.g.:: - - >>> z.set_coordinate_selection([2, 5], [-1, -2]) - >>> z[:] - array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) - -For multidimensional arrays, coordinates must be provided for each dimension, -e.g.:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([[ 0, 1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14]]) - >>> z.get_coordinate_selection(([0, 2], [1, 3])) - array([ 1, 13]) - >>> z.set_coordinate_selection(([0, 2], [1, 3]), [-1, -2]) - >>> z[:] - array([[ 0, -1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -2, 14]]) - -For convenience, coordinate indexing is also available via the ``vindex`` -property, as well as the square bracket operator, e.g.:: - - >>> z.vindex[[0, 2], [1, 3]] - array([-1, -2]) - >>> z.vindex[[0, 2], [1, 3]] = [-3, -4] - >>> z[:] - array([[ 0, -3, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -4, 14]]) - >>> z[[0, 2], [1, 3]] - array([-3, -4]) - -When the indexing arrays have different shapes, they are broadcast together. -That is, the following two calls are equivalent:: - - >>> z[1, [1, 3]] - array([6, 8]) - >>> z[[1, 1], [1, 3]] - array([6, 8]) - -Indexing with a mask array -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Items can also be extracted by providing a Boolean mask. 
E.g.:: - - >>> data = np.arange(10) ** 2 - >>> z = zarr.create_array(store='data/example-12.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) - >>> sel = np.zeros_like(z, dtype=bool) - >>> sel[2] = True - >>> sel[5] = True - >>> z.get_mask_selection(sel) - array([ 4, 25]) - >>> z.set_mask_selection(sel, [-1, -2]) - >>> z[:] - array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) - -Here's a multidimensional example:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-13.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([[ 0, 1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14]]) - >>> sel = np.zeros_like(z, dtype=bool) - >>> sel[0, 1] = True - >>> sel[2, 3] = True - >>> z.get_mask_selection(sel) - array([ 1, 13]) - >>> z.set_mask_selection(sel, [-1, -2]) - >>> z[:] - array([[ 0, -1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -2, 14]]) - -For convenience, mask indexing is also available via the ``vindex`` property, -e.g.:: - - >>> z.vindex[sel] - array([-1, -2]) - >>> z.vindex[sel] = [-3, -4] - >>> z[:] - array([[ 0, -3, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, -4, 14]]) - -Mask indexing is conceptually the same as coordinate indexing, and is -implemented internally via the same machinery. Both styles of indexing allow -selecting arbitrary items from an array, also known as point selection. - -Orthogonal indexing -~~~~~~~~~~~~~~~~~~~ - -Zarr arrays also support methods for orthogonal indexing, which allows -selections to be made along each dimension of an array independently. For -example, this allows selecting a subset of rows and/or columns from a -2-dimensional array. 
E.g.:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-14.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z[:] - array([[ 0, 1, 2, 3, 4], - [ 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14]]) - >>> z.get_orthogonal_selection(([0, 2], slice(None))) # select first and third rows - array([[ 0, 1, 2, 3, 4], - [10, 11, 12, 13, 14]]) - >>> z.get_orthogonal_selection((slice(None), [1, 3])) # select second and fourth columns - array([[ 1, 3], - [ 6, 8], - [11, 13]]) - >>> z.get_orthogonal_selection(([0, 2], [1, 3])) # select rows [0, 2] and columns [1, 4] - array([[ 1, 3], - [11, 13]]) - -Data can also be modified, e.g.:: - - >>> z.set_orthogonal_selection(([0, 2], [1, 3]), [[-1, -2], [-3, -4]]) - -For convenience, the orthogonal indexing functionality is also available via the -``oindex`` property, e.g.:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-15.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> z.oindex[[0, 2], :] # select first and third rows - array([[ 0, 1, 2, 3, 4], - [10, 11, 12, 13, 14]]) - >>> z.oindex[:, [1, 3]] # select second and fourth columns - array([[ 1, 3], - [ 6, 8], - [11, 13]]) - >>> z.oindex[[0, 2], [1, 3]] # select rows [0, 2] and columns [1, 4] - array([[ 1, 3], - [11, 13]]) - >>> z.oindex[[0, 2], [1, 3]] = [[-1, -2], [-3, -4]] - >>> z[:] - array([[ 0, -1, 2, -2, 4], - [ 5, 6, 7, 8, 9], - [10, -3, 12, -4, 14]]) - -Any combination of integer, slice, 1D integer array and/or 1D Boolean array can -be used for orthogonal indexing. 
- -If the index contains at most one iterable, and otherwise contains only slices and integers, -orthogonal indexing is also available directly on the array:: - - >>> data = np.arange(15).reshape(3, 5) - >>> z = zarr.create_array(store='data/example-16.zarr', shape=data.shape, dtype=data.dtype) - >>> z[:] = data - >>> np.all(z.oindex[[0, 2], :] == z[[0, 2], :]) - np.True_ - -Block Indexing -~~~~~~~~~~~~~~ - -Zarr also support block indexing, which allows selections of whole chunks based on their -logical indices along each dimension of an array. For example, this allows selecting -a subset of chunk aligned rows and/or columns from a 2-dimensional array. E.g.:: - - >>> data = np.arange(100).reshape(10, 10) - >>> z = zarr.create_array(store='data/example-17.zarr', shape=data.shape, dtype=data.dtype, chunks=(3, 3)) - >>> z[:] = data - -Retrieve items by specifying their block coordinates:: - - >>> z.get_block_selection(1) - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - -Equivalent slicing:: - - >>> z[3:6] - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - -For convenience, the block selection functionality is also available via the -`blocks` property, e.g.:: - - >>> z.blocks[1] - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - -Block index arrays may be multidimensional to index multidimensional arrays. -For example:: - - >>> z.blocks[0, 1:3] - array([[ 3, 4, 5, 6, 7, 8], - [13, 14, 15, 16, 17, 18], - [23, 24, 25, 26, 27, 28]]) - -Data can also be modified. Let's start by a simple 2D array:: - - >>> z = zarr.create_array(store='data/example-18.zarr', shape=(6, 6), dtype=int, chunks=(2, 2)) - -Set data for a selection of items:: - - >>> z.set_block_selection((1, 0), 1) - >>> z[...] 
- array([[0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0]]) - -For convenience, this functionality is also available via the ``blocks`` property. -E.g.:: - - >>> z.blocks[:, 2] = 7 - >>> z[...] - array([[0, 0, 0, 0, 7, 7], - [0, 0, 0, 0, 7, 7], - [1, 1, 0, 0, 7, 7], - [1, 1, 0, 0, 7, 7], - [0, 0, 0, 0, 7, 7], - [0, 0, 0, 0, 7, 7]]) - -Any combination of integer and slice can be used for block indexing:: - - >>> z.blocks[2, 1:3] - array([[0, 0, 7, 7], - [0, 0, 7, 7]]) - >>> - >>> root = zarr.create_group('data/example-19.zarr') - >>> foo = root.create_array(name='foo', shape=(1000, 100), chunks=(10, 10), dtype='float32') - >>> bar = root.create_array(name='foo/bar', shape=(100,), dtype='int32') - >>> foo[:, :] = np.random.random((1000, 100)) - >>> bar[:] = np.arange(100) - >>> root.tree() - / - └── foo (1000, 100) float32 - - -.. _user-guide-sharding: - -Sharding --------- - -Using small chunk shapes in very large arrays can lead to a very large number of chunks. -This can become a performance issue for file systems and object storage. -With Zarr format 3, a new sharding feature has been added to address this issue. - -With sharding, multiple chunks can be stored in a single storage object (e.g. a file). -Within a shard, chunks are compressed and serialized separately. -This allows individual chunks to be read independently. -However, when writing data, a full shard must be written in one go for optimal -performance and to avoid concurrency issues. -That means that shards are the units of writing and chunks are the units of reading. -Users need to configure the chunk and shard shapes accordingly. - -Sharded arrays can be created by providing the ``shards`` parameter to :func:`zarr.create_array`. 
- - >>> a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') - >>> a[:] = (np.arange(10000 * 10000) % 256).astype('uint8').reshape(10000, 10000) - >>> a.info_complete() - Type : Array - Zarr format : 3 - Data type : DataType.uint8 - Fill value : 0 - Shape : (10000, 10000) - Shard shape : (1000, 1000) - Chunk shape : (100, 100) - Order : C - Read-only : False - Store type : LocalStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 100000000 (95.4M) - No. bytes stored : 3981552 - Storage ratio : 25.1 - Shards Initialized : 100 - -In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is used. -This means that 10*10 chunks are stored in each shard, and there are 10*10 shards in total. -Without the ``shards`` argument, there would be 10,000 chunks stored as individual files. - -Missing features in 3.0 ------------------------ - - -The following features have not been ported to 3.0 yet. - -.. _user-guide-objects: - -Object arrays -~~~~~~~~~~~~~ - -See the Zarr-Python 2 documentation on `Object arrays `_ for more details. - -.. _user-guide-strings: - -Fixed-length string arrays -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See the Zarr-Python 2 documentation on `Fixed-length string arrays `_ for more details. - -.. _user-guide-datetime: - -Datetime and Timedelta arrays -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See the Zarr-Python 2 documentation on `Datetime and Timedelta `_ for more details. - -.. _user-guide-copy: - -Copying and migrating data -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See the Zarr-Python 2 documentation on `Copying and migrating data `_ for more details. 
diff --git a/docs/user-guide/attributes.md b/docs/user-guide/attributes.md new file mode 100644 index 0000000000..216720c75f --- /dev/null +++ b/docs/user-guide/attributes.md @@ -0,0 +1,29 @@ +# Working with attributes + +Zarr arrays and groups support custom key/value attributes, which can be useful for +storing application-specific metadata. For example: + +```python +import zarr +store = zarr.storage.MemoryStore() +root = zarr.create_group(store=store) +root.attrs['foo'] = 'bar' +z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32') +z.attrs['baz'] = 42 +z.attrs['qux'] = [1, 4, 7, 12] +sorted(root.attrs) +# ['foo'] +'foo' in root.attrs +# True +root.attrs['foo'] +# 'bar' +sorted(z.attrs) +# ['baz', 'qux'] +z.attrs['baz'] +# 42 +z.attrs['qux'] +# [1, 4, 7, 12] +``` + +Internally Zarr uses JSON to store array attributes, so attribute values must be +JSON serializable. diff --git a/docs/user-guide/attributes.rst b/docs/user-guide/attributes.rst deleted file mode 100644 index ed48623e29..0000000000 --- a/docs/user-guide/attributes.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _user-guide-attrs: - -Working with attributes -======================= - -Zarr arrays and groups support custom key/value attributes, which can be useful for -storing application-specific metadata. For example:: - - >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> root = zarr.create_group(store=store) - >>> root.attrs['foo'] = 'bar' - >>> z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32') - >>> z.attrs['baz'] = 42 - >>> z.attrs['qux'] = [1, 4, 7, 12] - >>> sorted(root.attrs) - ['foo'] - >>> 'foo' in root.attrs - True - >>> root.attrs['foo'] - 'bar' - >>> sorted(z.attrs) - ['baz', 'qux'] - >>> z.attrs['baz'] - 42 - >>> z.attrs['qux'] - [1, 4, 7, 12] - -Internally Zarr uses JSON to store array attributes, so attribute values must be -JSON serializable. 
diff --git a/docs/user-guide/config.md b/docs/user-guide/config.md new file mode 100644 index 0000000000..b471a6c536 --- /dev/null +++ b/docs/user-guide/config.md @@ -0,0 +1,92 @@ +# Runtime configuration + +`zarr.config` is responsible for managing the configuration of zarr and +is based on the [donfig](https://github.com/pytroll/donfig) Python library. + +Configuration values can be set using code like the following: + +```python +import zarr + +zarr.config.set({'array.order': 'F'}) +# + +# revert this change so it doesn't impact the rest of the docs +zarr.config.set({'array.order': 'C'}) +# +``` + +Alternatively, configuration values can be set using environment variables, e.g. +`ZARR_ARRAY__ORDER=F`. + +The configuration can also be read from a YAML file in standard locations. +For more information, see the +[donfig documentation](https://donfig.readthedocs.io/en/latest/). + +Configuration options include the following: + +- Default Zarr format `default_zarr_format` +- Default array order in memory `array.order` +- Default filters, serializers and compressors, e.g. `array.v3_default_filters`, `array.v3_default_serializer`, `array.v3_default_compressors`, `array.v2_default_filters` and `array.v2_default_compressor` +- Whether empty chunks are written to storage `array.write_empty_chunks` +- Async and threading options, e.g. `async.concurrency` and `threading.max_workers` +- Selections of implementations of codecs, codec pipelines and buffers +- Enabling GPU support with `zarr.config.enable_gpu()`. See [Using GPUs with Zarr](gpu.md) for more. + +For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, +first register the implementations in the registry and then select them in the config. +For example, an implementation of the bytes codec in a class `'custompackage.NewBytesCodec'`, +requires the value of `codecs.bytes` to be `'custompackage.NewBytesCodec'`.
+ +This is the current default configuration: + +```python +zarr.config.pprint() +# {'array': {'order': 'C', +# 'v2_default_compressor': {'bytes': {'checksum': False, +# 'id': 'zstd', +# 'level': 0}, +# 'numeric': {'checksum': False, +# 'id': 'zstd', +# 'level': 0}, +# 'string': {'checksum': False, +# 'id': 'zstd', +# 'level': 0}}, +# 'v2_default_filters': {'bytes': [{'id': 'vlen-bytes'}], +# 'numeric': None, +# 'raw': None, +# 'string': [{'id': 'vlen-utf8'}]}, +# 'v3_default_compressors': {'bytes': [{'configuration': {'checksum': False, +# 'level': 0}, +# 'name': 'zstd'}], +# 'numeric': [{'configuration': {'checksum': False, +# 'level': 0}, +# 'name': 'zstd'}], +# 'string': [{'configuration': {'checksum': False, +# 'level': 0}, +# 'name': 'zstd'}]}, +# 'v3_default_filters': {'bytes': [], 'numeric': [], 'string': []}, +# 'v3_default_serializer': {'bytes': {'name': 'vlen-bytes'}, +# 'numeric': {'configuration': {'endian': 'little'}, +# 'name': 'bytes'}, +# 'string': {'name': 'vlen-utf8'}}, +# 'write_empty_chunks': False}, +# 'async': {'concurrency': 10, 'timeout': None}, +# 'buffer': 'zarr.core.buffer.cpu.Buffer', +# 'codec_pipeline': {'batch_size': 1, +# 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, +# 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', +# 'bytes': 'zarr.codecs.bytes.BytesCodec', +# 'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec', +# 'endian': 'zarr.codecs.bytes.BytesCodec', +# 'gzip': 'zarr.codecs.gzip.GzipCodec', +# 'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec', +# 'transpose': 'zarr.codecs.transpose.TransposeCodec', +# 'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec', +# 'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec', +# 'zstd': 'zarr.codecs.zstd.ZstdCodec'}, +# 'default_zarr_format': 3, +# 'json_indent': 2, +# 'ndbuffer': 'zarr.core.buffer.cpu.NDBuffer', +# 'threading': {'max_workers': None}} +``` diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst deleted file mode 100644 index 91ffe50b91..0000000000 
--- a/docs/user-guide/config.rst +++ /dev/null @@ -1,91 +0,0 @@ -.. _user-guide-config: - -Runtime configuration -===================== - -``zarr.config`` is responsible for managing the configuration of zarr and -is based on the `donfig `_ Python library. - -Configuration values can be set using code like the following:: - - >>> import zarr - >>> - >>> zarr.config.set({'array.order': 'F'}) - - >>> - >>> # revert this change so it doesn't impact the rest of the docs - >>> zarr.config.set({'array.order': 'C'}) - - -Alternatively, configuration values can be set using environment variables, e.g. -``ZARR_ARRAY__ORDER=F``. - -The configuration can also be read from a YAML file in standard locations. -For more information, see the -`donfig documentation `_. - -Configuration options include the following: - -- Default Zarr format ``default_zarr_version`` -- Default array order in memory ``array.order`` -- Default filters, serializers and compressors, e.g. ``array.v3_default_filters``, ``array.v3_default_serializer``, ``array.v3_default_compressors``, ``array.v2_default_filters`` and ``array.v2_default_compressor`` -- Whether empty chunks are written to storage ``array.write_empty_chunks`` -- Async and threading options, e.g. ``async.concurrency`` and ``threading.max_workers`` -- Selections of implementations of codecs, codec pipelines and buffers -- Enabling GPU support with ``zarr.config.enable_gpu()``. See :ref:`user-guide-gpu` for more. - -For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, -first register the implementations in the registry and then select them in the config. -For example, an implementation of the bytes codec in a class ``'custompackage.NewBytesCodec'``, -requires the value of ``codecs.bytes.name`` to be ``'custompackage.NewBytesCodec'``. 
- -This is the current default configuration:: - - >>> zarr.config.pprint() - {'array': {'order': 'C', - 'v2_default_compressor': {'bytes': {'checksum': False, - 'id': 'zstd', - 'level': 0}, - 'numeric': {'checksum': False, - 'id': 'zstd', - 'level': 0}, - 'string': {'checksum': False, - 'id': 'zstd', - 'level': 0}}, - 'v2_default_filters': {'bytes': [{'id': 'vlen-bytes'}], - 'numeric': None, - 'raw': None, - 'string': [{'id': 'vlen-utf8'}]}, - 'v3_default_compressors': {'bytes': [{'configuration': {'checksum': False, - 'level': 0}, - 'name': 'zstd'}], - 'numeric': [{'configuration': {'checksum': False, - 'level': 0}, - 'name': 'zstd'}], - 'string': [{'configuration': {'checksum': False, - 'level': 0}, - 'name': 'zstd'}]}, - 'v3_default_filters': {'bytes': [], 'numeric': [], 'string': []}, - 'v3_default_serializer': {'bytes': {'name': 'vlen-bytes'}, - 'numeric': {'configuration': {'endian': 'little'}, - 'name': 'bytes'}, - 'string': {'name': 'vlen-utf8'}}, - 'write_empty_chunks': False}, - 'async': {'concurrency': 10, 'timeout': None}, - 'buffer': 'zarr.core.buffer.cpu.Buffer', - 'codec_pipeline': {'batch_size': 1, - 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, - 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', - 'bytes': 'zarr.codecs.bytes.BytesCodec', - 'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec', - 'endian': 'zarr.codecs.bytes.BytesCodec', - 'gzip': 'zarr.codecs.gzip.GzipCodec', - 'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec', - 'transpose': 'zarr.codecs.transpose.TransposeCodec', - 'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec', - 'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec', - 'zstd': 'zarr.codecs.zstd.ZstdCodec'}, - 'default_zarr_format': 3, - 'json_indent': 2, - 'ndbuffer': 'zarr.core.buffer.cpu.NDBuffer', - 'threading': {'max_workers': None}} diff --git a/docs/user-guide/consolidated_metadata.md b/docs/user-guide/consolidated_metadata.md new file mode 100644 index 0000000000..20c1cf5554 --- /dev/null +++ 
b/docs/user-guide/consolidated_metadata.md @@ -0,0 +1,117 @@ +# Consolidated metadata + +!!! warning + The Consolidated Metadata feature in Zarr-Python is considered experimental for v3 + stores. [zarr-specs#309](https://github.com/zarr-developers/zarr-specs/pull/309) + has proposed a formal extension to the v3 specification to support consolidated metadata. + +Zarr-Python implements the [Consolidated Metadata](https://github.com/zarr-developers/zarr-specs/pull/309) for v2 and v3 stores. +Consolidated metadata can reduce the time needed to load the metadata for an +entire hierarchy, especially when the metadata is being served over a network. +Consolidated metadata essentially stores all the metadata for a hierarchy in the +metadata of the root Group. + +## Usage + +If consolidated metadata is present in a Zarr Group's metadata then it is used +by default. The initial read to open the group will need to communicate with +the store (reading from a file for a `zarr.storage.LocalStore`, making a +network request for a `zarr.storage.FsspecStore`). After that, any subsequent +metadata reads to get child Group or Array nodes will *not* require reads from the store. + +In Python, the consolidated metadata is available on the `.consolidated_metadata` +attribute of the `GroupMetadata` object.
+ +```python +import zarr + +store = zarr.storage.MemoryStore() +group = zarr.create_group(store=store) +group.create_array(shape=(1,), name='a', dtype='float64') +# +group.create_array(shape=(2, 2), name='b', dtype='float64') +# +group.create_array(shape=(3, 3, 3), name='c', dtype='float64') +# +zarr.consolidate_metadata(store) +# +``` + +If we open that group, the Group's metadata has a `zarr.core.group.ConsolidatedMetadata` +that can be used: + +```python +consolidated = zarr.open_group(store=store) +consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata +from pprint import pprint +pprint(dict(sorted(consolidated_metadata.items()))) +# {'a': ArrayV3Metadata(shape=(1,), +# data_type=, +# chunk_grid=RegularChunkGrid(chunk_shape=(1,)), +# chunk_key_encoding=DefaultChunkKeyEncoding(name='default', +# separator='/'), +# fill_value=np.float64(0.0), +# codecs=(BytesCodec(endian=), +# ZstdCodec(level=0, checksum=False)), +# attributes={}, +# dimension_names=None, +# zarr_format=3, +# node_type='array', +# storage_transformers=()), +# 'b': ArrayV3Metadata(shape=(2, 2), +# data_type=, +# chunk_grid=RegularChunkGrid(chunk_shape=(2, 2)), +# chunk_key_encoding=DefaultChunkKeyEncoding(name='default', +# separator='/'), +# fill_value=np.float64(0.0), +# codecs=(BytesCodec(endian=), +# ZstdCodec(level=0, checksum=False)), +# attributes={}, +# dimension_names=None, +# zarr_format=3, +# node_type='array', +# storage_transformers=()), +# 'c': ArrayV3Metadata(shape=(3, 3, 3), +# data_type=, +# chunk_grid=RegularChunkGrid(chunk_shape=(3, 3, 3)), +# chunk_key_encoding=DefaultChunkKeyEncoding(name='default', +# separator='/'), +# fill_value=np.float64(0.0), +# codecs=(BytesCodec(endian=), +# ZstdCodec(level=0, checksum=False)), +# attributes={}, +# dimension_names=None, +# zarr_format=3, +# node_type='array', +# storage_transformers=())} +``` + +Operations on the group to get children automatically use the consolidated metadata: + +```python +consolidated['a']
# no read / HTTP request to the Store is required +# +``` + +With nested groups, the consolidated metadata is available on the children, recursively: + +```python +child = group.create_group('child', attributes={'kind': 'child'}) +grandchild = child.create_group('child', attributes={'kind': 'grandchild'}) +consolidated = zarr.consolidate_metadata(store) + +consolidated['child'].metadata.consolidated_metadata +# ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False) +``` + +## Synchronization and Concurrency + +Consolidated metadata is intended for read-heavy use cases on slowly changing +hierarchies. For hierarchies where new nodes are constantly being added, +removed, or modified, consolidated metadata may not be desirable. + +1. It will add some overhead to each update operation, since the metadata + would need to be re-consolidated to keep it in sync with the store. +2. Readers using consolidated metadata will regularly see a "past" version + of the metadata, at the time they read the root node with its consolidated + metadata. diff --git a/docs/user-guide/consolidated_metadata.rst b/docs/user-guide/consolidated_metadata.rst deleted file mode 100644 index 3c015dcfca..0000000000 --- a/docs/user-guide/consolidated_metadata.rst +++ /dev/null @@ -1,116 +0,0 @@ -.. _user-guide-consolidated-metadata: - -Consolidated metadata -===================== - -.. warning:: - The Consolidated Metadata feature in Zarr-Python is considered experimental for v3 - stores. `zarr-specs#309 `_ - has proposed a formal extension to the v3 specification to support consolidated metadata. - -Zarr-Python implements the `Consolidated Metadata`_ for v2 and v3 stores.
-Consolidated metadata can reduce the time needed to load the metadata for an -entire hierarchy, especially when the metadata is being served over a network. -Consolidated metadata essentially stores all the metadata for a hierarchy in the -metadata of the root Group. - -Usage ------ - -If consolidated metadata is present in a Zarr Group's metadata then it is used -by default. The initial read to open the group will need to communicate with -the store (reading from a file for a :class:`zarr.storage.LocalStore`, making a -network request for a :class:`zarr.storage.FsspecStore`). After that, any subsequent -metadata reads get child Group or Array nodes will *not* require reads from the store. - -In Python, the consolidated metadata is available on the ``.consolidated_metadata`` -attribute of the ``GroupMetadata`` object. - - >>> import zarr - >>> - >>> store = zarr.storage.MemoryStore() - >>> group = zarr.create_group(store=store) - >>> group.create_array(shape=(1,), name='a', dtype='float64') - - >>> group.create_array(shape=(2, 2), name='b', dtype='float64') - - >>> group.create_array(shape=(3, 3, 3), name='c', dtype='float64') - - >>> zarr.consolidate_metadata(store) - - -If we open that group, the Group's metadata has a :class:`zarr.core.group.ConsolidatedMetadata` -that can be used.: - - >>> consolidated = zarr.open_group(store=store) - >>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata - >>> from pprint import pprint - >>> pprint(dict(sorted(consolidated_metadata.items()))) - {'a': ArrayV3Metadata(shape=(1,), - data_type=, - chunk_grid=RegularChunkGrid(chunk_shape=(1,)), - chunk_key_encoding=DefaultChunkKeyEncoding(name='default', - separator='/'), - fill_value=np.float64(0.0), - codecs=(BytesCodec(endian=), - ZstdCodec(level=0, checksum=False)), - attributes={}, - dimension_names=None, - zarr_format=3, - node_type='array', - storage_transformers=()), - 'b': ArrayV3Metadata(shape=(2, 2), - data_type=, - 
chunk_grid=RegularChunkGrid(chunk_shape=(2, 2)), - chunk_key_encoding=DefaultChunkKeyEncoding(name='default', - separator='/'), - fill_value=np.float64(0.0), - codecs=(BytesCodec(endian=), - ZstdCodec(level=0, checksum=False)), - attributes={}, - dimension_names=None, - zarr_format=3, - node_type='array', - storage_transformers=()), - 'c': ArrayV3Metadata(shape=(3, 3, 3), - data_type=, - chunk_grid=RegularChunkGrid(chunk_shape=(3, 3, 3)), - chunk_key_encoding=DefaultChunkKeyEncoding(name='default', - separator='/'), - fill_value=np.float64(0.0), - codecs=(BytesCodec(endian=), - ZstdCodec(level=0, checksum=False)), - attributes={}, - dimension_names=None, - zarr_format=3, - node_type='array', - storage_transformers=())} - -Operations on the group to get children automatically use the consolidated metadata.: - - >>> consolidated['a'] # no read / HTTP request to the Store is required - - -With nested groups, the consolidated metadata is available on the children, recursively.: - - >>> child = group.create_group('child', attributes={'kind': 'child'}) - >>> grandchild = child.create_group('child', attributes={'kind': 'grandchild'}) - >>> consolidated = zarr.consolidate_metadata(store) - >>> - >>> consolidated['child'].metadata.consolidated_metadata - ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False) - -Synchronization and Concurrency -------------------------------- - -Consolidated metadata is intended for read-heavy use cases on slowly changing -hierarchies. For hierarchies where new nodes are constantly being added, -removed, or modified, consolidated metadata may not be desirable. - -1. It will add some overhead to each update operation, since the metadata - would need to be re-consolidated to keep it in sync with the store. -2. 
Readers using consolidated metadata will regularly see a "past" version - of the metadata, at the time they read the root node with its consolidated - metadata. - -.. _Consolidated Metadata: https://github.com/zarr-developers/zarr-specs/pull/309 diff --git a/docs/user-guide/extending.rst b/docs/user-guide/extending.md similarity index 52% rename from docs/user-guide/extending.rst rename to docs/user-guide/extending.md index 7647703fbb..d950794047 100644 --- a/docs/user-guide/extending.rst +++ b/docs/user-guide/extending.md @@ -1,20 +1,17 @@ - -Extending Zarr -============== +# Extending Zarr Zarr-Python 3 was designed to be extensible. This means that you can extend the library by writing custom classes and plugins. Currently, Zarr can be extended in the following ways: -Custom codecs -------------- +## Custom codecs -.. note:: +!!! note This section explains how custom codecs can be created for Zarr format 3 arrays. For Zarr format 2, codecs should subclass the - `numcodecs.abc.Codec `_ + [numcodecs.abc.Codec](https://numcodecs.readthedocs.io/en/stable/abc.html#numcodecs.abc.Codec) base class and register through - `numcodecs.registry.register_codec `_. + [numcodecs.registry.register_codec](https://numcodecs.readthedocs.io/en/stable/registry.html#numcodecs.registry.register_codec). There are three types of codecs in Zarr: - array-to-array @@ -24,68 +21,65 @@ There are three types of codecs in Zarr: Array-to-array codecs are used to transform the array data before serializing to bytes. Examples include delta encoding or scaling codecs. Array-to-bytes codecs are used for serializing the array data to bytes. In Zarr, the main codec to use for numeric arrays -is the :class:`zarr.codecs.BytesCodec`. Bytes-to-bytes codecs transform the serialized bytestreams +is the `zarr.codecs.BytesCodec`. Bytes-to-bytes codecs transform the serialized bytestreams of the array data. 
Examples include compression codecs, such as -:class:`zarr.codecs.GzipCodec`, :class:`zarr.codecs.BloscCodec` or -:class:`zarr.codecs.ZstdCodec`, and codecs that add a checksum to the bytestream, such as -:class:`zarr.codecs.Crc32cCodec`. +`zarr.codecs.GzipCodec`, `zarr.codecs.BloscCodec` or +`zarr.codecs.ZstdCodec`, and codecs that add a checksum to the bytestream, such as +`zarr.codecs.Crc32cCodec`. Custom codecs for Zarr are implemented by subclassing the relevant base class, see -:class:`zarr.abc.codec.ArrayArrayCodec`, :class:`zarr.abc.codec.ArrayBytesCodec` and -:class:`zarr.abc.codec.BytesBytesCodec`. Most custom codecs should implemented the -``_encode_single`` and ``_decode_single`` methods. These methods operate on single chunks -of the array data. Alternatively, custom codecs can implement the ``encode`` and ``decode`` +`zarr.abc.codec.ArrayArrayCodec`, `zarr.abc.codec.ArrayBytesCodec` and +`zarr.abc.codec.BytesBytesCodec`. Most custom codecs should implement the +`_encode_single` and `_decode_single` methods. These methods operate on single chunks +of the array data. Alternatively, custom codecs can implement the `encode` and `decode` methods, which operate on batches of chunks, in case the codec is intended to implement its own batch processing. Custom codecs should also implement the following methods: -- ``compute_encoded_size``, which returns the byte size of the encoded data given the byte - size of the original data. It should raise ``NotImplementedError`` for codecs with +- `compute_encoded_size`, which returns the byte size of the encoded data given the byte + size of the original data. It should raise `NotImplementedError` for codecs with variable-sized outputs, such as compression codecs. -- ``validate`` (optional), which can be used to check that the codec metadata is compatible with the +- `validate` (optional), which can be used to check that the codec metadata is compatible with the array metadata. It should raise errors if not.
-- ``resolve_metadata`` (optional), which is important for codecs that change the shape, +- `resolve_metadata` (optional), which is important for codecs that change the shape, dtype or fill value of a chunk. -- ``evolve_from_array_spec`` (optional), which can be useful for automatically filling in +- `evolve_from_array_spec` (optional), which can be useful for automatically filling in codec configuration metadata from the array metadata. To use custom codecs in Zarr, they need to be registered using the -`entrypoint mechanism `_. -Commonly, entrypoints are declared in the ``pyproject.toml`` of your package under the -``[project.entry-points."zarr.codecs"]`` section. Zarr will automatically discover and +[entrypoint mechanism](https://packaging.python.org/en/latest/specifications/entry-points/). +Commonly, entrypoints are declared in the `pyproject.toml` of your package under the +`[project.entry-points."zarr.codecs"]` section. Zarr will automatically discover and load all codecs registered with the entrypoint mechanism from imported modules. -.. code-block:: toml - - [project.entry-points."zarr.codecs"] - "custompackage.fancy_codec" = "custompackage:FancyCodec" +```toml +[project.entry-points."zarr.codecs"] +"custompackage.fancy_codec" = "custompackage:FancyCodec" +``` New codecs need to have their own unique identifier. To avoid naming collisions, it is strongly recommended to prefix the codec identifier with a unique name. For example, -the codecs from ``numcodecs`` are prefixed with ``numcodecs.``, e.g. ``numcodecs.delta``. +the codecs from `numcodecs` are prefixed with `numcodecs.`, e.g. `numcodecs.delta`. -.. note:: +!!! note Note that the extension mechanism for the Zarr format 3 is still under development. Requirements for custom codecs including the choice of codec identifiers might change in the future. It is also possible to register codecs as replacements for existing codecs. 
This might be useful for providing specialized implementations, such as GPU-based codecs. In case of -multiple codecs, the :mod:`zarr.core.config` mechanism can be used to select the preferred +multiple codecs, the `zarr.core.config` mechanism can be used to select the preferred implementation. -Custom stores -------------- +## Custom stores Coming soon. -Custom array buffers --------------------- +## Custom array buffers Coming soon. -Other extensions ----------------- +## Other extensions In the future, Zarr will support writing custom custom data types and chunk grids. diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md new file mode 100644 index 0000000000..994667b515 --- /dev/null +++ b/docs/user-guide/gpu.md @@ -0,0 +1,32 @@ +# Using GPUs with Zarr + +Zarr can use GPUs to accelerate your workload by running +`zarr.config.enable_gpu`. + +!!! note + `zarr-python` currently supports reading the ndarray data into device (GPU) + memory as the final stage of the codec pipeline. Data will still be read into + or copied to host (CPU) memory for encoding and decoding. + + In the future, codecs will be available for compressing and decompressing data on + the GPU, avoiding the need to move data between the host and device for + compression and decompression. + +## Reading data into device memory + +`zarr.config.enable_gpu` configures Zarr to use GPU memory for the data +buffers used internally by Zarr. + +```python +import zarr +import cupy as cp # doctest: +SKIP +zarr.config.enable_gpu() # doctest: +SKIP +store = zarr.storage.MemoryStore() # doctest: +SKIP +z = zarr.create_array( # doctest: +SKIP + store=store, shape=(100, 100), chunks=(10, 10), dtype="float32", +) +type(z[:10, :10]) # doctest: +SKIP +# cupy.ndarray +``` + +Note that the output type is a `cupy.ndarray` rather than a NumPy array.
diff --git a/docs/user-guide/gpu.rst b/docs/user-guide/gpu.rst deleted file mode 100644 index 4d3492f8bd..0000000000 --- a/docs/user-guide/gpu.rst +++ /dev/null @@ -1,37 +0,0 @@ -.. _user-guide-gpu: - -Using GPUs with Zarr -==================== - -Zarr can use GPUs to accelerate your workload by running -:meth:`zarr.config.enable_gpu`. - -.. note:: - - `zarr-python` currently supports reading the ndarray data into device (GPU) - memory as the final stage of the codec pipeline. Data will still be read into - or copied to host (CPU) memory for encoding and decoding. - - In the future, codecs will be available compressing and decompressing data on - the GPU, avoiding the need to move data between the host and device for - compression and decompression. - -Reading data into device memory -------------------------------- - -:meth:`zarr.config.enable_gpu` configures Zarr to use GPU memory for the data -buffers used internally by Zarr. - -.. code-block:: python - - >>> import zarr - >>> import cupy as cp # doctest: +SKIP - >>> zarr.config.enable_gpu() # doctest: +SKIP - >>> store = zarr.storage.MemoryStore() # doctest: +SKIP - >>> z = zarr.create_array( # doctest: +SKIP - ... store=store, shape=(100, 100), chunks=(10, 10), dtype="float32", - ... ) - >>> type(z[:10, :10]) # doctest: +SKIP - cupy.ndarray - -Note that the output type is a ``cupy.ndarray`` rather than a NumPy array. diff --git a/docs/user-guide/groups.md b/docs/user-guide/groups.md new file mode 100644 index 0000000000..0a19679a7f --- /dev/null +++ b/docs/user-guide/groups.md @@ -0,0 +1,172 @@ +# Working with groups + +Zarr supports hierarchical organization of arrays via groups. As with arrays, +groups can be stored in memory, on disk, or via other storage systems that +support a similar interface. 
+ +To create a group, use the `zarr.create_group` function: + +```python +import zarr +store = zarr.storage.MemoryStore() +root = zarr.create_group(store=store) +root +# +``` + +Groups have a similar API to the Group class from [h5py](https://www.h5py.org/). For example, groups can contain other groups: + +```python +foo = root.create_group('foo') +bar = foo.create_group('bar') +``` + +Groups can also contain arrays, e.g.: + +```python +z1 = bar.create_array(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +z1 +# +``` + +Members of a group can be accessed via the suffix notation, e.g.: + +```python +root['foo'] +# +``` + +The '/' character can be used to access multiple levels of the hierarchy in one +call, e.g.: + +```python +root['foo/bar'] +# +root['foo/bar/baz'] +# +``` + +The `zarr.Group.tree` method can be used to print a tree +representation of the hierarchy, e.g.: + +```python +root.tree() +# / +# └── foo +# └── bar +# └── baz (10000, 10000) int32 +``` + +The `zarr.open_group` function provides a convenient way to create or +re-open a group stored in a directory on the file-system, with sub-groups stored in +sub-directories, e.g.: + +```python +root = zarr.open_group('data/group.zarr', mode='w') +root +# + +z = root.create_array(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +z +# +``` + +For more information on groups see the `zarr.Group` API docs. + +## Batch Group Creation + +You can also create multiple groups concurrently with a single function call.
`zarr.create_hierarchy` takes +a `zarr.storage.Store` instance and a dict of `key : metadata` pairs, parses that dict, and +writes metadata documents to storage: + +```python +from zarr import create_hierarchy +from zarr.core.group import GroupMetadata +from zarr.storage import LocalStore +node_spec = {'a/b/c': GroupMetadata()} +nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec)) +print(sorted(nodes_created.items(), key=lambda kv: len(kv[0]))) +# [('', ), ('a', ), ('a/b', ), ('a/b/c', )] +``` + +Note that we only specified a single group named `a/b/c`, but 4 groups were created. These additional groups +were created to ensure that the desired node `a/b/c` is connected to the root group `''` by a sequence +of intermediate groups. `zarr.create_hierarchy` normalizes the `nodes` keyword argument to +ensure that the resulting hierarchy is complete, i.e. all groups or arrays are connected to the root +of the hierarchy via intermediate groups. + +Because `zarr.create_hierarchy` concurrently creates metadata documents, it's more efficient +than repeated calls to `create_group` or `create_array`, provided you can statically define +the metadata for the groups and arrays you want to create. + +## Array and group diagnostics + +Diagnostic information about arrays and groups is available via the `info` +property. 
E.g.: + +```python +store = zarr.storage.MemoryStore() +root = zarr.group(store=store) +foo = root.create_group('foo') +bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64') +bar[:] = 42 +baz = foo.create_array(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='float32') +baz[:] = 4.2 +root.info +# Name : +# Type : Group +# Zarr format : 3 +# Read-only : False +# Store type : MemoryStore +foo.info +# Name : foo +# Type : Group +# Zarr format : 3 +# Read-only : False +# Store type : MemoryStore +bar.info_complete() +# Type : Array +# Zarr format : 3 +# Data type : DataType.int64 +# Fill value : 0 +# Shape : (1000000,) +# Chunk shape : (100000,) +# Order : C +# Read-only : False +# Store type : MemoryStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (ZstdCodec(level=0, checksum=False),) +# No. bytes : 8000000 (7.6M) +# No. bytes stored : 1614 +# Storage ratio : 4956.6 +# Chunks Initialized : 10 +baz.info +# Type : Array +# Zarr format : 3 +# Data type : DataType.float32 +# Fill value : 0.0 +# Shape : (1000, 1000) +# Chunk shape : (100, 100) +# Order : C +# Read-only : False +# Store type : MemoryStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (ZstdCodec(level=0, checksum=False),) +# No. bytes : 4000000 (3.8M) +``` + +Groups also have the `zarr.Group.tree` method, e.g.: + +```python +root.tree() +# / +# └── foo +# ├── bar (1000000,) int64 +# └── baz (1000, 1000) float32 +``` + +!!! note + `zarr.Group.tree` requires the optional [rich](https://rich.readthedocs.io/en/stable/) + dependency. It can be installed with the `[tree]` extra. diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst deleted file mode 100644 index 99234bad4e..0000000000 --- a/docs/user-guide/groups.rst +++ /dev/null @@ -1,172 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. 
_user-guide-groups: - -Working with groups -=================== - -Zarr supports hierarchical organization of arrays via groups. As with arrays, -groups can be stored in memory, on disk, or via other storage systems that -support a similar interface. - -To create a group, use the :func:`zarr.group` function:: - - >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> root = zarr.create_group(store=store) - >>> root - - -Groups have a similar API to the Group class from `h5py -`_. For example, groups can contain other groups:: - - >>> foo = root.create_group('foo') - >>> bar = foo.create_group('bar') - -Groups can also contain arrays, e.g.:: - - >>> z1 = bar.create_array(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z1 - - -Members of a group can be accessed via the suffix notation, e.g.:: - - >>> root['foo'] - - -The '/' character can be used to access multiple levels of the hierarchy in one -call, e.g.:: - - >>> root['foo/bar'] - - >>> root['foo/bar/baz'] - - -The :func:`zarr.Group.tree` method can be used to print a tree -representation of the hierarchy, e.g.:: - - >>> root.tree() - / - └── foo - └── bar - └── baz (10000, 10000) int32 - - -The :func:`zarr.open_group` function provides a convenient way to create or -re-open a group stored in a directory on the file-system, with sub-groups stored in -sub-directories, e.g.:: - - >>> root = zarr.open_group('data/group.zarr', mode='w') - >>> root - - >>> - >>> z = root.create_array(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z - - -.. TODO: uncomment after __enter__ and __exit__ are implemented -.. Groups can be used as context managers (in a ``with`` statement). -.. If the underlying store has a ``close`` method, it will be called on exit. - -For more information on groups see the :class:`zarr.Group` API docs. - -.. 
_user-guide-diagnostics: - -Batch Group Creation --------------------- - -You can also create multiple groups concurrently with a single function call. :func:`zarr.create_hierarchy` takes -a :class:`zarr.storage.Store` instance and a dict of ``key : metadata`` pairs, parses that dict, and -writes metadata documents to storage: - - >>> from zarr import create_hierarchy - >>> from zarr.core.group import GroupMetadata - >>> from zarr.storage import LocalStore - >>> node_spec = {'a/b/c': GroupMetadata()} - >>> nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec)) - >>> print(sorted(nodes_created.items(), key=lambda kv: len(kv[0]))) - [('', ), ('a', ), ('a/b', ), ('a/b/c', )] - -Note that we only specified a single group named ``a/b/c``, but 4 groups were created. These additional groups -were created to ensure that the desired node ``a/b/c`` is connected to the root group ``''`` by a sequence -of intermediate groups. :func:`zarr.create_hierarchy` normalizes the ``nodes`` keyword argument to -ensure that the resulting hierarchy is complete, i.e. all groups or arrays are connected to the root -of the hierarchy via intermediate groups. - -Because :func:`zarr.create_hierarchy` concurrently creates metadata documents, it's more efficient -than repeated calls to :func:`create_group` or :func:`create_array`, provided you can statically define -the metadata for the groups and arrays you want to create. - -Array and group diagnostics ---------------------------- - -Diagnostic information about arrays and groups is available via the ``info`` -property. 
E.g.:: - - >>> store = zarr.storage.MemoryStore() - >>> root = zarr.group(store=store) - >>> foo = root.create_group('foo') - >>> bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64') - >>> bar[:] = 42 - >>> baz = foo.create_array(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='float32') - >>> baz[:] = 4.2 - >>> root.info - Name : - Type : Group - Zarr format : 3 - Read-only : False - Store type : MemoryStore - >>> foo.info - Name : foo - Type : Group - Zarr format : 3 - Read-only : False - Store type : MemoryStore - >>> bar.info_complete() - Type : Array - Zarr format : 3 - Data type : DataType.int64 - Fill value : 0 - Shape : (1000000,) - Chunk shape : (100000,) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 8000000 (7.6M) - No. bytes stored : 1614 - Storage ratio : 4956.6 - Chunks Initialized : 10 - >>> baz.info - Type : Array - Zarr format : 3 - Data type : DataType.float32 - Fill value : 0.0 - Shape : (1000, 1000) - Chunk shape : (100, 100) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 4000000 (3.8M) - -Groups also have the :func:`zarr.Group.tree` method, e.g.:: - - >>> root.tree() - / - └── foo - ├── bar (1000000,) int64 - └── baz (1000, 1000) float32 - - -.. note:: - - :func:`zarr.Group.tree` requires the optional `rich `_ - dependency. It can be installed with the ``[tree]`` extra. diff --git a/docs/user-guide/index.rst b/docs/user-guide/index.rst deleted file mode 100644 index c50713332b..0000000000 --- a/docs/user-guide/index.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _user-guide: - -User guide -========== - -.. toctree:: - :maxdepth: 1 - - installation - arrays - groups - attributes - storage - config - v3_migration - -Advanced Topics ---------------- - -.. 
toctree:: - :maxdepth: 1 - - performance - consolidated_metadata - extending - gpu - - -.. Coming soon - async diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md new file mode 100644 index 0000000000..486722cd0d --- /dev/null +++ b/docs/user-guide/installation.md @@ -0,0 +1,50 @@ +# Installation + +## Required dependencies + +Required dependencies include: + +- [Python](https://docs.python.org/3/) (3.11 or later) +- [packaging](https://packaging.pypa.io) (22.0 or later) +- [numpy](https://numpy.org) (1.25 or later) +- [numcodecs[crc32c]](https://numcodecs.readthedocs.io) (0.14 or later) +- [typing_extensions](https://typing-extensions.readthedocs.io) (4.9 or later) +- [donfig](https://donfig.readthedocs.io) (0.8 or later) + +## pip + +Zarr is available on [PyPI](https://pypi.org/project/zarr/). Install it using `pip`: + +```console +$ pip install zarr +``` + +There are a number of optional dependency groups you can install for extra functionality. +These can be installed using `pip install "zarr[<extra>]"`, e.g. `pip install "zarr[gpu]"` + +- `gpu`: support for GPUs +- `remote`: support for reading/writing to remote data stores + +Additional optional dependencies include `rich` and `universal_pathlib`. These must be installed separately. + +## conda + +Zarr is also published to [conda-forge](https://conda-forge.org). Install it using `conda`: + +```console +$ conda install -c conda-forge zarr +``` + +Conda does not support optional dependencies, so you will have to manually install any packages +needed to enable extra functionality. + +## Dependency support + +Zarr has endorsed [Scientific-Python SPEC 0](https://scientific-python.org/specs/spec-0000/) and now follows the version support window as outlined below: + +- Python: 36 months after initial release +- Core package dependencies (e.g. NumPy): 24 months after initial release + +## Development + +To install the latest development version of Zarr, see the [contributing guide](../contributing.md).
diff --git a/docs/user-guide/installation.rst b/docs/user-guide/installation.rst deleted file mode 100644 index a79f0763cb..0000000000 --- a/docs/user-guide/installation.rst +++ /dev/null @@ -1,54 +0,0 @@ -Installation -============ - -Required dependencies ---------------------- - -Required dependencies include: - -- `Python `_ (3.11 or later) -- `packaging `_ (22.0 or later) -- `numpy `_ (1.25 or later) -- `numcodecs[crc32c] `_ (0.14 or later) -- `typing_extensions `_ (4.9 or later) -- `donfig `_ (0.8 or later) - -pip ---- - -Zarr is available on `PyPI `_. Install it using ``pip``: - -.. code-block:: console - - $ pip install zarr - -There are a number of optional dependency groups you can install for extra functionality. -These can be installed using ``pip install "zarr[]"``, e.g. ``pip install "zarr[gpu]"`` - -- ``gpu``: support for GPUs -- ``remote``: support for reading/writing to remote data stores - -Additional optional dependencies include ``rich``, ``universal_pathlib``. These must be installed separately. - -conda ------ - -Zarr is also published to `conda-forge `_. Install it using ``conda``: - -.. code-block:: console - - $ conda install -c conda-forge zarr - -Conda does not support optional dependencies, so you will have to manually install any packages -needed to enable extra functionality. - -Dependency support ------------------- -Zarr has endorsed `Scientific-Python SPEC 0 `_ and now follows the version support window as outlined below: - -- Python: 36 months after initial release -- Core package dependencies (e.g. NumPy): 24 months after initial release - -Development ------------ -To install the latest development version of Zarr, see the :ref:`contributing guide `. 
diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md new file mode 100644 index 0000000000..171b04ca08 --- /dev/null +++ b/docs/user-guide/performance.md @@ -0,0 +1,263 @@ +# Optimizing performance + +## Chunk optimizations + +### Chunk size and shape + +In general, chunks of at least 1 megabyte (1M) uncompressed size seem to provide +better performance, at least when using the Blosc compression library. + +The optimal chunk shape will depend on how you want to access the data. E.g., +for a 2-dimensional array, if you only ever take slices along the first +dimension, then chunk across the second dimension. If you know you want to chunk +across an entire dimension you can use the full size of that dimension within the +`chunks` argument, e.g.: + +```python +import zarr +z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32') +z1.chunks +# (100, 10000) +``` + +Alternatively, if you only ever take slices along the second dimension, then +chunk across the first dimension, e.g.: + +```python +z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32') +z2.chunks +# (10000, 100) +``` + +If you require reasonable performance for both access patterns then you need to +find a compromise, e.g.: + +```python +z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') +z3.chunks +# (1000, 1000) +``` + +If you are feeling lazy, you can let Zarr guess a chunk shape for your data by +providing `chunks='auto'`, although please note that the algorithm for guessing +a chunk shape is based on simple heuristics and may be far from optimal. 
E.g.: + +```python +z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') +z4.chunks +# (625, 625) +``` + +If you know you are always going to be loading the entire array into memory, you +can turn off chunks by providing `chunks` equal to `shape`, in which case there +will be one single chunk for the array: + +```python +z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32') +z5.chunks +# (10000, 10000) +``` + +### Sharding + +If you have large arrays but need small chunks to efficiently access the data, you can +use sharding. Sharding provides a mechanism to store multiple chunks in a single +storage object or file. This can be useful because traditional file systems and object +storage systems may have performance issues storing and accessing many files. +Additionally, small files can be inefficient to store if they are smaller than the +block size of the file system. + +Picking a good combination of chunk shape and shard shape is important for performance. +The chunk shape determines what unit of your data can be read independently, while the +shard shape determines what unit of your data can be written efficiently. + +For an example, consider you have a 100 GB array and need to read small chunks of 1 MB. +Without sharding, each chunk would be one file resulting in 100,000 files. That can +already cause performance issues on some file systems. +With sharding, you could use a shard size of 1 GB. This would result in 1000 chunks per +file and 100 files in total, which seems manageable for most storage systems. +You would still be able to read each 1 MB chunk independently, but you would need to +write your data in 1 GB increments. + +To use sharding, you need to specify the `shards` parameter when creating the array. 
+ +```python +z6 = zarr.create_array(store={}, shape=(10000, 10000, 1000), shards=(1000, 1000, 1000), chunks=(100, 100, 100), dtype='uint8') +z6.info +# Type : Array +# Zarr format : 3 +# Data type : DataType.uint8 +# Fill value : 0 +# Shape : (10000, 10000, 1000) +# Shard shape : (1000, 1000, 1000) +# Chunk shape : (100, 100, 100) +# Order : C +# Read-only : False +# Store type : MemoryStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (ZstdCodec(level=0, checksum=False),) +# No. bytes : 100000000000 (93.1G) +``` + +### Chunk memory layout + +The order of bytes **within each chunk** of an array can be changed via the +`order` config option, to use either C or Fortran layout. For +multi-dimensional arrays, these two layouts may provide different compression +ratios, depending on the correlation structure within the data. E.g.: + +```python +import numpy as np + +a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T +c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'}) +c[:] = a +c.info_complete() +# Type : Array +# Zarr format : 3 +# Data type : DataType.int32 +# Fill value : 0 +# Shape : (10000, 10000) +# Chunk shape : (1000, 1000) +# Order : C +# Read-only : False +# Store type : MemoryStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (ZstdCodec(level=0, checksum=False),) +# No. bytes : 400000000 (381.5M) +# No. 
bytes stored : 342588911 +# Storage ratio : 1.2 +# Chunks Initialized : 100 + +with zarr.config.set({'array.order': 'F'}): + f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype) + f[:] = a +f.info_complete() +# Type : Array +# Zarr format : 3 +# Data type : DataType.int32 +# Fill value : 0 +# Shape : (10000, 10000) +# Chunk shape : (1000, 1000) +# Order : F +# Read-only : False +# Store type : MemoryStore +# Filters : () +# Serializer : BytesCodec(endian=) +# Compressors : (ZstdCodec(level=0, checksum=False),) +# No. bytes : 400000000 (381.5M) +# No. bytes stored : 342588911 +# Storage ratio : 1.2 +# Chunks Initialized : 100 +``` + +In the above example, Fortran order gives a better compression ratio. This is an +artificial example but illustrates the general point that changing the order of +bytes within chunks of an array may improve the compression ratio, depending on +the structure of the data, the compression algorithm used, and which compression +filters (e.g., byte-shuffle) have been applied. + +### Empty chunks + +It is possible to configure how Zarr handles the storage of chunks that are "empty" +(i.e., every element in the chunk is equal to the array's fill value). When creating +an array with `write_empty_chunks=False`, Zarr will check whether a chunk is empty before compression and storage. If a chunk is empty, +then Zarr does not store it, and instead deletes the chunk from storage +if the chunk had been previously stored. + +This optimization prevents storing redundant objects and can speed up reads, but the cost is +added computation during array writes, since the contents of +each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. +If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. 
+In this case, creating an array with `write_empty_chunks=True` (the default) will instruct Zarr to write every chunk without checking for emptiness. + +The following example illustrates the effect of the `write_empty_chunks` flag on +the time required to write an array with different values: + +```python +import zarr +import numpy as np +import time + +def timed_write(write_empty_chunks): + """ + Measure the time required and number of objects created when writing + to a Zarr array with random ints or fill value. + """ + chunks = (8192,) + shape = (chunks[0] * 1024,) + data = np.random.randint(0, 255, shape) + dtype = 'uint8' + arr = zarr.create_array( + f'data/example-{write_empty_chunks}.zarr', + shape=shape, + chunks=chunks, + dtype=dtype, + fill_value=0, + config={'write_empty_chunks': write_empty_chunks} + ) + # initialize all chunks + arr[:] = 100 + result = [] + for value in (data, arr.fill_value): + start = time.time() + arr[:] = value + elapsed = time.time() - start + result.append((elapsed, arr.nchunks_initialized)) + return result + +# log results +for write_empty_chunks in (True, False): + full, empty = timed_write(write_empty_chunks) + print(f'\nwrite_empty_chunks={write_empty_chunks}:\n\tRandom Data: {full[0]:.4f}s, {full[1]} objects stored\n\t Empty Data: {empty[0]:.4f}s, {empty[1]} objects stored\n') +# write_empty_chunks=True: +# Random Data: ...s, 1024 objects stored +# Empty Data: ...s, 1024 objects stored +# +# write_empty_chunks=False: +# Random Data: ...s, 1024 objects stored +# Empty Data: ...s, 0 objects stored +``` + +In this example, writing random data is slightly slower with `write_empty_chunks=False`, +but writing empty data is substantially faster and generates far fewer objects in storage. + +### Changing chunk shapes (rechunking) + +Coming soon. + +## Parallel computing and synchronization + +Coming soon. + +## Pickle support + +Zarr arrays and groups can be pickled, as long as the underlying store object can be +pickled.
With the exception of the `zarr.storage.MemoryStore`, any of the +storage classes provided in the `zarr.storage` module can be pickled. + +If an array or group is backed by a persistent store such as a `zarr.storage.LocalStore`, +`zarr.storage.ZipStore` or `zarr.storage.FsspecStore` then the store data +**are not** pickled. The only thing that is pickled is the necessary parameters to allow the store +to re-open any underlying files or databases upon being unpickled. + +E.g., pickle/unpickle a local store array: + +```python +import pickle +data = np.arange(100000) +z1 = zarr.create_array(store='data/example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype) +z1[:] = data +s = pickle.dumps(z1) +z2 = pickle.loads(s) +z1 == z2 +# True +np.all(z1[:] == z2[:]) +# np.True_ +``` + +## Configuring Blosc + +Coming soon. diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst deleted file mode 100644 index 88329f11b8..0000000000 --- a/docs/user-guide/performance.rst +++ /dev/null @@ -1,278 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. _user-guide-performance: - -Optimizing performance -====================== - -.. _user-guide-chunks: - -Chunk optimizations -------------------- - -.. _user-guide-chunks-shape: - -Chunk size and shape -~~~~~~~~~~~~~~~~~~~~ - -In general, chunks of at least 1 megabyte (1M) uncompressed size seem to provide -better performance, at least when using the Blosc compression library. - -The optimal chunk shape will depend on how you want to access the data. E.g., -for a 2-dimensional array, if you only ever take slices along the first -dimension, then chunk across the second dimension.
If you know you want to chunk -across an entire dimension you can use the full size of that dimension within the -``chunks`` argument, e.g.:: - - >>> import zarr - >>> z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32') - >>> z1.chunks - (100, 10000) - -Alternatively, if you only ever take slices along the second dimension, then -chunk across the first dimension, e.g.:: - - >>> z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32') - >>> z2.chunks - (10000, 100) - -If you require reasonable performance for both access patterns then you need to -find a compromise, e.g.:: - - >>> z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') - >>> z3.chunks - (1000, 1000) - -If you are feeling lazy, you can let Zarr guess a chunk shape for your data by -providing ``chunks='auto'``, although please note that the algorithm for guessing -a chunk shape is based on simple heuristics and may be far from optimal. E.g.:: - - >>> z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') - >>> z4.chunks - (625, 625) - -If you know you are always going to be loading the entire array into memory, you -can turn off chunks by providing ``chunks`` equal to ``shape``, in which case there -will be one single chunk for the array:: - - >>> z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32') - >>> z5.chunks - (10000, 10000) - - -Sharding -~~~~~~~~ - -If you have large arrays but need small chunks to efficiently access the data, you can -use sharding. Sharding provides a mechanism to store multiple chunks in a single -storage object or file. This can be useful because traditional file systems and object -storage systems may have performance issues storing and accessing many files. -Additionally, small files can be inefficient to store if they are smaller than the -block size of the file system. 
- -Picking a good combination of chunk shape and shard shape is important for performance. -The chunk shape determines what unit of your data can be read independently, while the -shard shape determines what unit of your data can be written efficiently. - -For an example, consider you have a 100 GB array and need to read small chunks of 1 MB. -Without sharding, each chunk would be one file resulting in 100,000 files. That can -already cause performance issues on some file systems. -With sharding, you could use a shard size of 1 GB. This would result in 1000 chunks per -file and 100 files in total, which seems manageable for most storage systems. -You would still be able to read each 1 MB chunk independently, but you would need to -write your data in 1 GB increments. - -To use sharding, you need to specify the ``shards`` parameter when creating the array. - - >>> z6 = zarr.create_array(store={}, shape=(10000, 10000, 1000), shards=(1000, 1000, 1000), chunks=(100, 100, 100), dtype='uint8') - >>> z6.info - Type : Array - Zarr format : 3 - Data type : DataType.uint8 - Fill value : 0 - Shape : (10000, 10000, 1000) - Shard shape : (1000, 1000, 1000) - Chunk shape : (100, 100, 100) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 100000000000 (93.1G) - -.. _user-guide-chunks-order: - -Chunk memory layout -~~~~~~~~~~~~~~~~~~~ - -The order of bytes **within each chunk** of an array can be changed via the -``order`` config option, to use either C or Fortran layout. For -multi-dimensional arrays, these two layouts may provide different compression -ratios, depending on the correlation structure within the data. 
E.g.:: - - >>> import numpy as np - >>> - >>> a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T - >>> c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'}) - >>> c[:] = a - >>> c.info_complete() - Type : Array - Zarr format : 3 - Data type : DataType.int32 - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : C - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 400000000 (381.5M) - No. bytes stored : 342588911 - Storage ratio : 1.2 - Chunks Initialized : 100 - >>> with zarr.config.set({'array.order': 'F'}): - ... f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype) - ... f[:] = a - >>> f.info_complete() - Type : Array - Zarr format : 3 - Data type : DataType.int32 - Fill value : 0 - Shape : (10000, 10000) - Chunk shape : (1000, 1000) - Order : F - Read-only : False - Store type : MemoryStore - Filters : () - Serializer : BytesCodec(endian=) - Compressors : (ZstdCodec(level=0, checksum=False),) - No. bytes : 400000000 (381.5M) - No. bytes stored : 342588911 - Storage ratio : 1.2 - Chunks Initialized : 100 - -In the above example, Fortran order gives a better compression ratio. This is an -artificial example but illustrates the general point that changing the order of -bytes within chunks of an array may improve the compression ratio, depending on -the structure of the data, the compression algorithm used, and which compression -filters (e.g., byte-shuffle) have been applied. - -.. _user-guide-chunks-empty-chunks: - -Empty chunks -~~~~~~~~~~~~ - -It is possible to configure how Zarr handles the storage of chunks that are "empty" -(i.e., every element in the chunk is equal to the array's fill value). When creating -an array with ``write_empty_chunks=False``, Zarr will check whether a chunk is empty before compression and storage. 
If a chunk is empty, -then Zarr does not store it, and instead deletes the chunk from storage -if the chunk had been previously stored. - -This optimization prevents storing redundant objects and can speed up reads, but the cost is -added computation during array writes, since the contents of -each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. -If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. -In this case, creating an array with ``write_empty_chunks=True`` (the default) will instruct Zarr to write every chunk without checking for emptiness. - -The following example illustrates the effect of the ``write_empty_chunks`` flag on -the time required to write an array with different values.:: - - >>> import zarr - >>> import numpy as np - >>> import time - >>> - >>> def timed_write(write_empty_chunks): - ... """ - ... Measure the time required and number of objects created when writing - ... to a Zarr array with random ints or fill value. - ... """ - ... chunks = (8192,) - ... shape = (chunks[0] * 1024,) - ... data = np.random.randint(0, 255, shape) - ... dtype = 'uint8' - ... arr = zarr.create_array( - ... f'data/example-{write_empty_chunks}.zarr', - ... shape=shape, - ... chunks=chunks, - ... dtype=dtype, - ... fill_value=0, - ... config={'write_empty_chunks': write_empty_chunks} - ... ) - ... # initialize all chunks - ... arr[:] = 100 - ... result = [] - ... for value in (data, arr.fill_value): - ... start = time.time() - ... arr[:] = value - ... elapsed = time.time() - start - ... result.append((elapsed, arr.nchunks_initialized)) - ... return result - ... # log results - >>> for write_empty_chunks in (True, False): - ... full, empty = timed_write(write_empty_chunks) - ... 
print(f'\nwrite_empty_chunks={write_empty_chunks}:\n\tRandom Data: {full[0]:.4f}s, {full[1]} objects stored\n\t Empty Data: {empty[0]:.4f}s, {empty[1]} objects stored\n') - write_empty_chunks=True: - Random Data: ..., 1024 objects stored - Empty Data: ...s, 1024 objects stored - - write_empty_chunks=False: - Random Data: ...s, 1024 objects stored - Empty Data: ...s, 0 objects stored - - -In this example, writing random data is slightly slower with ``write_empty_chunks=True``, -but writing empty data is substantially faster and generates far fewer objects in storage. - -.. _user-guide-rechunking: - -Changing chunk shapes (rechunking) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Coming soon. - -.. _user-guide-sync: - -Parallel computing and synchronization --------------------------------------- - -Coming soon. - -.. _user-guide-pickle: - -Pickle support --------------- - -Zarr arrays and groups can be pickled, as long as the underlying store object can be -pickled. With the exception of the :class:`zarr.storage.MemoryStore`, any of the -storage classes provided in the :mod:`zarr.storage` module can be pickled. - -If an array or group is backed by a persistent store such as the a :class:`zarr.storage.LocalStore`, -:class:`zarr.storage.ZipStore` or :class:`zarr.storage.FsspecStore` then the store data -**are not** pickled. The only thing that is pickled is the necessary parameters to allow the store -to re-open any underlying files or databases upon being unpickled. - -E.g., pickle/unpickle an local store array:: - - >>> import pickle - >>> data = np.arange(100000) - >>> z1 = zarr.create_array(store='data/example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype) - >>> z1[:] = data - >>> s = pickle.dumps(z1) - >>> z2 = pickle.loads(s) - >>> z1 == z2 - True - >>> np.all(z1[:] == z2[:]) - np.True_ - -.. _user-guide-tips-blosc: - -Configuring Blosc ------------------ - -Coming soon. 
diff --git a/docs/user-guide/storage.md b/docs/user-guide/storage.md new file mode 100644 index 0000000000..7b9c8ff0a2 --- /dev/null +++ b/docs/user-guide/storage.md @@ -0,0 +1,140 @@ +# Storage guide + +Zarr-Python supports multiple storage backends, including: local file systems, +Zip files, remote stores via [fsspec](https://filesystem-spec.readthedocs.io) (S3, HTTP, etc.), and in-memory stores. In +Zarr-Python 3, stores must implement the abstract store API from +`zarr.abc.store.Store`. + +!!! note + Unlike Zarr-Python 2 where the store interface was built around a generic `MutableMapping` + API, Zarr-Python 3 uses a custom store API that utilizes Python's AsyncIO library. + +## Implicit Store Creation + +In most cases, it is not required to create a `Store` object explicitly. Passing a string +to Zarr's top level API will result in the store being created automatically: + +```python +import zarr + +# Implicitly create a writable LocalStore +zarr.create_group(store='data/foo/bar') +# + +# Implicitly create a read-only FsspecStore +zarr.open_group( + store='s3://noaa-nwm-retro-v2-zarr-pds', + mode='r', + storage_options={'anon': True} +) +# > + +# Implicitly creates a MemoryStore +data = {} +zarr.create_group(store=data) +# +``` + +## Explicit Store Creation + +In some cases, it may be helpful to create a store instance directly. Zarr-Python offers five +built-in stores: `zarr.storage.LocalStore`, `zarr.storage.FsspecStore`, +`zarr.storage.ZipStore`, `zarr.storage.MemoryStore`, and `zarr.storage.ObjectStore`. + +### Local Store + +The `zarr.storage.LocalStore` stores data in a nested set of directories on a local +filesystem: + +```python +store = zarr.storage.LocalStore('data/foo/bar', read_only=True) +zarr.open_group(store=store, mode='r') +# +``` + +### Zip Store + +The `zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single +Zip file.
The [Zip Store specification](https://github.com/zarr-developers/zarr-specs/pull/311) is currently in draft form: + +```python +store = zarr.storage.ZipStore('data.zip', mode='w') +zarr.create_array(store=store, shape=(2,), dtype='float64') +# +``` + +### Remote Store + +The `zarr.storage.FsspecStore` stores the contents of a Zarr hierarchy following the same +logical layout as the `LocalStore`, except the store is assumed to be on a remote storage system +such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Storage). The +`zarr.storage.FsspecStore` is backed by [fsspec](https://filesystem-spec.readthedocs.io) and can support any backend +that implements the [AbstractFileSystem](https://filesystem-spec.readthedocs.io/en/stable/api.html#fsspec.spec.AbstractFileSystem) +API. `storage_options` can be used to configure the fsspec backend: + +```python +store = zarr.storage.FsspecStore.from_url( + 's3://noaa-nwm-retro-v2-zarr-pds', + read_only=True, + storage_options={'anon': True} +) +zarr.open_group(store=store, mode='r') +# > +``` + +### Memory Store + +The `zarr.storage.MemoryStore` is an in-memory store that allows for serialization of +Zarr data (metadata and chunks) to a dictionary: + +```python +data = {} +store = zarr.storage.MemoryStore(data) +# TODO: replace with create_array after #2463 +zarr.create_array(store=store, shape=(2,), dtype='float64') +# +``` + +### Object Store + +`zarr.storage.ObjectStore` stores the contents of the Zarr hierarchy using any ObjectStore +[storage implementation](https://developmentseed.org/obstore/latest/api/store/), including AWS S3 (`obstore.store.S3Store`), Google Cloud Storage (`obstore.store.GCSStore`), and Azure Blob Storage (`obstore.store.AzureStore`). This store is backed by [obstore](https://developmentseed.org/obstore/latest/), which +builds on the production-quality Rust library [object_store](https://docs.rs/object_store/latest/object_store/).
+ +```python +from zarr.storage import ObjectStore +from obstore.store import MemoryStore + +store = ObjectStore(MemoryStore()) +zarr.create_array(store=store, shape=(2,), dtype='float64') +# +``` + +Here's an example of using ObjectStore for accessing remote data: + +```python +from zarr.storage import ObjectStore +from obstore.store import S3Store + +s3_store = S3Store('noaa-nwm-retro-v2-zarr-pds', skip_signature=True, region="us-west-2") +store = zarr.storage.ObjectStore(store=s3_store, read_only=True) +group = zarr.open_group(store=store, mode='r') +group.info +# Name : +# Type : Group +# Zarr format : 2 +# Read-only : True +# Store type : ObjectStore +# No. members : 12 +# No. arrays : 12 +# No. groups : 0 +``` + +!!! warning + The `zarr.storage.ObjectStore` class is experimental. + +## Developing custom stores + +Zarr-Python `zarr.abc.store.Store` API is meant to be extended. The Store Abstract Base +Class includes all of the methods needed to be a fully operational store in Zarr Python. +Zarr also provides a test harness for custom stores: `zarr.testing.store.StoreTests`. diff --git a/docs/user-guide/storage.rst b/docs/user-guide/storage.rst deleted file mode 100644 index 4215cbaf20..0000000000 --- a/docs/user-guide/storage.rst +++ /dev/null @@ -1,148 +0,0 @@ -.. only:: doctest - - >>> import shutil - >>> shutil.rmtree('data', ignore_errors=True) - -.. _user-guide-storage: - -Storage guide -============= - -Zarr-Python supports multiple storage backends, including: local file systems, -Zip files, remote stores via fsspec_ (S3, HTTP, etc.), and in-memory stores. In -Zarr-Python 3, stores must implement the abstract store API from -:class:`zarr.abc.store.Store`. - -.. note:: - Unlike Zarr-Python 2 where the store interface was built around a generic ``MutableMapping`` - API, Zarr-Python 3 utilizes a custom store API that utilizes Python's AsyncIO library. 
- -Implicit Store Creation ------------------------ - -In most cases, it is not required to create a ``Store`` object explicitly. Passing a string -to Zarr's top level API will result in the store being created automatically.: - - >>> import zarr - >>> - >>> # Implicitly create a writable LocalStore - >>> zarr.create_group(store='data/foo/bar') - - >>> - >>> # Implicitly create a read-only FsspecStore - >>> zarr.open_group( - ... store='s3://noaa-nwm-retro-v2-zarr-pds', - ... mode='r', - ... storage_options={'anon': True} - ... ) - > - >>> - >>> # Implicitly creates a MemoryStore - >>> data = {} - >>> zarr.create_group(store=data) - - -Explicit Store Creation ------------------------ - -In some cases, it may be helpful to create a store instance directly. Zarr-Python offers four -built-in store: :class:`zarr.storage.LocalStore`, :class:`zarr.storage.FsspecStore`, -:class:`zarr.storage.ZipStore`, :class:`zarr.storage.MemoryStore`, and :class:`zarr.storage.ObjectStore`. - -Local Store -~~~~~~~~~~~ - -The :class:`zarr.storage.LocalStore` stores data in a nested set of directories on a local -filesystem.: - - >>> store = zarr.storage.LocalStore('data/foo/bar', read_only=True) - >>> zarr.open_group(store=store, mode='r') - - -Zip Store -~~~~~~~~~ - -The :class:`zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single -Zip file. The `Zip Store specification`_ is currently in draft form.: - - >>> store = zarr.storage.ZipStore('data.zip', mode='w') - >>> zarr.create_array(store=store, shape=(2,), dtype='float64') - - -Remote Store -~~~~~~~~~~~~ - -The :class:`zarr.storage.FsspecStore` stores the contents of a Zarr hierarchy in following the same -logical layout as the ``LocalStore``, except the store is assumed to be on a remote storage system -such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Store). 
The -:class:`zarr.storage.FsspecStore` is backed by `fsspec`_ and can support any backend -that implements the `AbstractFileSystem `_ -API. ``storage_options`` can be used to configure the fsspec backend.: - - >>> store = zarr.storage.FsspecStore.from_url( - ... 's3://noaa-nwm-retro-v2-zarr-pds', - ... read_only=True, - ... storage_options={'anon': True} - ... ) - >>> zarr.open_group(store=store, mode='r') - > - -Memory Store -~~~~~~~~~~~~ - -The :class:`zarr.storage.MemoryStore` a in-memory store that allows for serialization of -Zarr data (metadata and chunks) to a dictionary.: - - >>> data = {} - >>> store = zarr.storage.MemoryStore(data) - >>> # TODO: replace with create_array after #2463 - >>> zarr.create_array(store=store, shape=(2,), dtype='float64') - - -Object Store -~~~~~~~~~~~~ - -:class:`zarr.storage.ObjectStore` stores the contents of the Zarr hierarchy using any ObjectStore -`storage implementation `_, including AWS S3 (:class:`obstore.store.S3Store`), Google Cloud Storage (:class:`obstore.store.GCSStore`), and Azure Blob Storage (:class:`obstore.store.AzureStore`). This store is backed by `obstore `_, which -builds on the production quality Rust library `object_store `_. - - - >>> from zarr.storage import ObjectStore - >>> from obstore.store import MemoryStore - >>> - >>> store = ObjectStore(MemoryStore()) - >>> zarr.create_array(store=store, shape=(2,), dtype='float64') - - -Here's an example of using ObjectStore for accessing remote data: - - >>> from zarr.storage import ObjectStore - >>> from obstore.store import S3Store - >>> - >>> s3_store = S3Store('noaa-nwm-retro-v2-zarr-pds', skip_signature=True, region="us-west-2") - >>> store = zarr.storage.ObjectStore(store=s3_store, read_only=True) - >>> group = zarr.open_group(store=store, mode='r') - >>> group.info - Name : - Type : Group - Zarr format : 2 - Read-only : True - Store type : ObjectStore - No. members : 12 - No. arrays : 12 - No. groups : 0 - -.. 
warning:: - The :class:`zarr.storage.ObjectStore` class is experimental. - -.. _user-guide-custom-stores: - -Developing custom stores ------------------------- - -Zarr-Python :class:`zarr.abc.store.Store` API is meant to be extended. The Store Abstract Base -Class includes all of the methods needed to be a fully operational store in Zarr Python. -Zarr also provides a test harness for custom stores: :class:`zarr.testing.store.StoreTests`. - -.. _Zip Store Specification: https://github.com/zarr-developers/zarr-specs/pull/311 -.. _fsspec: https://filesystem-spec.readthedocs.io diff --git a/docs/user-guide/v3_migration.md b/docs/user-guide/v3_migration.md new file mode 100644 index 0000000000..9f98b95f22 --- /dev/null +++ b/docs/user-guide/v3_migration.md @@ -0,0 +1,220 @@ +# 3.0 Migration Guide + +Zarr-Python 3 represents a major refactor of the Zarr-Python codebase. Some of the +goals motivating this refactor included: + +* adding support for the Zarr format 3 specification (along with the Zarr format 2 specification) +* cleaning up internal and user facing APIs +* improving performance (particularly in high latency storage environments like + cloud object stores) + +To accommodate this, Zarr-Python 3 introduces a number of changes to the API, including a number +of significant breaking changes and deprecations. + +This page provides a guide explaining breaking changes and deprecations to help you +migrate your code from version 2 to version 3. If we have missed anything, please +open a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) +so we can improve this guide. + +## Compatibility target + +The goals described above necessitated some breaking changes to the API (hence the +major version update), but where possible we have maintained backwards compatibility +in the most widely used parts of the API. This in the `zarr.Array` and +`zarr.Group` classes and the "top-level API" (e.g. `zarr.open_array` and +`zarr.open_group`). 
+ +## Getting ready for 3.0 + +Before migrating to Zarr-Python 3, we suggest projects that depend on Zarr-Python take +the following actions in order: + +1. Pin the supported Zarr-Python version to `zarr>=2,<3`. This is a best practice + and will protect your users from any incompatibilities that may arise during the + release of Zarr-Python 3. This pin can be removed after migrating to Zarr-Python 3. +2. Limit your imports from the Zarr-Python package. Most of the primary API `zarr.*` + will be compatible in Zarr-Python 3. However, the following breaking API changes are + planned: + + - `numcodecs.*` will no longer be available in `zarr.*`. To migrate, import codecs + directly from `numcodecs`: + + ```python + from numcodecs import Blosc + # instead of: + # from zarr import Blosc + ``` + + - The `zarr.v3_api_available` feature flag is being removed. In Zarr-Python 3 + the v3 API is always available, so you shouldn't need to use this flag. + - The following internal modules are being removed or significantly changed. If + your application relies on imports from any of the below modules, you will need + to either a) modify your application to no longer rely on these imports or b) + vendor the parts of the specific modules that you need. + + * `zarr.attrs` has gone, with no replacement + * `zarr.codecs` has gone, use `numcodecs` instead + * `zarr.context` has gone, with no replacement + * `zarr.core` remains but should be considered private API + * `zarr.hierarchy` has gone, with no replacement (use `zarr.Group` in place of `zarr.hierarchy.Group`) + * `zarr.indexing` has gone, with no replacement + * `zarr.meta` has gone, with no replacement + * `zarr.meta_v1` has gone, with no replacement + * `zarr.sync` has gone, with no replacement + * `zarr.types` has gone, with no replacement + * `zarr.util` has gone, with no replacement + * `zarr.n5` has gone, see below for an alternative N5 option + +3. Test that your package works with version 3. +4.
Update the pin to include `zarr>=3,<4`. + +## Zarr-Python 2 support window + +Zarr-Python 2.x is still available, though we recommend migrating to Zarr-Python 3 for +its performance improvements and new features. Security and bug fixes will be made to +the 2.x series for at least six months following the first Zarr-Python 3 release. +If you need to use the latest Zarr-Python 2 release, you can install it with: + +```console +$ pip install "zarr==2.*" +``` + +!!! note + Development and maintenance of the 2.x release series has moved to the + [support/v2](https://github.com/zarr-developers/zarr-python/tree/support/v2) branch. + Issues and pull requests related to this branch are tagged with the + [V2](https://github.com/zarr-developers/zarr-python/labels/V2) label. + +## Migrating to Zarr-Python 3 + +The following sections provide details on breaking changes in Zarr-Python 3. + +### The Array class + +1. Disallow direct construction - the signature for initializing the `Array` class has changed + significantly. Please use `zarr.create_array` or `zarr.open_array` instead of + directly constructing the `zarr.Array` class. + +2. Defaulting to `zarr_format=3` - newly created arrays will use the version 3 of the + Zarr specification. To continue using version 2, set `zarr_format=2` when creating arrays + or set `default_zarr_version=2` in Zarr's runtime configuration. + +### The Group class + +1. Disallow direct construction - use `zarr.open_group` or `zarr.create_group` + instead of directly constructing the `zarr.Group` class. +2. Most of the h5py compatibility methods are deprecated and will issue warnings if used. + The following functions are drop in replacements that have the same signature and functionality: + + - Use `zarr.Group.create_array` in place of `zarr.Group.create_dataset` + - Use `zarr.Group.require_array` in place of `zarr.Group.require_dataset` +3. Disallow "." syntax for getting group members. 
To get a member of a group named `foo`, + use `group["foo"]` in place of `group.foo`. + +### The Store class + +The Store API has changed significantly in Zarr-Python 3. The most notable changes to the +Store API are: + +#### Store Import Paths + +Several store implementations have moved from the top-level module to `zarr.storage`: + +```diff title="Store import changes from v2 to v3" +# Before (v2) +- from zarr import MemoryStore, DirectoryStore ++ from zarr.storage import MemoryStore, LocalStore # LocalStore replaces DirectoryStore +``` + +Common replacements: + +| v2 Import | v3 Import | +|-------------------------|------------------------------------| +| `zarr.MemoryStore` | `zarr.storage.MemoryStore` | +| `zarr.DirectoryStore` | `zarr.storage.LocalStore` | +| `zarr.TempStore` | Use `tempfile.TemporaryDirectory` | +| | with `LocalStore` | + +1. Replaced the `MutableMapping` base class in favor of a custom abstract base class + (`zarr.abc.store.Store`). +2. Switched to an asynchronous interface for all store methods that result in IO. This + change ensures that all store methods are non-blocking and are as performant as + possible. + +Beyond the changed store interface, a number of deprecated stores were also removed in +Zarr-Python 3. See issue #1274 for more details on the removal of these stores. + +- `N5Store` - see https://github.com/zarr-developers/n5py for an alternative interface to + N5 formatted data. +- `ABSStore` - use the `zarr.storage.FsspecStore` instead along with fsspec's + [adlfs backend](https://github.com/fsspec/adlfs). + +The following stores have been removed altogether. Users who need these stores will have to +implement their own version in zarr-python v3. + +- `DBMStore` +- `LMDBStore` +- `SQLiteStore` +- `MongoDBStore` +- `RedisStore` + +At present, the latter five stores in this list do not have an equivalent in Zarr-Python 3.
+If you are interested in developing a custom store that targets these backends, see +[developing custom stores](storage.md#developing-custom-stores) or open an +[issue](https://github.com/zarr-developers/zarr-python/issues) to discuss your use case. + +### Dependencies + +When installing using `pip`: + +- The new `remote` dependency group can be used to install a supported version of + `fsspec`, required for remote data access. +- The new `gpu` dependency group can be used to install a supported version of + `cuda`, required for GPU functionality. +- The `jupyter` optional dependency group has been removed, since v3 contains no + Jupyter-specific functionality. + +### Miscellaneous + +- The keyword argument `zarr_version` available in most creation functions in `zarr` + (e.g. `zarr.create`, `zarr.open`, `zarr.group`, `zarr.array`) has + been deprecated in favor of `zarr_format`. + +## 🚧 Work in Progress 🚧 + +Zarr-Python 3 is still under active development, and is not yet fully complete. +The following list summarizes areas of the codebase that we expect to build out +after the 3.0.0 release. If features listed below are important to your use case +of Zarr-Python, please open (or comment on) a +[GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new). + +- The following functions / methods have not been ported to Zarr-Python 3 yet: + + * `zarr.copy` (issue #2407) + * `zarr.copy_all` (issue #2407) + * `zarr.copy_store` (issue #2407) + * `zarr.Group.move` (issue #2108) + +- The following features (corresponding to function arguments to functions in + `zarr`) have not been ported to Zarr-Python 3 yet.
Using these features + will raise a warning or a `NotImplementedError`: + + * `cache_attrs` + * `cache_metadata` + * `chunk_store` (issue #2495) + * `meta_array` + * `object_codec` (issue #2617) + * `synchronizer` (issue #1596) + * `dimension_separator` + +- The following features that were supported by Zarr-Python 2 have not been ported + to Zarr-Python 3 yet: + + * Structured arrays / dtypes (issue #2134) + * Fixed-length string dtypes (issue #2347) + * Datetime and timedelta dtypes (issue #2616) + * Object dtypes (issue #2617) + * Ragged arrays (issue #2618) + * Groups and Arrays do not implement `__enter__` and `__exit__` protocols (issue #2619) + * Big Endian dtypes (issue #2324) + * Default filters for object dtypes for Zarr format 2 arrays (issue #2627) diff --git a/docs/user-guide/v3_migration.rst b/docs/user-guide/v3_migration.rst deleted file mode 100644 index a6258534e4..0000000000 --- a/docs/user-guide/v3_migration.rst +++ /dev/null @@ -1,238 +0,0 @@ -.. _v3 migration guide: - -3.0 Migration Guide -=================== - -Zarr-Python 3 represents a major refactor of the Zarr-Python codebase. Some of the -goals motivating this refactor included: - -* adding support for the Zarr format 3 specification (along with the Zarr format 2 specification) -* cleaning up internal and user facing APIs -* improving performance (particularly in high latency storage environments like - cloud object stores) - -To accommodate this, Zarr-Python 3 introduces a number of changes to the API, including a number -of significant breaking changes and deprecations. - -This page provides a guide explaining breaking changes and deprecations to help you -migrate your code from version 2 to version 3. If we have missed anything, please -open a `GitHub issue `_ -so we can improve this guide. 
- -Compatibility target --------------------- - -The goals described above necessitated some breaking changes to the API (hence the -major version update), but where possible we have maintained backwards compatibility -in the most widely used parts of the API. This in the :class:`zarr.Array` and -:class:`zarr.Group` classes and the "top-level API" (e.g. :func:`zarr.open_array` and -:func:`zarr.open_group`). - -Getting ready for 3.0 ---------------------- - -Before migrating to Zarr-Python 3, we suggest projects that depend on Zarr-Python take -the following actions in order: - -1. Pin the supported Zarr-Python version to ``zarr>=2,<3``. This is a best practice - and will protect your users from any incompatibilities that may arise during the - release of Zarr-Python 3. This pin can be removed after migrating to Zarr-Python 3. -2. Limit your imports from the Zarr-Python package. Most of the primary API ``zarr.*`` - will be compatible in Zarr-Python 3. However, the following breaking API changes are - planned: - - - ``numcodecs.*`` will no longer be available in ``zarr.*``. To migrate, import codecs - directly from ``numcodecs``: - - .. code-block:: python - - from numcodecs import Blosc - # instead of: - # from zarr import Blosc - - - The ``zarr.v3_api_available`` feature flag is being removed. In Zarr-Python 3 - the v3 API is always available, so you shouldn't need to use this flag. - - The following internal modules are being removed or significantly changed. If - your application relies on imports from any of the below modules, you will need - to either a) modify your application to no longer rely on these imports or b) - vendor the parts of the specific modules that you need. 
- - * ``zarr.attrs`` has gone, with no replacement - * ``zarr.codecs`` has gone, use ``numcodecs`` instead - * ``zarr.context`` has gone, with no replacement - * ``zarr.core`` remains but should be considered private API - * ``zarr.hierarchy`` has gone, with no replacement (use ``zarr.Group`` inplace of ``zarr.hierarchy.Group``) - * ``zarr.indexing`` has gone, with no replacement - * ``zarr.meta`` has gone, with no replacement - * ``zarr.meta_v1`` has gone, with no replacement - * ``zarr.sync`` has gone, with no replacement - * ``zarr.types`` has gone, with no replacement - * ``zarr.util`` has gone, with no replacement - * ``zarr.n5`` has gone, see below for an alternative N5 options - -3. Test that your package works with version 3. -4. Update the pin to include ``zarr>=3,<4``. - -Zarr-Python 2 support window ----------------------------- - -Zarr-Python 2.x is still available, though we recommend migrating to Zarr-Python 3 for -its performance improvements and new features. Security and bug fixes will be made to -the 2.x series for at least six months following the first Zarr-Python 3 release. -If you need to use the latest Zarr-Python 2 release, you can install it with: - -.. code-block:: console - - $ pip install "zarr==2.*" - -.. note:: - Development and maintenance of the 2.x release series has moved to the - `support/v2 `_ branch. - Issues and pull requests related to this branch are tagged with the - `V2 `_ label. - -Migrating to Zarr-Python 3 --------------------------- - -The following sections provide details on breaking changes in Zarr-Python 3. - -The Array class -~~~~~~~~~~~~~~~ - -1. Disallow direct construction - the signature for initializing the ``Array`` class has changed - significantly. Please use :func:`zarr.create_array` or :func:`zarr.open_array` instead of - directly constructing the :class:`zarr.Array` class. - -2. Defaulting to ``zarr_format=3`` - newly created arrays will use the version 3 of the - Zarr specification. 
To continue using version 2, set ``zarr_format=2`` when creating arrays - or set ``default_zarr_version=2`` in Zarr's :ref:`runtime configuration `. - -The Group class -~~~~~~~~~~~~~~~ - -1. Disallow direct construction - use :func:`zarr.open_group` or :func:`zarr.create_group` - instead of directly constructing the :class:`zarr.Group` class. -2. Most of the h5py compatibility methods are deprecated and will issue warnings if used. - The following functions are drop in replacements that have the same signature and functionality: - - - Use :func:`zarr.Group.create_array` in place of :func:`zarr.Group.create_dataset` - - Use :func:`zarr.Group.require_array` in place of :func:`zarr.Group.require_dataset` -3. Disallow "." syntax for getting group members. To get a member of a group named ``foo``, - use ``group["foo"]`` in place of ``group.foo``. - -The Store class -~~~~~~~~~~~~~~~ - -The Store API has changed significant in Zarr-Python 3. The most notable changes to the -Store API are: - -Store Import Paths -^^^^^^^^^^^^^^^^^^ -Several store implementations have moved from the top-level module to ``zarr.storage``: - -.. code-block:: diff - :caption: Store import changes from v2 to v3 - - # Before (v2) - - from zarr import MemoryStore, DirectoryStore - + from zarr.storage import MemoryStore, LocalStore # LocalStore replaces DirectoryStore - -Common replacements: - -+-------------------------+------------------------------------+ -| v2 Import | v3 Import | -+=========================+====================================+ -| ``zarr.MemoryStore`` | ``zarr.storage.MemoryStore`` | -+-------------------------+------------------------------------+ -| ``zarr.DirectoryStore`` | ``zarr.storage.LocalStore`` | -+-------------------------+------------------------------------+ -| ``zarr.TempStore`` | Use ``tempfile.TemporaryDirectory``| -| | with ``LocalStore`` | -+-------------------------+------------------------------------+ - -1. 
Replaced the ``MutableMapping`` base class in favor of a custom abstract base class - (:class:`zarr.abc.store.Store`). -2. Switched to an asynchronous interface for all store methods that result in IO. This - change ensures that all store methods are non-blocking and are as performant as - possible. - -Beyond the changes store interface, a number of deprecated stores were also removed in -Zarr-Python 3. See :issue:`1274` for more details on the removal of these stores. - -- ``N5Store`` - see https://github.com/zarr-developers/n5py for an alternative interface to - N5 formatted data. -- ``ABSStore`` - use the :class:`zarr.storage.FsspecStore` instead along with fsspec's - `adlfs backend `_. - -The following stores have been removed altogether. Users who need these stores will have to -implement their own version in zarr-python v3. - -- ``DBMStore`` -- ``LMDBStore`` -- ``SQLiteStore`` -- ``MongoDBStore`` -- ``RedisStore`` - -At present, the latter five stores in this list do not have an equivalent in Zarr-Python 3. -If you are interested in developing a custom store that targets these backends, see -:ref:`developing custom stores ` or open an -`issue `_ to discuss your use case. - -Dependencies -~~~~~~~~~~~~ - -When installing using ``pip``: - -- The new ``remote`` dependency group can be used to install a supported version of - ``fsspec``, required for remote data access. -- The new ``gpu`` dependency group can be used to install a supported version of - ``cuda``, required for GPU functionality. -- The ``jupyter`` optional dependency group has been removed, since v3 contains no - jupyter specific functionality. - -Miscellaneous -~~~~~~~~~~~~~ - -- The keyword argument ``zarr_version`` available in most creation functions in :mod:`zarr` - (e.g. :func:`zarr.create`, :func:`zarr.open`, :func:`zarr.group`, :func:`zarr.array`) has - been deprecated in favor of ``zarr_format``. 
- -🚧 Work in Progress 🚧 ----------------------- - -Zarr-Python 3 is still under active development, and is not yet fully complete. -The following list summarizes areas of the codebase that we expect to build out -after the 3.0.0 release. If features listed below are important to your use case -of Zarr-Python, please open (or comment on) a -`GitHub issue `_. - -- The following functions / methods have not been ported to Zarr-Python 3 yet: - - * :func:`zarr.copy` (:issue:`2407`) - * :func:`zarr.copy_all` (:issue:`2407`) - * :func:`zarr.copy_store` (:issue:`2407`) - * :func:`zarr.Group.move` (:issue:`2108`) - -- The following features (corresponding to function arguments to functions in - :mod:`zarr`) have not been ported to Zarr-Python 3 yet. Using these features - will raise a warning or a ``NotImplementedError``: - - * ``cache_attrs`` - * ``cache_metadata`` - * ``chunk_store`` (:issue:`2495`) - * ``meta_array`` - * ``object_codec`` (:issue:`2617`) - * ``synchronizer`` (:issue:`1596`) - * ``dimension_separator`` - -- The following features that were supported by Zarr-Python 2 have not been ported - to Zarr-Python 3 yet: - - * Structured arrays / dtypes (:issue:`2134`) - * Fixed-length string dtypes (:issue:`2347`) - * Datetime and timedelta dtypes (:issue:`2616`) - * Object dtypes (:issue:`2617`) - * Ragged arrays (:issue:`2618`) - * Groups and Arrays do not implement ``__enter__`` and ``__exit__`` protocols (:issue:`2619`) - * Big Endian dtypes (:issue:`2324`) - * Default filters for object dtypes for Zarr format 2 arrays (:issue:`2627`) diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000000..4ae5155ce9 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,154 @@ +# Based on https://github.com/developmentseed/obspec/blob/main/mkdocs.yml +site_name: zarr-python +repo_name: zarr-developers/zarr-python +repo_url: https://github.com/zarr-developers/zarr-python +site_description: An implementation of chunked, compressed, N-dimensional arrays for Python. 
+site_author: Alistair Miles +site_url: https://zarr.readthedocs.io/ +docs_dir: docs + +extra: + version: + alias: true + provider: mike + +nav: + - "index.md" + - User Guide: + - user-guide/installation.md + - user-guide/arrays.md + - user-guide/groups.md + - user-guide/attributes.md + - user-guide/storage.md + - user-guide/config.md + - user-guide/v3_migration.md + - user-guide/performance.md + - user-guide/extending.md + - user-guide/gpu.md + - user-guide/consolidated_metadata.md + - API Reference: + - api/array.md + - api/group.md + - api/create.md + - api/open.md + - api/load.md + - api/save.md + - api/convenience.md + - api/config.md + - api/codecs.md + - api/errors.md + - api/registry.md + - api/storage.md + - api/testing.md + - API: + - api/api_async.md + - api/api_sync.md + - ABC: + - api/abc/codec.md + - api/abc/metadata.md + - api/abc/store.md + - deprecated: + - api/deprecated/convenience.md + - api/deprecated/creation.md + - release-notes.md + - contributing.md +watch: + - src/zarr + - docs + +theme: + language: en + name: material + custom_dir: docs/overrides + logo: _static/logo_horizontal.svg + palette: + # Palette toggle for automatic mode + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + primary: blue grey + accent: pink + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: grey + accent: pink + toggle: + icon: material/brightness-4 + name: Switch to system preference + + font: + text: Roboto + code: Roboto Mono + + features: + - content.code.annotate + - content.code.copy + - navigation.indexes + - navigation.instant + - navigation.tracking + - search.suggest + - search.share + +extra_css: + - overrides/stylesheets/extra.css + +plugins: + - search + - mkdocstrings: + enable_inventory: true + 
handlers: + python: + paths: [src/zarr] + options: + # We set allow_inspection: false so that mkdocstrings collects docstrings + # by statically parsing the source, never by importing the package. + allow_inspection: false + docstring_section_style: list + docstring_style: numpy + line_length: 60 + separate_signature: true + show_root_heading: true + show_signature_annotations: true + show_source: false + show_symbol_type_toc: true + signature_crossrefs: true + + inventories: + - https://docs.python.org/3/objects.inv + +# https://github.com/developmentseed/titiler/blob/50934c929cca2fa8d3c408d239015f8da429c6a8/docs/mkdocs.yml#L115-L140 +markdown_extensions: + - admonition + - attr_list + - codehilite: + guess_lang: false + - def_list + - footnotes + - md_in_html + - pymdownx.arithmatex + - pymdownx.betterem + - pymdownx.caret: + insert: false + - pymdownx.details + - pymdownx.escapeall: + hardbreak: true + nbsp: true + - pymdownx.magiclink: + hide_protocol: true + repo_url_shortener: true + - pymdownx.smartsymbols + - pymdownx.superfences + - pymdownx.tasklist: + custom_checkbox: true + - pymdownx.tilde + - toc: + permalink: true diff --git a/pyproject.toml b/pyproject.toml index 1f270b435f..256a84c03d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,15 +90,12 @@ remote_tests = [ optional = ["rich", "universal-pathlib"] docs = [ # Doc building - 'sphinx==8.1.3', - 'sphinx-autobuild>=2021.3.14', - 'sphinx-autoapi==3.4.0', - 'sphinx_design', - 'sphinx-issues', - 'sphinx-copybutton', - 'sphinx-reredirects', - 'pydata-sphinx-theme', - 'numpydoc', + "mkdocs-material[imaging]>=9.6.14", + "mkdocs>=1.6.1", + "mkdocstrings>=0.29.1", + "mkdocstrings-python>=1.16.10", + "mike>=2.1.3", + "markdown-exec[ansi]", # Changelog generation 'towncrier', # Optional dependencies to run examples @@ -200,8 +197,9 @@ list-env = "pip list" features = ['docs'] [tool.hatch.envs.docs.scripts] -build = "cd docs && make html" -serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0" +serve = "mkdocs
serve" +build = "mkdocs build" +readthedocs = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r site $READTHEDOCS_OUTPUT/html" [tool.hatch.envs.upstream] python = "3.13" @@ -443,9 +441,9 @@ checks = [ [tool.towncrier] directory = 'changes' -filename = "docs/release-notes.rst" -underlines = ["-", "~", "^"] -issue_format = ":issue:`{issue}`" +filename = "docs/release-notes.md" +underlines = ["", "", ""] +issue_format = "[#{issue}](https://github.com/zarr-developers/zarr-python/issues/{issue})" [tool.codespell] ignore-words-list = "astroid" From 2d8650bfc7e21c7c2d6981a9499e6707935c0c55 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:26:48 -0400 Subject: [PATCH 02/64] Update towncrier instructions --- docs/contributing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing.md b/docs/contributing.md index b53bffe8a0..8342fd25b2 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -233,7 +233,7 @@ If an existing Zarr format version changes, or a new version of the Zarr format #### Pre-release 1. Make sure that all pull requests which will be included in the release have been properly documented as changelog files in `changes`. -2. Run `towncrier build --version x.y.z` to create the changelog. +2. Run `hatch env run --env docs -- towncrier build --version x.y.z` to create the changelog.
#### Releasing From 1eb4292a4a0ba526b8b39cfaba1f2a677ef097f9 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:39:26 -0400 Subject: [PATCH 03/64] Update readthedocs config --- .readthedocs.yaml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 05239456a0..76e6b30d1e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,12 +7,19 @@ build: jobs: pre_build: - | - if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; - then - towncrier build --version Unreleased --yes; + if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; then + hatch env run --env docs -- towncrier build --version Unreleased --yes fi - commands: - - mamba install -c conda-forge -c nodefaults hatch - - hatch env run --env docs build - - hatch env run --env docs readthedocs + post_build: + - hatch env run --env docs build + - hatch env run --env docs readthedocs + +conda: + channels: + - conda-forge + dependencies: + - hatch + +mkdocs: + configuration: mkdocs.yml From 631b180586bdd38d12eccee431d039181a765f89 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:43:31 -0400 Subject: [PATCH 04/64] Remove conda section --- .readthedocs.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 76e6b30d1e..f660be9ba2 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -14,12 +14,5 @@ build: - hatch env run --env docs build - hatch env run --env docs readthedocs - -conda: - channels: - - conda-forge - dependencies: - - hatch - mkdocs: configuration: mkdocs.yml From d9f16882c01802b68751b58223312d351900aa97 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:48:15 -0400 Subject: [PATCH 05/64] Install hatch for rtd --- .readthedocs.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.readthedocs.yaml 
b/.readthedocs.yaml index f660be9ba2..4ec49ba700 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,6 +7,7 @@ build: jobs: pre_build: - | + python -m pip install hatch if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; then hatch env run --env docs -- towncrier build --version Unreleased --yes fi From 97c6b178035969a26b0d83391d165aaefe091757 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:53:43 -0400 Subject: [PATCH 06/64] Move build step --- .readthedocs.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4ec49ba700..48f71ca006 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,8 +11,9 @@ build: if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; then hatch env run --env docs -- towncrier build --version Unreleased --yes fi - post_build: + build: - hatch env run --env docs build + post_build: - hatch env run --env docs readthedocs mkdocs: From 81f7ba8151114e4cfb04ea2adbf02f83b44f44e3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:58:15 -0400 Subject: [PATCH 07/64] Start a shell with hatch --- .readthedocs.yaml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 48f71ca006..990ca9d449 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,13 +8,11 @@ build: pre_build: - | python -m pip install hatch - if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; then - hatch env run --env docs -- towncrier build --version Unreleased --yes + hatch shell docs + if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; + then + towncrier build --version Unreleased --yes; fi - build: - - hatch env run --env docs build - post_build: - - hatch env run --env docs readthedocs mkdocs: configuration: mkdocs.yml From 1f18a56043392bdd100e052da973abfa758184ce Mon Sep 17 00:00:00 2001 From: Max Jones 
<14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 16:08:59 -0400 Subject: [PATCH 08/64] Use commands --- .readthedocs.yaml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 990ca9d449..ae609495d5 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -4,15 +4,11 @@ build: os: ubuntu-22.04 tools: python: "3.12" - jobs: - pre_build: - - | - python -m pip install hatch - hatch shell docs - if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; - then - towncrier build --version Unreleased --yes; - fi - -mkdocs: - configuration: mkdocs.yml + commands: + - | + python -m pip install hatch + if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; then + hatch env run --env docs -- towncrier build --version Unreleased --yes + fi + hatch env run --env docs build + hatch env run --env docs readthedocs From d65a2009ce415312517dc0d79c157e6245feb810 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 16:14:43 -0400 Subject: [PATCH 09/64] Don't use hatch --- .readthedocs.yaml | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index ae609495d5..96bc537bd3 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -4,11 +4,20 @@ build: os: ubuntu-22.04 tools: python: "3.12" - commands: - - | - python -m pip install hatch - if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; then - hatch env run --env docs -- towncrier build --version Unreleased --yes - fi - hatch env run --env docs build - hatch env run --env docs readthedocs + jobs: + pre_build: + - | + if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; + then + towncrier build --version Unreleased --yes; + fi + +mkdocs: + configuration: mkdocs.yml + +python: + install: + - method: pip + path: . 
+ extra_requirements: + - docs From a6ed57e821a6325b0c0294bb893a911314659bbf Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 16:18:40 -0400 Subject: [PATCH 10/64] Figure out redirects later --- mkdocs.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 1ca6f2cc95..4ae5155ce9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -103,20 +103,6 @@ extra_css: plugins: - search - - redirects: - redirect_maps: - spec: https://zarr-specs.readthedocs.io - spec/v1: https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html - spec/v2: https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html - spec/v3: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html - license: https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt - tutorial: user-guide - getting-started: quickstart - roadmap: developers/roadmap.html - installation: user-guide/installation.html - api: api/zarr/index - release: release-notes.html - release-notes: release-notes.html - mkdocstrings: enable_inventory: true handlers: From 4e92c3fdaceedd1173561d7800620a1b8baffa98 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 16:22:28 -0400 Subject: [PATCH 11/64] Format docstrings with ruff --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c037f1a99b..85d3240751 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,13 +98,14 @@ docs = [ "mike>=2.1.3", "mkdocs-redirects>=1.2.0", "markdown-exec[ansi]", + "ruff", # Changelog generation 'towncrier', # Optional dependencies to run examples 'numcodecs[msgpack]', 'rich', 's3fs>=2023.10.0', - 'astroid<4' + 'astroid<4', ] From b5aef2740b36a05f5d590180c8e732dce82aeb4f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 16:22:38 -0400 Subject: [PATCH 12/64] git ignore docs 
output --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cfc455b715..f2f41270ca 100644 --- a/.gitignore +++ b/.gitignore @@ -49,7 +49,8 @@ coverage.xml # Django stuff: *.log -# Sphinx documentation +# Documentation +site/ docs/_build/ docs/data data From 0e9238553cf43ad3fbd3cb08c2d01f730bdb3ac1 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 6 Jun 2025 16:29:53 -0400 Subject: [PATCH 13/64] Specify towncrier start --- docs/release-notes.md | 10 ++++++---- pyproject.toml | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/release-notes.md b/docs/release-notes.md index 0f576b4f4e..5c12704237 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -1,11 +1,13 @@ # Release notes + + + ## 3.0.8 (2025-05-19) -> **Warning** -> In versions 3.0.0 to 3.0.7 opening arrays or groups with `mode='a'` (the default for many builtin functions) -> would cause any existing paths in the store to be deleted. This is fixed in 3.0.8, and -> we recommend all users upgrade to avoid this bug that could cause unintentional data loss. +!!! warning + + In versions 3.0.0 to 3.0.7 opening arrays or groups with `mode='a'` (the default for many builtin functions) would cause any existing paths in the store to be deleted. This is fixed in 3.0.8, and we recommend all users upgrade to avoid this bug that could cause unintentional data loss. 
### Features diff --git a/pyproject.toml b/pyproject.toml index 85d3240751..bd0f02cdb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -446,6 +446,7 @@ directory = 'changes' filename = "docs/release-notes.md" underlines = ["", "", ""] issue_format = "[#{issue}](https://github.com/zarr-developers/zarr-python/issues/{issue})" +start_string = "\n" [tool.codespell] ignore-words-list = "astroid" From d4859517677e34e589e1ff1366ae8ddc9e07eca0 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 28 Jun 2025 06:41:26 -0400 Subject: [PATCH 14/64] Add redirects --- mkdocs.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 4b8bc60f19..1db9a74a32 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -126,6 +126,19 @@ plugins: inventories: - https://docs.python.org/3/objects.inv + - redirects: + redirect_maps: + 'spec/index.md': 'https://zarr-specs.readthedocs.io' + 'spec/v1.md': 'https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html' + 'spec/v2.md': 'https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html' + 'spec/v3.md': 'https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html' + 'license.md': 'https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt' + 'tutorial.md': 'user-guide/installation.md' + 'getting-started.md': 'index.md#quick-start' + 'roadmap.md': 'https://zarr.readthedocs.io/en/v3.0.8/developers/roadmap.html' + 'installation.md': 'user-guide/installation.md' + 'api.md': 'api/open.md' + 'release.md': 'release-notes.md' # https://github.com/developmentseed/titiler/blob/50934c929cca2fa8d3c408d239015f8da429c6a8/docs/mkdocs.yml#L115-L140 markdown_extensions: From 112744bc686b64785ffcdfbae6b9f88403798acc Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 28 Jun 2025 06:42:34 -0400 Subject: [PATCH 15/64] Don't use version extra --- mkdocs.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml
index 1db9a74a32..3469c0c15c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -7,11 +7,6 @@ site_author: Alistair Miles site_url: https://zarr.readthedocs.io/ docs_dir: docs -extra: - version: - alias: true - provider: mike - nav: - "index.md" - User Guide: From a3db0d59433e8a66a31a45405d62be09a128f343 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 28 Jun 2025 06:46:25 -0400 Subject: [PATCH 16/64] Convert changes to markdown --- changes/{2774.feature.rst => 2774.feature.md} | 0 changes/{2871.feature.rst => 2871.feature.md} | 0 changes/{2874.feature.rst => 2874.feature.md} | 2 +- changes/{3127.bugfix.rst => 3127.bugfix.md} | 0 changes/{3128.bugfix.rst => 3128.bugfix.md} | 2 +- changes/3130.feature.md | 1 + changes/3130.feature.rst | 1 - changes/{3138.feature.rst => 3138.feature.md} | 0 changes/{3140.bugfix.rst => 3140.bugfix.md} | 0 changes/{3156.bugfix.rst => 3156.bugfix.md} | 0 changes/{3170.bugfix.rst => 3170.bugfix.md} | 0 11 files changed, 3 insertions(+), 3 deletions(-) rename changes/{2774.feature.rst => 2774.feature.md} (100%) rename changes/{2871.feature.rst => 2871.feature.md} (100%) rename changes/{2874.feature.rst => 2874.feature.md} (88%) rename changes/{3127.bugfix.rst => 3127.bugfix.md} (100%) rename changes/{3128.bugfix.rst => 3128.bugfix.md} (88%) create mode 100644 changes/3130.feature.md delete mode 100644 changes/3130.feature.rst rename changes/{3138.feature.rst => 3138.feature.md} (100%) rename changes/{3140.bugfix.rst => 3140.bugfix.md} (100%) rename changes/{3156.bugfix.rst => 3156.bugfix.md} (100%) rename changes/{3170.bugfix.rst => 3170.bugfix.md} (100%) diff --git a/changes/2774.feature.rst b/changes/2774.feature.md similarity index 100% rename from changes/2774.feature.rst rename to changes/2774.feature.md diff --git a/changes/2871.feature.rst b/changes/2871.feature.md similarity index 100% rename from changes/2871.feature.rst rename to changes/2871.feature.md diff --git 
a/changes/2874.feature.rst b/changes/2874.feature.md similarity index 88% rename from changes/2874.feature.rst rename to changes/2874.feature.md index 4c50532ae0..c146f2f4ca 100644 --- a/changes/2874.feature.rst +++ b/changes/2874.feature.md @@ -6,4 +6,4 @@ variable-length string data type, but the old metadata representation can still used when reading arrays. The logic for automatically choosing the chunk encoding for a given data type has also changed, and this necessitated changes to the ``config`` API. -For more on this new feature, see the `documentation `_ \ No newline at end of file +For more on this new feature, see the [documentation](user-guide/data_types.md) \ No newline at end of file diff --git a/changes/3127.bugfix.rst b/changes/3127.bugfix.md similarity index 100% rename from changes/3127.bugfix.rst rename to changes/3127.bugfix.md diff --git a/changes/3128.bugfix.rst b/changes/3128.bugfix.md similarity index 88% rename from changes/3128.bugfix.rst rename to changes/3128.bugfix.md index b93416070e..96860088a5 100644 --- a/changes/3128.bugfix.rst +++ b/changes/3128.bugfix.md @@ -1 +1 @@ -Fix `zarr.open` default for argument `mode` when `store` is `read_only` \ No newline at end of file +Fix `zarr.open` default for argument `mode` when `store` is `read_only`. \ No newline at end of file diff --git a/changes/3130.feature.md b/changes/3130.feature.md new file mode 100644 index 0000000000..81e43ab2da --- /dev/null +++ b/changes/3130.feature.md @@ -0,0 +1 @@ +Port more stateful testing actions from [Icechunk](https://icechunk.io). diff --git a/changes/3130.feature.rst b/changes/3130.feature.rst deleted file mode 100644 index 7a64582f06..0000000000 --- a/changes/3130.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Port more stateful testing actions from `Icechunk `_.
diff --git a/changes/3138.feature.rst b/changes/3138.feature.md similarity index 100% rename from changes/3138.feature.rst rename to changes/3138.feature.md diff --git a/changes/3140.bugfix.rst b/changes/3140.bugfix.md similarity index 100% rename from changes/3140.bugfix.rst rename to changes/3140.bugfix.md diff --git a/changes/3156.bugfix.rst b/changes/3156.bugfix.md similarity index 100% rename from changes/3156.bugfix.rst rename to changes/3156.bugfix.md diff --git a/changes/3170.bugfix.rst b/changes/3170.bugfix.md similarity index 100% rename from changes/3170.bugfix.rst rename to changes/3170.bugfix.md From afc5d343eb07f2766769e6cb8669c31f4fd7528a Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 28 Jun 2025 14:28:41 -0400 Subject: [PATCH 17/64] Use cards on homepage for now --- .pre-commit-config.yaml | 1 + docs/index.md | 186 +++++----------------------------------- docs/quick-start.md | 179 ++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 6 +- 4 files changed, 207 insertions(+), 165 deletions(-) create mode 100644 docs/quick-start.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f016000984..ec3cbf0401 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,6 +20,7 @@ repos: rev: v5.0.0 hooks: - id: check-yaml + exclude: mkdocs.yml - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.16.1 diff --git a/docs/index.md b/docs/index.md index 240f939d90..2084fb4742 100644 --- a/docs/index.md +++ b/docs/index.md @@ -33,187 +33,45 @@ or `conda`: conda install --channel conda-forge zarr ``` -## Quick Start 🚀 - -This section will help you get up and running with -the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. 
- -### Creating an Array - -To get started, you can create a simple Zarr array: - -```python -import zarr -import numpy as np - -# Create a 2D Zarr array -z = zarr.create_array( - store="data/example-1.zarr", - shape=(100, 100), - chunks=(10, 10), - dtype="f4" -) - -# Assign data to the array -z[:, :] = np.random.random((100, 100)) -z.info - -# Type : Array -# Zarr format : 3 -# Data type : DataType.float32 -# Shape : (100, 100) -# Chunk shape : (10, 10) -# Order : C -# Read-only : False -# Store type : LocalStore -# Codecs : [{'endian': }, {'level': 0, 'checksum': False}] -# No. bytes : 40000 (39.1K) -``` - -Here, we created a 2D array of shape `(100, 100)`, chunked into blocks of -`(10, 10)`, and filled it with random floating-point data. This array was -written to a `LocalStore` in the `data/example-1.zarr` directory. - -#### Compression and Filters - -Zarr supports data compression and filters. For example, to use Blosc compression: - -```python -z = zarr.create_array( - "data/example-3.zarr", - mode="w", shape=(100, 100), - chunks=(10, 10), dtype="f4", - compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) -) -z[:, :] = np.random.random((100, 100)) -z.info -# Type : Array -# Zarr format : 3 -# Data type : DataType.float32 -# Shape : (100, 100) -# Chunk shape : (10, 10) -# Order : C -# Read-only : False -# Store type : LocalStore -# Codecs : [{'endian': }, {'level': 0, 'checksum': False}] -# No. bytes : 40000 (39.1K) -``` - -This compresses the data using the Zstandard codec with shuffle enabled for better compression. - -### Hierarchical Groups +## Navigating the documentation -Zarr allows you to create hierarchical groups, similar to directories: +
-```python -# Create nested groups and add arrays -root = zarr.group("data/example-2.zarr") -foo = root.create_group(name="foo") -bar = root.create_array( - name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" -) -spam = foo.create_array(name="spam", shape=(10,), dtype="i4") +- [:material-clock-fast:{ .lg .middle } __Quick start__](quick-start.md) -# Assign values -bar[:, :] = np.random.random((100, 10)) -spam[:] = np.arange(10) + --- -# print the hierarchy -root.tree() -# / -# ├── bar (100, 10) float32 -# └── foo -# └── spam (10,) int32 -``` + New to Zarr? Check out the quick start guide. It contains a brief + introduction to Zarr's main concepts and links to additional tutorials. -This creates a group with two datasets: `foo` and `bar`. - -#### Batch Hierarchy Creation - -Zarr provides tools for creating a collection of arrays and groups with a single function call. -Suppose we want to copy existing groups and arrays into a new storage backend: - -```python -# Create nested groups and add arrays -root = zarr.group("data/example-3.zarr", attributes={'name': 'root'}) -foo = root.create_group(name="foo") -bar = root.create_array( - name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" -) -nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} -print(nodes) -from zarr.storage import MemoryStore -new_nodes = dict(zarr.create_hierarchy(store=MemoryStore(), nodes=nodes)) -new_root = new_nodes[''] -assert new_root.attrs == root.attrs -``` -Note that `zarr.create_hierarchy` will only initialize arrays and groups -- copying array data must -be done in a separate step. +- [:material-book-open:{ .lg .middle } __User guide__](user-guide/installation.md) -### Persistent Storage + --- -Zarr supports persistent storage to disk or cloud-compatible backends. While examples above -utilized a `zarr.storage.LocalStore`, a number of other storage options are available. + A detailed guide for how to use Zarr-Python. 
-Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage -using external libraries like [s3fs](https://s3fs.readthedocs.io) or -[gcsfs](https://gcsfs.readthedocs.io): -```python -import s3fs +- [:material-api:{ .lg .middle } __API Reference__](api/open.md) -z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") -z[:, :] = np.random.random((100, 100)) -``` + --- -A single-file store can also be created using the `zarr.storage.ZipStore`: + The reference guide contains a detailed description of the functions, modules, + and objects included in Zarr. The reference describes how the methods work and + which parameters can be used. It assumes that you have an understanding of the + key concepts. -```python -# Store the array in a ZIP file -store = zarr.storage.ZipStore("data/example-3.zip", mode='w') -z = zarr.create_array( - store=store, - mode="w", - shape=(100, 100), - chunks=(10, 10), - dtype="f4" -) +- [:material-account-group:{ .lg .middle } __Contributor's Guide__](contributing.md) -# write to the array -z[:, :] = np.random.random((100, 100)) + --- -# the ZipStore must be explicitly closed -store.close() -``` + Want to contribute to Zarr? We welcome contributions in the form of bug reports, + bug fixes, documentation, enhancement proposals and more. The contributing guidelines + will guide you through the process of improving Zarr. 
-To open an existing array from a ZIP file: - -```python -# Open the ZipStore in read-only mode -store = zarr.storage.ZipStore("data/example-3.zip", read_only=True) - -z = zarr.open_array(store, mode='r') - -# read the data as a NumPy Array -z[:] -# array([[0.66734236, 0.15667458, 0.98720884, ..., 0.36229587, 0.67443246, -# 0.34315267], -# [0.65787303, 0.9544212 , 0.4830079 , ..., 0.33097172, 0.60423803, -# 0.45621237], -# [0.27632037, 0.9947008 , 0.42434934, ..., 0.94860053, 0.6226942 , -# 0.6386924 ], -# ..., -# [0.12854576, 0.934397 , 0.19524333, ..., 0.11838563, 0.4967675 , -# 0.43074256], -# [0.82029045, 0.4671437 , 0.8090906 , ..., 0.7814118 , 0.42650765, -# 0.95929915], -# [0.4335856 , 0.7565437 , 0.7828931 , ..., 0.48119593, 0.66220033, -# 0.6652362 ]], shape=(100, 100), dtype=float32) -``` +
-Read more about Zarr's storage options in the [User Guide](user-guide/storage.md). ## Project Status @@ -224,7 +82,7 @@ If you are using Zarr-Python, we would [love to hear about it](https://github.co ### Funding and Support The project is fiscally sponsored by [NumFOCUS](https://numfocus.org/), a US 501(c)(3) public charity, and development has been supported by the -[MRC Centre for Genomics and Global Health](https://www.cggh.org) +[MRC Centre for Genomics and Global Health](https://www.sanger.ac.uk/collaboration/mrc-centre-genomics-and-global-health-cggh/) and the [Chan Zuckerberg Initiative](https://chanzuckerberg.com/). [Donate to Zarr](https://numfocus.org/donate-to-zarr) to support the project! diff --git a/docs/quick-start.md b/docs/quick-start.md new file mode 100644 index 0000000000..22b3ee25c6 --- /dev/null +++ b/docs/quick-start.md @@ -0,0 +1,179 @@ +This section will help you get up and running with +the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. + +### Creating an Array + +To get started, you can create a simple Zarr array: + +```python +import zarr +import numpy as np + +# Create a 2D Zarr array +z = zarr.create_array( + store="data/example-1.zarr", + shape=(100, 100), + chunks=(10, 10), + dtype="f4" +) + +# Assign data to the array +z[:, :] = np.random.random((100, 100)) +z.info + +# Type : Array +# Zarr format : 3 +# Data type : DataType.float32 +# Shape : (100, 100) +# Chunk shape : (10, 10) +# Order : C +# Read-only : False +# Store type : LocalStore +# Codecs : [{'endian': }, {'level': 0, 'checksum': False}] +# No. bytes : 40000 (39.1K) +``` + +Here, we created a 2D array of shape `(100, 100)`, chunked into blocks of +`(10, 10)`, and filled it with random floating-point data. This array was +written to a `LocalStore` in the `data/example-1.zarr` directory. + +#### Compression and Filters + +Zarr supports data compression and filters. 
For example, to use Blosc compression: + +```python +z = zarr.create_array( + "data/example-3.zarr", + mode="w", shape=(100, 100), + chunks=(10, 10), dtype="f4", + compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) +) +z[:, :] = np.random.random((100, 100)) +z.info +# Type : Array +# Zarr format : 3 +# Data type : DataType.float32 +# Shape : (100, 100) +# Chunk shape : (10, 10) +# Order : C +# Read-only : False +# Store type : LocalStore +# Codecs : [{'endian': }, {'level': 0, 'checksum': False}] +# No. bytes : 40000 (39.1K) +``` + +This compresses the data using the Zstandard codec with shuffle enabled for better compression. + +### Hierarchical Groups + +Zarr allows you to create hierarchical groups, similar to directories: + +```python +# Create nested groups and add arrays +root = zarr.group("data/example-2.zarr") +foo = root.create_group(name="foo") +bar = root.create_array( + name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" +) +spam = foo.create_array(name="spam", shape=(10,), dtype="i4") + +# Assign values +bar[:, :] = np.random.random((100, 10)) +spam[:] = np.arange(10) + +# print the hierarchy +root.tree() +# / +# ├── bar (100, 10) float32 +# └── foo +# └── spam (10,) int32 +``` + +This creates a group with two datasets: `foo` and `bar`. + +#### Batch Hierarchy Creation + +Zarr provides tools for creating a collection of arrays and groups with a single function call. 
+Suppose we want to copy existing groups and arrays into a new storage backend: + +```python +# Create nested groups and add arrays +root = zarr.group("data/example-3.zarr", attributes={'name': 'root'}) +foo = root.create_group(name="foo") +bar = root.create_array( + name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" +) +nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} +print(nodes) +from zarr.storage import MemoryStore +new_nodes = dict(zarr.create_hierarchy(store=MemoryStore(), nodes=nodes)) +new_root = new_nodes[''] +assert new_root.attrs == root.attrs +``` + +Note that `zarr.create_hierarchy` will only initialize arrays and groups -- copying array data must +be done in a separate step. + +### Persistent Storage + +Zarr supports persistent storage to disk or cloud-compatible backends. While examples above +utilized a `zarr.storage.LocalStore`, a number of other storage options are available. + +Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage +using external libraries like [s3fs](https://s3fs.readthedocs.io) or +[gcsfs](https://gcsfs.readthedocs.io): + +```python +import s3fs + +z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") +z[:, :] = np.random.random((100, 100)) +``` + +A single-file store can also be created using the `zarr.storage.ZipStore`: + +```python +# Store the array in a ZIP file +store = zarr.storage.ZipStore("data/example-3.zip", mode='w') + +z = zarr.create_array( + store=store, + mode="w", + shape=(100, 100), + chunks=(10, 10), + dtype="f4" +) + +# write to the array +z[:, :] = np.random.random((100, 100)) + +# the ZipStore must be explicitly closed +store.close() +``` + +To open an existing array from a ZIP file: + +```python +# Open the ZipStore in read-only mode +store = zarr.storage.ZipStore("data/example-3.zip", read_only=True) + +z = zarr.open_array(store, mode='r') + +# read the data as a NumPy Array +z[:] 
+# array([[0.66734236, 0.15667458, 0.98720884, ..., 0.36229587, 0.67443246, +# 0.34315267], +# [0.65787303, 0.9544212 , 0.4830079 , ..., 0.33097172, 0.60423803, +# 0.45621237], +# [0.27632037, 0.9947008 , 0.42434934, ..., 0.94860053, 0.6226942 , +# 0.6386924 ], +# ..., +# [0.12854576, 0.934397 , 0.19524333, ..., 0.11838563, 0.4967675 , +# 0.43074256], +# [0.82029045, 0.4671437 , 0.8090906 , ..., 0.7814118 , 0.42650765, +# 0.95929915], +# [0.4335856 , 0.7565437 , 0.7828931 , ..., 0.48119593, 0.66220033, +# 0.6652362 ]], shape=(100, 100), dtype=float32) +``` + +Read more about Zarr's storage options in the [User Guide](user-guide/storage.md). diff --git a/mkdocs.yml b/mkdocs.yml index 3469c0c15c..9809ae785d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -9,6 +9,7 @@ docs_dir: docs nav: - "index.md" + - "quick-start.md" - User Guide: - user-guide/installation.md - user-guide/arrays.md @@ -129,7 +130,7 @@ plugins: 'spec/v3.md': 'https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html' 'license.md': 'https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt' 'tutorial.md': 'user-guide/installation.md' - 'getting-started.md': 'index.md##quick-start' + 'getting-started.md': 'quick-start.md' 'roadmap.md': 'https://zarr.readthedocs.io/en/v3.0.8/developers/roadmap.html' 'installation.md': 'user-guide/installation.md' 'api.md': 'api/open.md' @@ -160,5 +161,8 @@ markdown_extensions: - pymdownx.tasklist: custom_checkbox: true - pymdownx.tilde + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg - toc: permalink: true From e58b8d597baa59e75ceef2e479b319c60eac6f9e Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 28 Jun 2025 20:12:59 -0400 Subject: [PATCH 18/64] Execute examples in quickstart --- docs/quick-start.md | 112 +++++++++++++++++++------------------------- mkdocs.yml | 7 +++ 2 files changed, 55 insertions(+), 64 
deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 22b3ee25c6..da903af3e4 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -5,32 +5,29 @@ the Zarr library in Python to efficiently manage and analyze multi-dimensional a To get started, you can create a simple Zarr array: -```python +```python exec="true" session="quickstart" +import shutil +shutil.rmtree('data', ignore_errors=True) +import numpy as np + +np.random.seed(0) +``` + +```python exec="true" session="quickstart" source="material-block" result="ansi" import zarr import numpy as np # Create a 2D Zarr array z = zarr.create_array( - store="data/example-1.zarr", - shape=(100, 100), - chunks=(10, 10), - dtype="f4" + store="data/example-1.zarr", + shape=(100, 100), + chunks=(10, 10), + dtype="f4" ) # Assign data to the array z[:, :] = np.random.random((100, 100)) -z.info - -# Type : Array -# Zarr format : 3 -# Data type : DataType.float32 -# Shape : (100, 100) -# Chunk shape : (10, 10) -# Order : C -# Read-only : False -# Store type : LocalStore -# Codecs : [{'endian': }, {'level': 0, 'checksum': False}] -# No. bytes : 40000 (39.1K) +print(z.info) ``` Here, we created a 2D array of shape `(100, 100)`, chunked into blocks of @@ -41,36 +38,38 @@ written to a `LocalStore` in the `data/example-1.zarr` directory. Zarr supports data compression and filters. 
For example, to use Blosc compression: -```python + +```python exec="true" session="quickstart" source="material-block" result="ansi" + +# Create a 2D Zarr array with Blosc compression z = zarr.create_array( - "data/example-3.zarr", - mode="w", shape=(100, 100), - chunks=(10, 10), dtype="f4", - compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + store="data/example-2.zarr", + shape=(100, 100), + chunks=(10, 10), + dtype="f4", + compressors=zarr.codecs.BloscCodec( + cname="zstd", + clevel=3, + shuffle=zarr.codecs.BloscShuffle.shuffle + ) ) + +# Assign data to the array z[:, :] = np.random.random((100, 100)) -z.info -# Type : Array -# Zarr format : 3 -# Data type : DataType.float32 -# Shape : (100, 100) -# Chunk shape : (10, 10) -# Order : C -# Read-only : False -# Store type : LocalStore -# Codecs : [{'endian': }, {'level': 0, 'checksum': False}] -# No. bytes : 40000 (39.1K) +print(z.info) ``` -This compresses the data using the Zstandard codec with shuffle enabled for better compression. +This compresses the data using the Blosc codec with shuffle enabled for better compression. + ### Hierarchical Groups Zarr allows you to create hierarchical groups, similar to directories: -```python +```python exec="true" session="quickstart" source="material-block" result="ansi" + # Create nested groups and add arrays -root = zarr.group("data/example-2.zarr") +root = zarr.group("data/example-3.zarr") foo = root.create_group(name="foo") bar = root.create_array( name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" @@ -82,11 +81,7 @@ bar[:, :] = np.random.random((100, 10)) spam[:] = np.arange(10) # print the hierarchy -root.tree() -# / -# ├── bar (100, 10) float32 -# └── foo -# └── spam (10,) int32 +print(root.tree()) ``` This creates a group with two datasets: `foo` and `bar`. @@ -96,17 +91,17 @@ This creates a group with two datasets: `foo` and `bar`. 
Zarr provides tools for creating a collection of arrays and groups with a single function call. Suppose we want to copy existing groups and arrays into a new storage backend: -```python +```python exec="true" session="quickstart" source="material-block" result="ansi" + # Create nested groups and add arrays -root = zarr.group("data/example-3.zarr", attributes={'name': 'root'}) +root = zarr.group("data/example-4.zarr", attributes={'name': 'root'}) foo = root.create_group(name="foo") bar = root.create_array( name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" ) nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} print(nodes) -from zarr.storage import MemoryStore -new_nodes = dict(zarr.create_hierarchy(store=MemoryStore(), nodes=nodes)) +new_nodes = dict(zarr.create_hierarchy(store=zarr.storage.MemoryStore(), nodes=nodes)) new_root = new_nodes[''] assert new_root.attrs == root.attrs ``` @@ -124,6 +119,7 @@ using external libraries like [s3fs](https://s3fs.readthedocs.io) or [gcsfs](https://gcsfs.readthedocs.io): ```python + import s3fs z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") @@ -132,13 +128,13 @@ z[:, :] = np.random.random((100, 100)) A single-file store can also be created using the `zarr.storage.ZipStore`: -```python +```python exec="true" session="quickstart" source="material-block" + # Store the array in a ZIP file -store = zarr.storage.ZipStore("data/example-3.zip", mode='w') +store = zarr.storage.ZipStore("data/example-5.zip", mode="w") z = zarr.create_array( store=store, - mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4" @@ -153,27 +149,15 @@ store.close() To open an existing array from a ZIP file: -```python +```python exec="true" session="quickstart" source="material-block" result="ansi" + # Open the ZipStore in read-only mode -store = zarr.storage.ZipStore("data/example-3.zip", read_only=True) +store = zarr.storage.ZipStore("data/example-5.zip", read_only=True) z 
= zarr.open_array(store, mode='r') # read the data as a NumPy Array -z[:] -# array([[0.66734236, 0.15667458, 0.98720884, ..., 0.36229587, 0.67443246, -# 0.34315267], -# [0.65787303, 0.9544212 , 0.4830079 , ..., 0.33097172, 0.60423803, -# 0.45621237], -# [0.27632037, 0.9947008 , 0.42434934, ..., 0.94860053, 0.6226942 , -# 0.6386924 ], -# ..., -# [0.12854576, 0.934397 , 0.19524333, ..., 0.11838563, 0.4967675 , -# 0.43074256], -# [0.82029045, 0.4671437 , 0.8090906 , ..., 0.7814118 , 0.42650765, -# 0.95929915], -# [0.4335856 , 0.7565437 , 0.7828931 , ..., 0.48119593, 0.66220033, -# 0.6652362 ]], shape=(100, 100), dtype=float32) +print(z[:]) ``` Read more about Zarr's storage options in the [User Guide](user-guide/storage.md). diff --git a/mkdocs.yml b/mkdocs.yml index 9809ae785d..f1eed107d2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -101,6 +101,7 @@ extra_css: plugins: - search + - markdown-exec - mkdocstrings: enable_inventory: true handlers: @@ -166,3 +167,9 @@ markdown_extensions: emoji_generator: !!python/name:material.extensions.emoji.to_svg - toc: permalink: true + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets From b3285f80f5005e710c19261437f971c435e28daa Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 28 Jun 2025 20:52:43 -0400 Subject: [PATCH 19/64] Execute examples in array user guide --- docs/user-guide/arrays.md | 435 +++++++++++++++----------------------- 1 file changed, 169 insertions(+), 266 deletions(-) diff --git a/docs/user-guide/arrays.md b/docs/user-guide/arrays.md index 2975ef2974..22aab5c035 100644 --- a/docs/user-guide/arrays.md +++ b/docs/user-guide/arrays.md @@ -4,12 +4,19 @@ Zarr has several functions for creating arrays. 
For example: -```python +```python exec="true" session="arrays" +import shutil +shutil.rmtree('data', ignore_errors=True) +import numpy as np + +np.random.seed(0) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" import zarr store = zarr.storage.MemoryStore() z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') -z -# +print(z) ``` The code above creates a 2-dimensional array of 32-bit integers with 10000 rows @@ -27,13 +34,13 @@ Zarr arrays support a similar interface to [NumPy](https://numpy.org/doc/stable/ arrays for reading and writing data. For example, the entire array can be filled with a scalar value: -```python +```python exec="true" session="arrays" source="material-block" z[:] = 42 ``` Regions of the array can also be written to, e.g.: -```python +```python exec="true" session="arrays" source="material-block" import numpy as np z[0, :] = np.arange(10000) @@ -43,26 +50,24 @@ z[:, 0] = np.arange(10000) The contents of the array can be retrieved by slicing, which will load the requested region into memory as a NumPy array, e.g.: -```python -z[0, 0] -# array(0, dtype=int32) -z[-1, -1] -# array(42, dtype=int32) -z[0, :] -# array([ 0, 1, 2, ..., 9997, 9998, 9999], -# shape=(10000,), dtype=int32) -z[:, 0] -# array([ 0, 1, 2, ..., 9997, 9998, 9999], -# shape=(10000,), dtype=int32) -z[:] -# array([[ 0, 1, 2, ..., 9997, 9998, 9999], -# [ 1, 42, 42, ..., 42, 42, 42], -# [ 2, 42, 42, ..., 42, 42, 42], -# ..., -# [9997, 42, 42, ..., 42, 42, 42], -# [9998, 42, 42, ..., 42, 42, 42], -# [9999, 42, 42, ..., 42, 42, 42]], -# shape=(10000, 10000), dtype=int32) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z[0, 0]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z[-1, -1]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z[0, :]) +``` + +```python exec="true" session="arrays" 
source="material-block" result="ansi" +print(z[:, 0]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z[:]) ``` Read more about NumPy-style indexing can be found in the @@ -75,7 +80,7 @@ main memory. Zarr arrays can also be stored on a file system, enabling persistence of data between sessions. To do this, we can change the store argument to point to a filesystem path: -```python +```python exec="true" session="arrays" source="material-block" z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') ``` @@ -97,10 +102,9 @@ z1[:, 0] = np.arange(10000) Check that the data have been written and can be read again: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" z2 = zarr.open_array('data/example-1.zarr', mode='r') -np.all(z1[:] == z2[:]) -# np.True_ +print(np.all(z1[:] == z2[:])) ``` If you are just looking for a fast and convenient way to save NumPy arrays to @@ -108,11 +112,10 @@ disk then load back into memory later, the functions `zarr.save` and `zarr.load` may be useful. E.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" a = np.arange(10) zarr.save('data/example-2.zarr', a) -zarr.load('data/example-2.zarr') -# array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) +print(zarr.load('data/example-2.zarr')) ``` Please note that there are a number of other options for persistent array @@ -123,14 +126,12 @@ storage, see the Storage Guide for more details. A Zarr array can be resized, which means that any of its dimensions can be increased or decreased in length. 
For example: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" z = zarr.create_array(store='data/example-3.zarr', shape=(10000, 10000), dtype='int32',chunks=(1000, 1000)) z[:] = 42 -z.shape -# (10000, 10000) +print(f"Original shape: {z.shape}") z.resize((20000, 10000)) -z.shape -# (20000, 10000) +print(f"New shape: {z.shape}") ``` Note that when an array is resized, the underlying data are not rearranged in @@ -140,18 +141,15 @@ new array shape will be deleted from the underlying store. `zarr.Array.append` is provided as a convenience function, which can be used to append data to any axis. E.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" a = np.arange(10000000, dtype='int32').reshape(10000, 1000) z = zarr.create_array(store='data/example-4.zarr', shape=a.shape, dtype=a.dtype, chunks=(1000, 100)) z[:] = a -z.shape -# (10000, 1000) +print(f"Original shape: {z.shape}") z.append(a) -# (20000, 1000) +print(f"Shape after first append: {z.shape}") z.append(np.vstack([a, a]), axis=1) -# (20000, 2000) -z.shape -# (20000, 2000) +print(f"Shape after second append: {z.shape}") ``` ## Compressors @@ -163,13 +161,12 @@ compressor libraries including LZ4, Zlib, BZ2 and LZMA. Different compressors can be provided via the `compressors` keyword argument accepted by all array creation functions. 
For example: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=3, shuffle=zarr.codecs.BloscShuffle.bitshuffle) data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) z[:] = data -z.compressors -# (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) +print(z.compressors) ``` This array above will use Blosc as the primary compressor, using the Zstandard @@ -180,44 +177,15 @@ When using a compressor, it can be useful to get some diagnostics on the compression ratio. Zarr arrays provide the `zarr.Array.info` property which can be used to print useful diagnostics, e.g.: -```python -z.info -# Type : Array -# Zarr format : 3 -# Data type : Int32(endianness='little') -# Fill value : 0 -# Shape : (10000, 10000) -# Chunk shape : (1000, 1000) -# Order : C -# Read-only : False -# Store type : LocalStore -# Filters : () -# Serializer : BytesCodec(endian=) -# Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) -# No. bytes : 400000000 (381.5M) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.info) ``` The `zarr.Array.info_complete` method inspects the underlying store and prints additional diagnostics, e.g.: -```python -z.info_complete() -# Type : Array -# Zarr format : 3 -# Data type : Int32(endianness='little') -# Fill value : 0 -# Shape : (10000, 10000) -# Chunk shape : (1000, 1000) -# Order : C -# Read-only : False -# Store type : LocalStore -# Filters : () -# Serializer : BytesCodec(endian=) -# Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) -# No. bytes : 400000000 (381.5M) -# No. 
bytes stored : 3558573 -# Storage ratio : 112.4 -# Chunks Initialized : 100 +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.info_complete()) ``` !!! note @@ -231,18 +199,17 @@ compressor. In addition to Blosc and Zstandard, other compression libraries can also be used. For example, here is an array using Gzip compression, level 1: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1)) z[:] = data -z.compressors -# (GzipCodec(level=1),) +print(f"Compressors: {z.compressors}") ``` Here is an example using LZMA from [NumCodecs](https://numcodecs.readthedocs.io/) with a custom filter pipeline including LZMA's built-in delta filter: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" import lzma from numcodecs.zarr3 import LZMA @@ -250,28 +217,30 @@ lzma_filters = [dict(id=lzma.FILTER_DELTA, dist=4), dict(id=lzma.FILTER_LZMA2, p compressors = LZMA(filters=lzma_filters) data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) -z.compressors -# (LZMA(codec_name='numcodecs.lzma', codec_config={'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]}),) +print(f"Compressors: {z.compressors}") ``` The default compressor can be changed by setting the value of the using Zarr's configuration system, e.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" with zarr.config.set({'array.v2_default_compressor.default': {'id': 'blosc'}}): z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2) -z.filters -# () -z.compressors -# 
(Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),) +print(f"Filter: {z.filters}") +print(f"Compressors: {z.compressors}") ``` To disable compression, set `compressors=None` when creating an array, e.g.: -```python -z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None) -z.compressors -# () +```python exec="true" session="arrays" source="material-block" result="ansi" +z = zarr.create_array( + store='data/example-8.zarr', + shape=(100000000,), + chunks=(1000000,), + dtype='int32', + compressors=None +) +print(f"Compressors: {z.compressors}") ``` ## Filters @@ -289,27 +258,14 @@ mechanism for configuring filters outside of the primary compressor. Here is an example using a delta filter with the Blosc compressor: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" from numcodecs.zarr3 import Delta filters = [Delta(dtype='int32')] compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=1, shuffle=zarr.codecs.BloscShuffle.shuffle) data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-9.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), filters=filters, compressors=compressors) -z.info -# Type : Array -# Zarr format : 3 -# Data type : Int32(endianness='little') -# Fill value : 0 -# Shape : (10000, 10000) -# Chunk shape : (1000, 1000) -# Order : C -# Read-only : False -# Store type : LocalStore -# Filters : (Delta(codec_name='numcodecs.delta', codec_config={'dtype': 'int32'}),) -# Serializer : BytesCodec(endian=) -# Compressors : (BloscCodec(typesize=4, cname=, clevel=1, shuffle=, blocksize=0),) -# No. bytes : 400000000 (381.5M) +print(z.info) ``` For more information about available filter codecs, see the [Numcodecs](https://numcodecs.readthedocs.io/) documentation. @@ -331,122 +287,115 @@ see the documentation for the `zarr.Array` class. 
Items from a Zarr array can be extracted by providing an integer array of coordinates. E.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(10) ** 2 z = zarr.create_array(store='data/example-10.zarr', shape=data.shape, dtype=data.dtype) z[:] = data -z[:] -# array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) -z.get_coordinate_selection([2, 5]) -# array([ 4, 25]) +print(z[:]) +print(z.get_coordinate_selection([2, 5])) ``` Coordinate arrays can also be used to update data, e.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" z.set_coordinate_selection([2, 5], [-1, -2]) -z[:] -# array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) +print(z[:]) ``` For multidimensional arrays, coordinates must be provided for each dimension, e.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) z[:] = data -z[:] -# array([[ 0, 1, 2, 3, 4], -# [ 5, 6, 7, 8, 9], -# [10, 11, 12, 13, 14]]) -z.get_coordinate_selection(([0, 2], [1, 3])) -# array([ 1, 13]) +print(z.get_coordinate_selection(([0, 2], [1, 3]))) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" z.set_coordinate_selection(([0, 2], [1, 3]), [-1, -2]) -z[:] -# array([[ 0, -1, 2, 3, 4], -# [ 5, 6, 7, 8, 9], -# [10, 11, 12, -2, 14]]) +print(z[:]) ``` For convenience, coordinate indexing is also available via the `vindex` property, as well as the square bracket operator, e.g.: -```python -z.vindex[[0, 2], [1, 3]] -# array([-1, -2]) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.vindex[[0, 2], [1, 3]]) z.vindex[[0, 2], [1, 3]] = [-3, -4] -z[:] -# array([[ 0, -3, 2, 3, 4], -# [ 5, 6, 7, 8, 9], -# [10, 11, 12, -4, 14]]) -z[[0, 2], [1, 3]] -# array([-3, -4]) +``` + +```python exec="true" session="arrays" 
source="material-block" result="ansi" +print(z[:]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z[[0, 2], [1, 3]]) ``` When the indexing arrays have different shapes, they are broadcast together. That is, the following two calls are equivalent: -```python -z[1, [1, 3]] -# array([6, 8]) -z[[1, 1], [1, 3]] -# array([6, 8]) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z[1, [1, 3]]) +print(z[[1, 1], [1, 3]]) ``` ### Indexing with a mask array Items can also be extracted by providing a Boolean mask. E.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(10) ** 2 z = zarr.create_array(store='data/example-12.zarr', shape=data.shape, dtype=data.dtype) z[:] = data -z[:] -# array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) +print(z[:]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" sel = np.zeros_like(z, dtype=bool) sel[2] = True sel[5] = True -z.get_mask_selection(sel) -# array([ 4, 25]) +print(z.get_mask_selection(sel)) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" z.set_mask_selection(sel, [-1, -2]) -z[:] -# array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) +print(z[:]) ``` Here's a multidimensional example: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-13.zarr', shape=data.shape, dtype=data.dtype) z[:] = data -z[:] -# array([[ 0, 1, 2, 3, 4], -# [ 5, 6, 7, 8, 9], -# [10, 11, 12, 13, 14]]) +print(z[:]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" sel = np.zeros_like(z, dtype=bool) sel[0, 1] = True sel[2, 3] = True -z.get_mask_selection(sel) -# array([ 1, 13]) +print(z.get_mask_selection(sel)) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" z.set_mask_selection(sel, [-1, 
-2]) -z[:] -# array([[ 0, -1, 2, 3, 4], -# [ 5, 6, 7, 8, 9], -# [10, 11, 12, -2, 14]]) +print(z[:]) ``` For convenience, mask indexing is also available via the `vindex` property, e.g.: -```python -z.vindex[sel] -# array([-1, -2]) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.vindex[sel]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" + z.vindex[sel] = [-3, -4] -z[:] -# array([[ 0, -3, 2, 3, 4], -# [ 5, 6, 7, 8, 9], -# [10, 11, 12, -4, 14]]) +print(z[:]) ``` Mask indexing is conceptually the same as coordinate indexing, and is @@ -460,54 +409,52 @@ selections to be made along each dimension of an array independently. For example, this allows selecting a subset of rows and/or columns from a 2-dimensional array. E.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-14.zarr', shape=data.shape, dtype=data.dtype) z[:] = data -z[:] -# array([[ 0, 1, 2, 3, 4], -# [ 5, 6, 7, 8, 9], -# [10, 11, 12, 13, 14]]) -z.get_orthogonal_selection(([0, 2], slice(None))) # select first and third rows -# array([[ 0, 1, 2, 3, 4], -# [10, 11, 12, 13, 14]]) -z.get_orthogonal_selection((slice(None), [1, 3])) # select second and fourth columns -# array([[ 1, 3], -# [ 6, 8], -# [11, 13]]) -z.get_orthogonal_selection(([0, 2], [1, 3])) # select rows [0, 2] and columns [1, 4] -# array([[ 1, 3], -# [11, 13]]) +print(z[:]) +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.get_orthogonal_selection(([0, 2], slice(None)))) # select first and third rows +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.get_orthogonal_selection((slice(None), [1, 3]))) # select second and fourth columns +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.get_orthogonal_selection(([0, 2], [1, 3]))) #
select rows [0, 2] and columns [1, 3] ``` Data can also be modified, e.g.: -```python +```python exec="true" session="arrays" source="material-block" z.set_orthogonal_selection(([0, 2], [1, 3]), [[-1, -2], [-3, -4]]) ``` For convenience, the orthogonal indexing functionality is also available via the `oindex` property, e.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-15.zarr', shape=data.shape, dtype=data.dtype) z[:] = data -z.oindex[[0, 2], :] # select first and third rows -# array([[ 0, 1, 2, 3, 4], -# [10, 11, 12, 13, 14]]) -z.oindex[:, [1, 3]] # select second and fourth columns -# array([[ 1, 3], -# [ 6, 8], -# [11, 13]]) -z.oindex[[0, 2], [1, 3]] # select rows [0, 2] and columns [1, 4] -# array([[ 1, 3], -# [11, 13]]) +print(z.oindex[[0, 2], :]) # select first and third rows +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.oindex[:, [1, 3]]) # select second and fourth columns +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.oindex[[0, 2], [1, 3]]) # select rows [0, 2] and columns [1, 3] +``` + +```python exec="true" session="arrays" source="material-block" result="ansi" z.oindex[[0, 2], [1, 3]] = [[-1, -2], [-3, -4]] -z[:] -# array([[ 0, -1, 2, -2, 4], -# [ 5, 6, 7, 8, 9], -# [10, -3, 12, -4, 14]]) +print(z[:]) ``` Any combination of integer, slice, 1D integer array and/or 1D Boolean array can @@ -516,12 +463,11 @@ be used for orthogonal indexing.
If the index contains at most one iterable, and otherwise contains only slices and integers, orthogonal indexing is also available directly on the array: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-16.zarr', shape=data.shape, dtype=data.dtype) z[:] = data -np.all(z.oindex[[0, 2], :] == z[[0, 2], :]) -# np.True_ +print(np.all(z.oindex[[0, 2], :] == z[[0, 2], :])) ``` ### Block Indexing @@ -530,7 +476,7 @@ Zarr also support block indexing, which allows selections of whole chunks based logical indices along each dimension of an array. For example, this allows selecting a subset of chunk aligned rows and/or columns from a 2-dimensional array. E.g.: -```python +```python exec="true" session="arrays" source="material-block" data = np.arange(100).reshape(10, 10) z = zarr.create_array(store='data/example-17.zarr', shape=data.shape, dtype=data.dtype, chunks=(3, 3)) z[:] = data @@ -538,90 +484,64 @@ z[:] = data Retrieve items by specifying their block coordinates: -```python -z.get_block_selection(1) -# array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], -# [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], -# [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.get_block_selection(1)) ``` Equivalent slicing: -```python -z[3:6] -# array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], -# [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], -# [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z[3:6]) ``` For convenience, the block selection functionality is also available via the `blocks` property, e.g.: -```python -z.blocks[1] -# array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], -# [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], -# [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) +```python exec="true" session="arrays" source="material-block" 
result="ansi" +print(z.blocks[1]) ``` Block index arrays may be multidimensional to index multidimensional arrays. For example: -```python -z.blocks[0, 1:3] -# array([[ 3, 4, 5, 6, 7, 8], -# [13, 14, 15, 16, 17, 18], -# [23, 24, 25, 26, 27, 28]]) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.blocks[0, 1:3]) ``` Data can also be modified. Let's start by a simple 2D array: -```python +```python exec="true" session="arrays" source="material-block" z = zarr.create_array(store='data/example-18.zarr', shape=(6, 6), dtype=int, chunks=(2, 2)) ``` Set data for a selection of items: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" z.set_block_selection((1, 0), 1) -z[...] -# array([[0, 0, 0, 0, 0, 0], -# [0, 0, 0, 0, 0, 0], -# [1, 1, 0, 0, 0, 0], -# [1, 1, 0, 0, 0, 0], -# [0, 0, 0, 0, 0, 0], -# [0, 0, 0, 0, 0, 0]]) +print(z[...]) ``` For convenience, this functionality is also available via the `blocks` property. E.g.: -```python +```python exec="true" session="arrays" source="material-block" result="ansi" z.blocks[:, 2] = 7 -z[...] 
-# array([[0, 0, 0, 0, 7, 7], -# [0, 0, 0, 0, 7, 7], -# [1, 1, 0, 0, 7, 7], -# [1, 1, 0, 0, 7, 7], -# [0, 0, 0, 0, 7, 7], -# [0, 0, 0, 0, 7, 7]]) +print(z[...]) ``` Any combination of integer and slice can be used for block indexing: -```python -z.blocks[2, 1:3] -# array([[0, 0, 7, 7], -# [0, 0, 7, 7]]) +```python exec="true" session="arrays" source="material-block" result="ansi" +print(z.blocks[2, 1:3]) +``` +```python exec="true" session="arrays" source="material-block" result="ansi" root = zarr.create_group('data/example-19.zarr') foo = root.create_array(name='foo', shape=(1000, 100), chunks=(10, 10), dtype='float32') bar = root.create_array(name='foo/bar', shape=(100,), dtype='int32') foo[:, :] = np.random.random((1000, 100)) bar[:] = np.arange(100) -root.tree() -# / -# └── foo (1000, 100) float32 +print(root.tree()) ``` ## Sharding @@ -640,27 +560,10 @@ Users need to configure the chunk and shard shapes accordingly. Sharded arrays can be created by providing the `shards` parameter to `zarr.create_array`. -```python +```python exec="true" session="arrays" source="material-block" result="ansi" a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') a[:] = (np.arange(10000 * 10000) % 256).astype('uint8').reshape(10000, 10000) -a.info_complete() -# Type : Array -# Zarr format : 3 -# Data type : UInt8() -# Fill value : 0 -# Shape : (10000, 10000) -# Shard shape : (1000, 1000) -# Chunk shape : (100, 100) -# Order : C -# Read-only : False -# Store type : LocalStore -# Filters : () -# Serializer : BytesCodec(endian=None) -# Compressors : (ZstdCodec(level=0, checksum=False),) -# No. bytes : 100000000 (95.4M) -# No. bytes stored : 3981473 -# Storage ratio : 25.1 -# Shards Initialized : 100 +print(a.info_complete()) ``` In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is used. 
From 1c7793aba4326d611d49f4294b859cc5164ef0c0 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 10 Aug 2025 15:06:00 -0400 Subject: [PATCH 20/64] Auto-doc for some modules --- docs/api/abc/codec.md | 15 +---------- docs/api/abc/metadata.md | 2 +- docs/api/abc/store.md | 10 +------ docs/api/api_async.md | 29 +------------------- docs/api/api_sync.md | 29 +------------------- docs/api/codecs.md | 14 +--------- docs/api/deprecated/convenience.md | 12 +-------- docs/api/deprecated/creation.md | 13 +-------- docs/api/errors.md | 7 +---- docs/api/registry.md | 9 +------ docs/api/storage.md | 10 +------ docs/api/testing.md | 43 ++++-------------------------- 12 files changed, 16 insertions(+), 177 deletions(-) diff --git a/docs/api/abc/codec.md b/docs/api/abc/codec.md index 88e35d01bf..d4eaecabe9 100644 --- a/docs/api/abc/codec.md +++ b/docs/api/abc/codec.md @@ -2,17 +2,4 @@ title: codec --- -## Attributes - -::: zarr.abc.codec.CodecInput -::: zarr.abc.codec.CodecOutput - -## Classes - -::: zarr.abc.codec.ArrayArrayCodec -::: zarr.abc.codec.ArrayBytesCodec -::: zarr.abc.codec.ArrayBytesCodecPartialDecodeMixin -::: zarr.abc.codec.ArrayBytesCodecPartialEncodeMixin -::: zarr.abc.codec.BaseCodec -::: zarr.abc.codec.BytesBytesCodec -::: zarr.abc.codec.CodecPipeline +::: zarr.abc.codec diff --git a/docs/api/abc/metadata.md b/docs/api/abc/metadata.md index 00c670d3e4..7cc1e00662 100644 --- a/docs/api/abc/metadata.md +++ b/docs/api/abc/metadata.md @@ -2,4 +2,4 @@ title: metadata --- -::: zarr.abc.metadata.Metadata +::: zarr.abc.metadata diff --git a/docs/api/abc/store.md b/docs/api/abc/store.md index bb8ceebb91..f711448541 100644 --- a/docs/api/abc/store.md +++ b/docs/api/abc/store.md @@ -2,12 +2,4 @@ title: store --- -## Classes - -::: zarr.abc.store.ByteGetter -::: zarr.abc.store.ByteSetter -::: zarr.abc.store.Store - -## Functions - -::: zarr.abc.store.set_or_delete +::: zarr.abc.store diff --git a/docs/api/api_async.md 
b/docs/api/api_async.md index 1380469cff..f5df894134 100644 --- a/docs/api/api_async.md +++ b/docs/api/api_async.md @@ -2,31 +2,4 @@ title: asynchronous --- -::: zarr.api.asynchronous.array -::: zarr.api.asynchronous.consolidate_metadata -::: zarr.api.asynchronous.copy -::: zarr.api.asynchronous.copy_all -::: zarr.api.asynchronous.copy_store -::: zarr.api.asynchronous.create -::: zarr.api.asynchronous.create_array -::: zarr.api.asynchronous.create_hierarchy -::: zarr.api.asynchronous.empty -::: zarr.api.asynchronous.empty_like -::: zarr.api.asynchronous.from_array -::: zarr.api.asynchronous.full -::: zarr.api.asynchronous.full_like -::: zarr.api.asynchronous.group -::: zarr.api.asynchronous.load -::: zarr.api.asynchronous.ones -::: zarr.api.asynchronous.ones_like -::: zarr.api.asynchronous.open -::: zarr.api.asynchronous.open_array -::: zarr.api.asynchronous.open_consolidated -::: zarr.api.asynchronous.open_group -::: zarr.api.asynchronous.open_like -::: zarr.api.asynchronous.save -::: zarr.api.asynchronous.save_array -::: zarr.api.asynchronous.save_group -::: zarr.api.asynchronous.tree -::: zarr.api.asynchronous.zeros -::: zarr.api.asynchronous.zeros_like +::: zarr.api.asynchronous \ No newline at end of file diff --git a/docs/api/api_sync.md b/docs/api/api_sync.md index 730be3449e..83ff118db5 100644 --- a/docs/api/api_sync.md +++ b/docs/api/api_sync.md @@ -2,31 +2,4 @@ title: synchronous --- -::: zarr.api.synchronous.array -::: zarr.api.synchronous.consolidate_metadata -::: zarr.api.synchronous.copy -::: zarr.api.synchronous.copy_all -::: zarr.api.synchronous.copy_store -::: zarr.api.synchronous.create -::: zarr.api.synchronous.create_array -::: zarr.api.synchronous.create_hierarchy -::: zarr.api.synchronous.empty -::: zarr.api.synchronous.empty_like -::: zarr.api.synchronous.from_array -::: zarr.api.synchronous.full -::: zarr.api.synchronous.full_like -::: zarr.api.synchronous.group -::: zarr.api.synchronous.load -::: zarr.api.synchronous.ones -::: 
zarr.api.synchronous.ones_like -::: zarr.api.synchronous.open -::: zarr.api.synchronous.open_array -::: zarr.api.synchronous.open_consolidated -::: zarr.api.synchronous.open_group -::: zarr.api.synchronous.open_like -::: zarr.api.synchronous.save -::: zarr.api.synchronous.save_array -::: zarr.api.synchronous.save_group -::: zarr.api.synchronous.tree -::: zarr.api.synchronous.zeros -::: zarr.api.synchronous.zeros_like +::: zarr.api.synchronous \ No newline at end of file diff --git a/docs/api/codecs.md b/docs/api/codecs.md index f2793875f4..5cf66b304e 100644 --- a/docs/api/codecs.md +++ b/docs/api/codecs.md @@ -2,16 +2,4 @@ title: codecs --- -::: zarr.codecs.BloscCname -::: zarr.codecs.BloscCodec -::: zarr.codecs.BloscShuffle -::: zarr.codecs.BytesCodec -::: zarr.codecs.Crc32cCodec -::: zarr.codecs.Endian -::: zarr.codecs.GzipCodec -::: zarr.codecs.ShardingCodec -::: zarr.codecs.ShardingCodecIndexLocation -::: zarr.codecs.TransposeCodec -::: zarr.codecs.VLenBytesCodec -::: zarr.codecs.VLenUTF8Codec -::: zarr.codecs.ZstdCodec +::: zarr.codecs \ No newline at end of file diff --git a/docs/api/deprecated/convenience.md b/docs/api/deprecated/convenience.md index f7629b7ac6..307f78a99c 100644 --- a/docs/api/deprecated/convenience.md +++ b/docs/api/deprecated/convenience.md @@ -1,14 +1,4 @@ !!! warning "Deprecated" This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. 
-::: zarr.convenience.consolidate_metadata -::: zarr.convenience.copy -::: zarr.convenience.copy_all -::: zarr.convenience.copy_store -::: zarr.convenience.load -::: zarr.convenience.open -::: zarr.convenience.open_consolidated -::: zarr.convenience.save -::: zarr.convenience.save_array -::: zarr.convenience.save_group -::: zarr.convenience.tree +::: zarr.convenience \ No newline at end of file diff --git a/docs/api/deprecated/creation.md b/docs/api/deprecated/creation.md index 1f87c7cf2e..d5873e93fd 100644 --- a/docs/api/deprecated/creation.md +++ b/docs/api/deprecated/creation.md @@ -1,15 +1,4 @@ !!! warning "Deprecated" This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. -::: zarr.creation.array -::: zarr.creation.create -::: zarr.creation.empty -::: zarr.creation.empty_like -::: zarr.creation.full -::: zarr.creation.full_like -::: zarr.creation.ones -::: zarr.creation.ones_like -::: zarr.creation.open_array -::: zarr.creation.open_like -::: zarr.creation.zeros -::: zarr.creation.zeros_like +::: zarr.creation \ No newline at end of file diff --git a/docs/api/errors.md b/docs/api/errors.md index dfdd74c07a..2ba2213071 100644 --- a/docs/api/errors.md +++ b/docs/api/errors.md @@ -2,9 +2,4 @@ title: errors --- -::: zarr.errors.BaseZarrError -::: zarr.errors.ContainsArrayAndGroupError -::: zarr.errors.ContainsArrayError -::: zarr.errors.ContainsGroupError -::: zarr.errors.MetadataValidationError -::: zarr.errors.NodeTypeValidationError +::: zarr.errors \ No newline at end of file diff --git a/docs/api/registry.md b/docs/api/registry.md index 97262f8920..d2c3769596 100644 --- a/docs/api/registry.md +++ b/docs/api/registry.md @@ -2,11 +2,4 @@ title: registry --- -::: zarr.registry.get_buffer_class -::: zarr.registry.get_codec_class -::: zarr.registry.get_ndbuffer_class -::: zarr.registry.get_pipeline_class -::: zarr.registry.register_buffer -::: zarr.registry.register_codec -::: zarr.registry.register_ndbuffer -::: 
zarr.registry.register_pipeline +::: zarr.registry \ No newline at end of file diff --git a/docs/api/storage.md b/docs/api/storage.md index ac8e94158b..204ef64a9d 100644 --- a/docs/api/storage.md +++ b/docs/api/storage.md @@ -8,12 +8,4 @@ title: storage ## Classes -::: zarr.storage.FsspecStore -::: zarr.storage.GpuMemoryStore -::: zarr.storage.LocalStore -::: zarr.storage.LoggingStore -::: zarr.storage.MemoryStore -::: zarr.storage.ObjectStore -::: zarr.storage.StorePath -::: zarr.storage.WrapperStore -::: zarr.storage.ZipStore \ No newline at end of file +::: zarr.storage \ No newline at end of file diff --git a/docs/api/testing.md b/docs/api/testing.md index 257d0ff59b..1412950ee3 100644 --- a/docs/api/testing.md +++ b/docs/api/testing.md @@ -4,53 +4,20 @@ title: testing ## Buffer -::: zarr.testing.buffer.NDBufferUsingTestNDArrayLike -::: zarr.testing.buffer.StoreExpectingTestBuffer -::: zarr.testing.buffer.TestBuffer +::: zarr.testing.buffer ## Stateful -::: zarr.testing.stateful.MAX_BINARY_SIZE -::: zarr.testing.stateful.SyncStoreWrapper -::: zarr.testing.stateful.ZarrHierarchyStateMachine -::: zarr.testing.stateful.ZarrStoreStateMachine -::: zarr.testing.stateful.split_prefix_name +::: zarr.testing.stateful ## Store -::: zarr.testing.store.StoreTests +::: zarr.testing.store ## Strategies -::: zarr.testing.strategies.array_names -::: zarr.testing.strategies.array_shapes -::: zarr.testing.strategies.attrs -::: zarr.testing.strategies.compressors -::: zarr.testing.strategies.node_names -::: zarr.testing.strategies.short_node_names -::: zarr.testing.strategies.stores -::: zarr.testing.strategies.zarr_formats -::: zarr.testing.strategies.zarr_key_chars -::: zarr.testing.strategies.array_metadata -::: zarr.testing.strategies.arrays -::: zarr.testing.strategies.basic_indices -::: zarr.testing.strategies.chunk_shapes -::: zarr.testing.strategies.clear_store -::: zarr.testing.strategies.dimension_names -::: zarr.testing.strategies.end_slices -::: 
zarr.testing.strategies.is_negative_slice -::: zarr.testing.strategies.key_ranges -::: zarr.testing.strategies.keys -::: zarr.testing.strategies.np_array_and_chunks -::: zarr.testing.strategies.numpy_arrays -::: zarr.testing.strategies.orthogonal_indices -::: zarr.testing.strategies.paths -::: zarr.testing.strategies.safe_unicode_for_dtype -::: zarr.testing.strategies.shard_shapes -::: zarr.testing.strategies.simple_arrays -::: zarr.testing.strategies.v2_dtypes -::: zarr.testing.strategies.v3_dtypes +::: zarr.testing.strategies ## Utils -::: zarr.testing.utils.assert_bytes_equal +::: zarr.testing.utils From 852e6c6c5ef5826b0c7d10f0f4166832e26d7058 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 10 Aug 2025 15:32:04 -0400 Subject: [PATCH 21/64] Cleanup quickstart --- docs/quick-start.md | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index da903af3e4..0c419f5056 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -9,11 +9,13 @@ To get started, you can create a simple Zarr array: import shutil shutil.rmtree('data', ignore_errors=True) import numpy as np +from pprint import pprint +import io np.random.seed(0) ``` -```python exec="true" session="quickstart" source="material-block" result="ansi" +```python exec="true" session="quickstart" source="above" result="ansi" import zarr import numpy as np @@ -39,7 +41,7 @@ written to a `LocalStore` in the `data/example-1.zarr` directory. Zarr supports data compression and filters. 
For example, to use Blosc compression: -```python exec="true" session="quickstart" source="material-block" result="ansi" +```python exec="true" session="quickstart" source="above" result="code" # Create a 2D Zarr array with Blosc compression z = zarr.create_array( @@ -66,7 +68,7 @@ This compresses the data using the Blosc codec with shuffle enabled for better c Zarr allows you to create hierarchical groups, similar to directories: -```python exec="true" session="quickstart" source="material-block" result="ansi" +```python exec="true" session="quickstart" source="above" result="ansi" # Create nested groups and add arrays root = zarr.group("data/example-3.zarr") @@ -91,7 +93,7 @@ This creates a group with two datasets: `foo` and `bar`. Zarr provides tools for creating a collection of arrays and groups with a single function call. Suppose we want to copy existing groups and arrays into a new storage backend: -```python exec="true" session="quickstart" source="material-block" result="ansi" +```python exec="true" session="quickstart" source="above" result="html" # Create nested groups and add arrays root = zarr.group("data/example-4.zarr", attributes={'name': 'root'}) @@ -100,7 +102,12 @@ bar = root.create_array( name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" ) nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} -print(nodes) +# Report nodes +output = io.StringIO() +pprint(nodes, stream=output, width=60, depth=3) +result = output.getvalue() +print(result) +# Create new hierarchy from nodes new_nodes = dict(zarr.create_hierarchy(store=zarr.storage.MemoryStore(), nodes=nodes)) new_root = new_nodes[''] assert new_root.attrs == root.attrs @@ -128,7 +135,7 @@ z[:, :] = np.random.random((100, 100)) A single-file store can also be created using the `zarr.storage.ZipStore`: -```python exec="true" session="quickstart" source="material-block" +```python exec="true" session="quickstart" source="above" # Store the array in a ZIP file store = 
zarr.storage.ZipStore("data/example-5.zip", mode="w") @@ -149,7 +156,7 @@ store.close() To open an existing array from a ZIP file: -```python exec="true" session="quickstart" source="material-block" result="ansi" +```python exec="true" session="quickstart" source="above" result="code" # Open the ZipStore in read-only mode store = zarr.storage.ZipStore("data/example-5.zip", read_only=True) From f73f76fe4c78b5222f02903849331c4ff201ff91 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 10 Aug 2025 15:42:46 -0400 Subject: [PATCH 22/64] Filter warnings in quickstart --- docs/quick-start.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/quick-start.md b/docs/quick-start.md index 0c419f5056..03bef10086 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -11,7 +11,13 @@ shutil.rmtree('data', ignore_errors=True) import numpy as np from pprint import pprint import io +import warnings +warnings.filterwarnings( + "ignore", + message="Numcodecs codecs are not in the Zarr version 3 specification*", + category=UserWarning +) np.random.seed(0) ``` From 41aa4a71f538400e6d17c727d7d3ff6e581f3123 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:08:17 -0400 Subject: [PATCH 23/64] Add links to quickstart --- docs/quick-start.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 03bef10086..42ac95d169 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -119,13 +119,13 @@ new_root = new_nodes[''] assert new_root.attrs == root.attrs ``` -Note that `zarr.create_hierarchy` will only initialize arrays and groups -- copying array data must +Note that [`zarr.create_hierarchy`][] will only initialize arrays and groups -- copying array data must be done in a separate step. ### Persistent Storage Zarr supports persistent storage to disk or cloud-compatible backends. 
While examples above -utilized a `zarr.storage.LocalStore`, a number of other storage options are available. +utilized a [`zarr.storage.LocalStore`][], a number of other storage options are available. Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage using external libraries like [s3fs](https://s3fs.readthedocs.io) or @@ -139,7 +139,7 @@ z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chu z[:, :] = np.random.random((100, 100)) ``` -A single-file store can also be created using the `zarr.storage.ZipStore`: +A single-file store can also be created using the [`zarr.storage.ZipStore`][]: ```python exec="true" session="quickstart" source="above" @@ -173,4 +173,4 @@ z = zarr.open_array(store, mode='r') print(z[:]) ``` -Read more about Zarr's storage options in the [User Guide](user-guide/storage.md). +Read more about Zarr's storage options in the [User Guide](user-guide/index.md). From a653782c4e5d243b868c030ab3201238fa5779a1 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:08:52 -0400 Subject: [PATCH 24/64] Add index pages --- docs/api/deprecated/convenience.md | 3 -- docs/api/deprecated/creation.md | 5 +-- docs/api/index.md | 64 ++++++++++++++++++++++++++++++ docs/user-guide/index.md | 41 +++++++++++++++++++ mkdocs.yml | 12 +++--- src/zarr/convenience.py | 6 +-- src/zarr/creation.py | 5 +-- 7 files changed, 116 insertions(+), 20 deletions(-) create mode 100644 docs/api/index.md create mode 100644 docs/user-guide/index.md diff --git a/docs/api/deprecated/convenience.md b/docs/api/deprecated/convenience.md index 307f78a99c..91bcb15f71 100644 --- a/docs/api/deprecated/convenience.md +++ b/docs/api/deprecated/convenience.md @@ -1,4 +1 @@ -!!! warning "Deprecated" - This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. 
- ::: zarr.convenience \ No newline at end of file diff --git a/docs/api/deprecated/creation.md b/docs/api/deprecated/creation.md index d5873e93fd..5d18a06a4a 100644 --- a/docs/api/deprecated/creation.md +++ b/docs/api/deprecated/creation.md @@ -1,4 +1 @@ -!!! warning "Deprecated" - This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. - -::: zarr.creation \ No newline at end of file +::: zarr.creation diff --git a/docs/api/index.md b/docs/api/index.md new file mode 100644 index 0000000000..8e6be1058e --- /dev/null +++ b/docs/api/index.md @@ -0,0 +1,64 @@ +# API Reference + +Complete reference documentation for the Zarr-Python API. + +## Core API + +### Essential Classes and Functions + +- **[Array](array.md)** - The main Zarr array class for N-dimensional data +- **[Group](group.md)** - Hierarchical organization of arrays and subgroups +- **[Create](create.md)** - Functions for creating new arrays and groups +- **[Open](open.md)** - Opening existing Zarr stores and arrays + +### Data Operations + +- **[Load](load.md)** - Loading data from Zarr stores +- **[Save](save.md)** - Saving data to Zarr format +- **[Convenience](convenience.md)** - High-level convenience functions + +### Data Types and Configuration + +- **[Data Types](dtype.md)** - Supported NumPy data types and type handling +- **[Configuration](config.md)** - Runtime configuration and settings + +## Storage and Compression + +- **[Codecs](codecs.md)** - Compression and filtering codecs +- **[Storage](storage.md)** - Storage backend implementations and interfaces +- **[Registry](registry.md)** - Registry for codecs, codec pipelines, and buffer classes + +## API Variants + +Zarr-Python provides both synchronous and asynchronous APIs: + +- **[Async API](api_async.md)** - Asynchronous operations for concurrent access +- **[Sync API](api_sync.md)** - Synchronous operations for simple usage + +## Abstract Base Classes + +The ABC module defines interfaces for extending Zarr: + +- 
**[Codec ABC](abc/codec.md)** - Interface for custom compression codecs +- **[Metadata ABC](abc/metadata.md)** - Interface for metadata handling +- **[Store ABC](abc/store.md)** - Interface for custom storage backends + +## Utilities + +- **[Errors](errors.md)** - Exception classes and error handling +- **[Testing](testing.md)** - Utilities for testing Zarr-based code + + +## Migration and Compatibility + +- **[Deprecated Functions](deprecated/convenience.md)** - Legacy convenience functions +- **[Deprecated Creation](deprecated/creation.md)** - Legacy array creation functions + +These deprecated modules are maintained for backward compatibility but should be avoided in new code. + +## Getting Help + +- Check the [User Guide](../user-guide/index.md) for tutorials and examples +- Browse function signatures and docstrings in the API reference +- Report issues on [GitHub](https://github.com/zarr-developers/zarr-python) +- Join discussions on the [Zarr community forum](https://github.com/zarr-developers/community) diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md new file mode 100644 index 0000000000..14808457bd --- /dev/null +++ b/docs/user-guide/index.md @@ -0,0 +1,41 @@ +# User Guide + +Welcome to the user guide, where you can learn more about using Zarr-Python! + +## Getting Started + +New to Zarr-Python? 
Start here: + +- **[Installation](installation.md)** - Install Zarr-Python +- **[Quick-start](../quick-start.md)** - Quick overview of core functionality + +## Core Concepts + +Learn the essential building blocks: + +- **[Arrays](arrays.md)** - Learn the fundamentals of working with arrays +- **[Groups](groups.md)** - Organize your data with groups +- **[Attributes](attributes.md)** - Attach metadata to your data structures +- **[Storage](storage.md)** - Learn how data is stored and accessed + +## Configuration & Setup + +Customize your experience: + +- **[Runtime Configuration](config.md)** - Configure Zarr-Python for your needs +- **[V3 Migration](v3_migration.md)** - Upgrading from version 2 to version 3 + +## Advanced Topics + +Take your skills to the next level: + +- **[Data Types](data_types.md)** - Learn about supported and extensible data types +- **[Performance](performance.md)** - Optimize for speed and efficiency +- **[GPU](gpu.md)** - Leverage GPU acceleration +- **[Extending](extending.md)** - Extend functionality with custom code +- **[Consolidated Metadata](consolidated_metadata.md)** - Advanced metadata management + +## Need Help? 
+ +- Browse the [API Reference](../api/index.md) for detailed function documentation +- Report issues on [GitHub](https://github.com/zarr-developers/zarr-python/issues?q=sort%3Aupdated-desc+is%3Aissue+is%3Aopen) diff --git a/mkdocs.yml b/mkdocs.yml index f5b99ab942..89dcdc20c9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -11,6 +11,7 @@ nav: - "index.md" - "quick-start.md" - User Guide: + - user-guide/index.md - user-guide/installation.md - user-guide/arrays.md - user-guide/groups.md @@ -24,6 +25,7 @@ nav: - user-guide/gpu.md - user-guide/consolidated_metadata.md - API Reference: + - api/index.md - api/array.md - api/group.md - api/create.md @@ -38,16 +40,15 @@ nav: - api/registry.md - api/storage.md - api/testing.md - - API: - - api/api_async.md - - api/api_sync.md + - Async API: api/api_async.md + - Sync API: api/api_sync.md - ABC: - api/abc/codec.md - api/abc/metadata.md - api/abc/store.md - deprecated: - - api/deprecated/convenience.md - - api/deprecated/creation.md + - Convenience sub-module: api/deprecated/convenience.md + - Creation sub-module: api/deprecated/creation.md - release-notes.md - contributing.md watch: @@ -141,7 +142,6 @@ plugins: 'getting-started.md': 'quick-start.md' 'roadmap.md': 'https://zarr.readthedocs.io/en/v3.0.8/developers/roadmap.html' 'installation.md': 'user-guide/installation.md' - 'api.md': 'api/open.md' 'release.md': 'release-notes.md' # https://github.com/developmentseed/titiler/blob/50934c929cca2fa8d3c408d239015f8da429c6a8/docs/mkdocs.yml#L115-L140 diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py index 3ca4ffcb4b..391ffc5186 100644 --- a/src/zarr/convenience.py +++ b/src/zarr/convenience.py @@ -1,10 +1,8 @@ """ Convenience helpers. -.. warning:: - - This sub-module is deprecated. All functions here are defined - in the top level zarr namespace instead. +!!! warning "Deprecated" + This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. 
""" import warnings diff --git a/src/zarr/creation.py b/src/zarr/creation.py index 622406ed75..605b5af5de 100644 --- a/src/zarr/creation.py +++ b/src/zarr/creation.py @@ -1,10 +1,9 @@ """ Helpers for creating arrays. -.. warning:: +!!! warning "Deprecated" + This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. - This sub-module is deprecated. All functions here are defined - in the top level zarr namespace instead. """ import warnings From e16c374eb72d509695bd1ae63457c87699359d3f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:10:44 -0400 Subject: [PATCH 25/64] Remove $ from console --- docs/user-guide/installation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index ab8deebb0b..13f87baaab 100644 --- a/docs/user-guide/installation.md +++ b/docs/user-guide/installation.md @@ -16,7 +16,7 @@ Required dependencies include: Zarr is available on [PyPI](https://pypi.org/project/zarr/). Install it using `pip`: ```console -$ pip install zarr +pip install zarr ``` There are a number of optional dependency groups you can install for extra functionality. @@ -32,7 +32,7 @@ Additional optional dependencies include `rich`, `universal_pathlib`. These must Zarr is also published to [conda-forge](https://conda-forge.org). 
Install it using `conda`: ```console -$ conda install -c conda-forge zarr +conda install -c conda-forge zarr ``` Conda does not support optional dependencies, so you will have to manually install any packages From e88640c6686dd46f459492b439df55703a5f61e9 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:15:44 -0400 Subject: [PATCH 26/64] Update code block formatting for arrays --- docs/user-guide/arrays.md | 116 +++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/docs/user-guide/arrays.md b/docs/user-guide/arrays.md index ecd0b77619..19e961e954 100644 --- a/docs/user-guide/arrays.md +++ b/docs/user-guide/arrays.md @@ -12,7 +12,7 @@ import numpy as np np.random.seed(0) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" import zarr store = zarr.storage.MemoryStore() z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') @@ -36,13 +36,13 @@ Zarr arrays support a similar interface to [NumPy](https://numpy.org/doc/stable/ arrays for reading and writing data. 
For example, the entire array can be filled with a scalar value: -```python exec="true" session="arrays" source="material-block" +```python exec="true" session="arrays" source="above" z[:] = 42 ``` Regions of the array can also be written to, e.g.: -```python exec="true" session="arrays" source="material-block" +```python exec="true" session="arrays" source="above" import numpy as np z[0, :] = np.arange(10000) @@ -52,23 +52,23 @@ z[:, 0] = np.arange(10000) The contents of the array can be retrieved by slicing, which will load the requested region into memory as a NumPy array, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[0, 0]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[-1, -1]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[0, :]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[:, 0]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[:]) ``` @@ -82,7 +82,7 @@ main memory. Zarr arrays can also be stored on a file system, enabling persistence of data between sessions. 
To do this, we can change the store argument to point to a filesystem path: -```python exec="true" session="arrays" source="material-block" +```python exec="true" session="arrays" source="above" z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') ``` @@ -96,7 +96,7 @@ flushed to disk, and files are automatically closed whenever an array is modifie Persistent arrays support the same interface for reading and writing data, e.g.: -```python +```python exec="true" session="arrays" source="above" z1[:] = 42 z1[0, :] = np.arange(10000) z1[:, 0] = np.arange(10000) @@ -104,7 +104,7 @@ z1[:, 0] = np.arange(10000) Check that the data have been written and can be read again: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z2 = zarr.open_array('data/example-1.zarr', mode='r') print(np.all(z1[:] == z2[:])) ``` @@ -114,7 +114,7 @@ disk then load back into memory later, the functions `zarr.save` and `zarr.load` may be useful. E.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" a = np.arange(10) zarr.save('data/example-2.zarr', a) print(zarr.load('data/example-2.zarr')) @@ -128,7 +128,7 @@ storage, see the Storage Guide for more details. A Zarr array can be resized, which means that any of its dimensions can be increased or decreased in length. For example: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z = zarr.create_array(store='data/example-3.zarr', shape=(10000, 10000), dtype='int32',chunks=(1000, 1000)) z[:] = 42 print(f"Original shape: {z.shape}") @@ -143,7 +143,7 @@ new array shape will be deleted from the underlying store. 
`zarr.Array.append` is provided as a convenience function, which can be used to append data to any axis. E.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" a = np.arange(10000000, dtype='int32').reshape(10000, 1000) z = zarr.create_array(store='data/example-4.zarr', shape=a.shape, dtype=a.dtype, chunks=(1000, 100)) z[:] = a @@ -163,7 +163,7 @@ compressor libraries including LZ4, Zlib, BZ2 and LZMA. Different compressors can be provided via the `compressors` keyword argument accepted by all array creation functions. For example: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=3, shuffle=zarr.codecs.BloscShuffle.bitshuffle) data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) @@ -179,14 +179,14 @@ When using a compressor, it can be useful to get some diagnostics on the compression ratio. Zarr arrays provide the `zarr.Array.info` property which can be used to print useful diagnostics, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.info) ``` The `zarr.Array.info_complete` method inspects the underlying store and prints additional diagnostics, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.info_complete()) ``` @@ -201,7 +201,7 @@ compressor. In addition to Blosc and Zstandard, other compression libraries can also be used. 
For example, here is an array using Gzip compression, level 1: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1)) z[:] = data @@ -211,7 +211,7 @@ print(f"Compressors: {z.compressors}") Here is an example using LZMA from [NumCodecs](https://numcodecs.readthedocs.io/) with a custom filter pipeline including LZMA's built-in delta filter: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" import lzma from numcodecs.zarr3 import LZMA @@ -224,7 +224,7 @@ print(f"Compressors: {z.compressors}") To disable compression, set `compressors=None` when creating an array, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z = zarr.create_array( store='data/example-8.zarr', shape=(100000000,), @@ -250,7 +250,7 @@ mechanism for configuring filters outside of the primary compressor. Here is an example using a delta filter with the Blosc compressor: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" from numcodecs.zarr3 import Delta filters = [Delta(dtype='int32')] @@ -279,7 +279,7 @@ see the documentation for the `zarr.Array` class. Items from a Zarr array can be extracted by providing an integer array of coordinates. 
E.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(10) ** 2 z = zarr.create_array(store='data/example-10.zarr', shape=data.shape, dtype=data.dtype) z[:] = data @@ -289,7 +289,7 @@ print(z.get_coordinate_selection([2, 5])) Coordinate arrays can also be used to update data, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.set_coordinate_selection([2, 5], [-1, -2]) print(z[:]) ``` @@ -297,14 +297,14 @@ print(z[:]) For multidimensional arrays, coordinates must be provided for each dimension, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z.get_coordinate_selection(([0, 2], [1, 3]))) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.set_coordinate_selection(([0, 2], [1, 3]), [-1, -2]) print(z[:]) ``` @@ -312,23 +312,23 @@ print(z[:]) For convenience, coordinate indexing is also available via the `vindex` property, as well as the square bracket operator, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.vindex[[0, 2], [1, 3]]) z.vindex[[0, 2], [1, 3]] = [-3, -4] ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[:]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[[0, 2], [1, 3]]) ``` 
When the indexing arrays have different shapes, they are broadcast together. That is, the following two calls are equivalent: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[1, [1, 3]]) print(z[[1, 1], [1, 3]]) ``` @@ -337,42 +337,42 @@ print(z[[1, 1], [1, 3]]) Items can also be extracted by providing a Boolean mask. E.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(10) ** 2 z = zarr.create_array(store='data/example-12.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" sel = np.zeros_like(z, dtype=bool) sel[2] = True sel[5] = True print(z.get_mask_selection(sel)) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.set_mask_selection(sel, [-1, -2]) print(z[:]) ``` Here's a multidimensional example: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-13.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" sel = np.zeros_like(z, dtype=bool) sel[0, 1] = True sel[2, 3] = True print(z.get_mask_selection(sel)) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.set_mask_selection(sel, [-1, -2]) print(z[:]) ``` @@ -380,11 +380,11 @@ print(z[:]) For 
convenience, mask indexing is also available via the `vindex` property, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.vindex[sel]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.vindex[sel] = [-3, -4] print(z[:]) @@ -401,50 +401,50 @@ selections to be made along each dimension of an array independently. For example, this allows selecting a subset of rows and/or columns from a 2-dimensional array. E.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-14.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.get_orthogonal_selection(([0, 2], slice(None)))) # select first and third rows ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.get_orthogonal_selection((slice(None), [1, 3]))) # select second and fourth columns) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.get_orthogonal_selection(([0, 2], [1, 3]))) # select rows [0, 2] and columns [1, 4] ``` Data can also be modified, e.g.: -```python exec="true" session="arrays" source="material-block" +```python exec="true" session="arrays" source="above" z.set_orthogonal_selection(([0, 2], [1, 3]), [[-1, -2], [-3, -4]]) ``` For convenience, the orthogonal indexing functionality is also available via the `oindex` property, e.g.: -```python 
exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-15.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z.oindex[[0, 2], :]) # select first and third rows ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.oindex[:, [1, 3]]) # select second and fourth columns ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.oindex[[0, 2], [1, 3]]) # select rows [0, 2] and columns [1, 4] ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.oindex[[0, 2], [1, 3]] = [[-1, -2], [-3, -4]] print(z[:]) ``` @@ -455,7 +455,7 @@ be used for orthogonal indexing. If the index contains at most one iterable, and otherwise contains only slices and integers, orthogonal indexing is also available directly on the array: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-16.zarr', shape=data.shape, dtype=data.dtype) z[:] = data @@ -468,7 +468,7 @@ Zarr also support block indexing, which allows selections of whole chunks based logical indices along each dimension of an array. For example, this allows selecting a subset of chunk aligned rows and/or columns from a 2-dimensional array. 
E.g.: -```python exec="true" session="arrays" source="material-block" +```python exec="true" session="arrays" source="above" data = np.arange(100).reshape(10, 10) z = zarr.create_array(store='data/example-17.zarr', shape=data.shape, dtype=data.dtype, chunks=(3, 3)) z[:] = data @@ -476,39 +476,39 @@ z[:] = data Retrieve items by specifying their block coordinates: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.get_block_selection(1)) ``` Equivalent slicing: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z[3:6]) ``` For convenience, the block selection functionality is also available via the `blocks` property, e.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.blocks[1]) ``` Block index arrays may be multidimensional to index multidimensional arrays. For example: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.blocks[0, 1:3]) ``` Data can also be modified. Let's start by a simple 2D array: -```python exec="true" session="arrays" source="material-block" +```python exec="true" session="arrays" source="above" z = zarr.create_array(store='data/example-18.zarr', shape=(6, 6), dtype=int, chunks=(2, 2)) ``` Set data for a selection of items: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.set_block_selection((1, 0), 1) print(z[...]) ``` @@ -516,18 +516,18 @@ print(z[...]) For convenience, this functionality is also available via the `blocks` property. 
E.g.: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" z.blocks[:, 2] = 7 print(z[...]) ``` Any combination of integer and slice can be used for block indexing: -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" print(z.blocks[2, 1:3]) ``` -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" root = zarr.create_group('data/example-19.zarr') foo = root.create_array(name='foo', shape=(1000, 100), chunks=(10, 10), dtype='float32') bar = root.create_array(name='foo/bar', shape=(100,), dtype='int32') @@ -552,7 +552,7 @@ Users need to configure the chunk and shard shapes accordingly. Sharded arrays can be created by providing the `shards` parameter to `zarr.create_array`. -```python exec="true" session="arrays" source="material-block" result="ansi" +```python exec="true" session="arrays" source="above" result="ansi" a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') a[:] = (np.arange(10000 * 10000) % 256).astype('uint8').reshape(10000, 10000) print(a.info_complete()) From 179e516827219e1f36b1afead4bd7e6314c1104f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:38:25 -0400 Subject: [PATCH 27/64] Add cross-references to arrays --- docs/user-guide/arrays.md | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/docs/user-guide/arrays.md b/docs/user-guide/arrays.md index 19e961e954..536455d509 100644 --- a/docs/user-guide/arrays.md +++ b/docs/user-guide/arrays.md @@ -22,13 +22,13 @@ print(z) The code above creates a 2-dimensional array of 32-bit integers with 10000 rows and 10000 columns, divided into chunks where 
each chunk has 1000 rows and 1000 columns (and so there will be 100 chunks in total). The data is written to a -`zarr.storage.MemoryStore` (e.g. an in-memory dict). See +[`zarr.storage.MemoryStore`][] (e.g. an in-memory dict). See [Persistent arrays](#persistent-arrays) for details on storing arrays in other stores, and see [Data types](data_types.md) for an in-depth look at the data types supported by Zarr. -For a complete list of array creation routines see the `zarr` -module documentation. +See the [creation API documentation][/api/create.md] for more detailed information about +creating arrays. ## Reading and writing data @@ -88,7 +88,7 @@ z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks The array above will store its configuration metadata and all compressed chunk data in a directory called `'data/example-1.zarr'` relative to the current working -directory. The `zarr.create_array` function provides a convenient way +directory. The [`zarr.create_array`][] function provides a convenient way to create a new persistent array or continue working with an existing array. Note, there is no need to close an array: data are automatically flushed to disk, and files are automatically closed whenever an array is modified. @@ -111,7 +111,7 @@ print(np.all(z1[:] == z2[:])) If you are just looking for a fast and convenient way to save NumPy arrays to disk then load back into memory later, the functions -`zarr.save` and `zarr.load` may be +[`zarr.save`][] and [`zarr.load`][] may be useful. E.g.: ```python exec="true" session="arrays" source="above" result="ansi" @@ -121,7 +121,7 @@ print(zarr.load('data/example-2.zarr')) ``` Please note that there are a number of other options for persistent array -storage, see the Storage Guide for more details. +storage, see the [Storage Guide](storage.md) for more details. ## Resizing and appending @@ -140,7 +140,7 @@ Note that when an array is resized, the underlying data are not rearranged in any way. 
If one or more dimensions are shrunk, any chunks falling outside the new array shape will be deleted from the underlying store. -`zarr.Array.append` is provided as a convenience function, which can be +[`zarr.Array.append`][] is provided as a convenience function, which can be used to append data to any axis. E.g.: ```python exec="true" session="arrays" source="above" result="ansi" @@ -176,14 +176,14 @@ algorithm (compression level 3) internally within Blosc, and with the bit-shuffle filter applied. When using a compressor, it can be useful to get some diagnostics on the -compression ratio. Zarr arrays provide the `zarr.Array.info` property +compression ratio. Zarr arrays provide the [`zarr.Array.info`][] property which can be used to print useful diagnostics, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" print(z.info) ``` -The `zarr.Array.info_complete` method inspects the underlying store and +The [`zarr.Array.info_complete`][] method inspects the underlying store and prints additional diagnostics, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" @@ -191,8 +191,8 @@ print(z.info_complete()) ``` !!! note - `zarr.Array.info_complete` will inspect the underlying store and may - be slow for large arrays. Use `zarr.Array.info` if detailed storage + [`zarr.Array.info_complete`][] will inspect the underlying store and may + be slow for large arrays. Use [`zarr.Array.info`][] if detailed storage statistics are not needed. If you don't specify a compressor, by default Zarr uses the Zstandard @@ -272,7 +272,7 @@ Note that although this functionality is similar to some of the advanced indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr API for advanced indexing is different from both NumPy and h5py**, so please read this section carefully. For a complete description of the indexing API, -see the documentation for the `zarr.Array` class. +see the documentation for the [`zarr.Array`][] class. 
### Indexing with coordinate arrays @@ -301,6 +301,10 @@ e.g.: data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) z[:] = data +print(z[:]) +``` + +```python exec="true" session="arrays" source="above" result="ansi" print(z.get_coordinate_selection(([0, 2], [1, 3]))) ``` @@ -550,7 +554,7 @@ performance and to avoid concurrency issues. That means that shards are the units of writing and chunks are the units of reading. Users need to configure the chunk and shard shapes accordingly. -Sharded arrays can be created by providing the `shards` parameter to `zarr.create_array`. +Sharded arrays can be created by providing the `shards` parameter to [`zarr.create_array`][]. ```python exec="true" session="arrays" source="above" result="ansi" a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') @@ -559,7 +563,7 @@ print(a.info_complete()) ``` In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is used. -This means that 10*10 chunks are stored in each shard, and there are 10*10 shards in total. +This means that `10*10` chunks are stored in each shard, and there are `10*10` shards in total. Without the `shards` argument, there would be 10,000 chunks stored as individual files. 
## Missing features in 3.0 From 9877928fcd8c7c65717e927d27107690bbe319c3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 12:13:23 -0400 Subject: [PATCH 28/64] Executable blocks and cross-references in groups user guide --- docs/user-guide/groups.md | 148 +++++++++++++++----------------------- 1 file changed, 57 insertions(+), 91 deletions(-) diff --git a/docs/user-guide/groups.md b/docs/user-guide/groups.md index cf8675274d..5ec5554675 100644 --- a/docs/user-guide/groups.md +++ b/docs/user-guide/groups.md @@ -4,99 +4,100 @@ Zarr supports hierarchical organization of arrays via groups. As with arrays, groups can be stored in memory, on disk, or via other storage systems that support a similar interface. -To create a group, use the `zarr.group` function: +To create a group, use the [`zarr.group`][] function: -```python +```python exec="true" session="groups" source="above" result="ansi" import zarr store = zarr.storage.MemoryStore() root = zarr.create_group(store=store) -root -# +print(root) ``` Groups have a similar API to the Group class from [h5py](https://www.h5py.org/). 
For example, groups can contain other groups: -```python +```python exec="true" session="groups" source="above" foo = root.create_group('foo') bar = foo.create_group('bar') ``` Groups can also contain arrays, e.g.: -```python +```python exec="true" session="groups" source="above" result="ansi" z1 = bar.create_array(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') -z1 -# +print(z1) ``` Members of a group can be accessed via the suffix notation, e.g.: -```python -root['foo'] -# +```python exec="true" session="groups" source="above" result="ansi" +print(root['foo']) ``` The '/' character can be used to access multiple levels of the hierarchy in one call, e.g.: -```python -root['foo/bar'] -# -root['foo/bar/baz'] -# +```python exec="true" session="groups" source="above" result="ansi" +print(root['foo/bar']) ``` -The `zarr.Group.tree` method can be used to print a tree +```python exec="true" session="groups" source="above" result="ansi" +print(root['foo/bar/baz']) +``` + +The [`zarr.Group.tree`][] method can be used to print a tree representation of the hierarchy, e.g.: -```python -root.tree() -# / -# └── foo -# └── bar -# └── baz (10000, 10000) int32 +```python exec="true" session="groups" source="above" result="ansi" +print(root.tree()) ``` -The `zarr.open_group` function provides a convenient way to create or +The [`zarr.open_group`][] function provides a convenient way to create or re-open a group stored in a directory on the file-system, with sub-groups stored in sub-directories, e.g.: -```python +```python exec="true" session="groups" source="above" result="ansi" root = zarr.open_group('data/group.zarr', mode='w') -root -# +print(root) +``` +```python exec="true" session="groups" source="above" result="ansi" z = root.create_array(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') -z -# +print(z) ``` -For more information on groups see the `zarr.Group` API docs. 
+For more information on groups see the [`zarr.Group` API docs](../api/group.md). ## Batch Group Creation -You can also create multiple groups concurrently with a single function call. `zarr.create_hierarchy` takes -a `zarr.storage.Store` instance and a dict of `key : metadata` pairs, parses that dict, and +You can also create multiple groups concurrently with a single function call. [`zarr.create_hierarchy`][] takes +a [Zarr store](../api/storage.md) instance and a dict of `key : metadata` pairs, parses that dict, and writes metadata documents to storage: -```python +```python exec="true" session="groups" source="above" result="ansi" from zarr import create_hierarchy from zarr.core.group import GroupMetadata from zarr.storage import LocalStore + +from pprint import pprint +import io + node_spec = {'a/b/c': GroupMetadata()} nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec)) -print(sorted(nodes_created.items(), key=lambda kv: len(kv[0]))) -# [('', ), ('a', ), ('a/b', ), ('a/b/c', )] +# Report nodes (pprint is used for cleaner rendering in the docs) +output = io.StringIO() +pprint(nodes_created, stream=output, width=60, depth=3) +result = output.getvalue() +print(result) ``` Note that we only specified a single group named `a/b/c`, but 4 groups were created. These additional groups were created to ensure that the desired node `a/b/c` is connected to the root group `''` by a sequence -of intermediate groups. `zarr.create_hierarchy` normalizes the `nodes` keyword argument to +of intermediate groups. [`zarr.create_hierarchy`][] normalizes the `nodes` keyword argument to ensure that the resulting hierarchy is complete, i.e. all groups or arrays are connected to the root of the hierarchy via intermediate groups. 
-Because `zarr.create_hierarchy` concurrently creates metadata documents, it's more efficient -than repeated calls to `create_group` or `create_array`, provided you can statically define +Because [`zarr.create_hierarchy`][] concurrently creates metadata documents, it's more efficient +than repeated calls to [`create_group`][zarr.create_group] or [`create_array`][zarr.create_array], provided you can statically define the metadata for the groups and arrays you want to create. ## Array and group diagnostics @@ -104,7 +105,7 @@ the metadata for the groups and arrays you want to create. Diagnostic information about arrays and groups is available via the `info` property. E.g.: -```python +```python exec="true" session="groups" source="above" result="ansi" store = zarr.storage.MemoryStore() root = zarr.group(store=store) foo = root.create_group('foo') @@ -112,61 +113,26 @@ bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64') bar[:] = 42 baz = foo.create_array(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='float32') baz[:] = 4.2 -root.info -# Name : -# Type : Group -# Zarr format : 3 -# Read-only : False -# Store type : MemoryStore -foo.info -# Name : foo -# Type : Group -# Zarr format : 3 -# Read-only : False -# Store type : MemoryStore -bar.info_complete() -# Type : Array -# Zarr format : 3 -# Data type : Int64(endianness='little') -# Fill value : 0 -# Shape : (1000000,) -# Chunk shape : (100000,) -# Order : C -# Read-only : False -# Store type : MemoryStore -# Filters : () -# Serializer : BytesCodec(endian=) -# Compressors : (ZstdCodec(level=0, checksum=False),) -# No. bytes : 8000000 (7.6M) -# No. 
bytes stored : 1614 (1.6K) -# Storage ratio : 4956.6 -# Chunks Initialized : 10 -baz.info -# Type : Array -# Zarr format : 3 -# Data type : Float32(endianness='little') -# Fill value : 0.0 -# Shape : (1000, 1000) -# Chunk shape : (100, 100) -# Order : C -# Read-only : False -# Store type : MemoryStore -# Filters : () -# Serializer : BytesCodec(endian=) -# Compressors : (ZstdCodec(level=0, checksum=False),) -# No. bytes : 4000000 (3.8M) +print(root.info) +``` + +```python exec="true" session="groups" source="above" result="ansi" +print(foo.info) +``` + +```python exec="true" session="groups" source="above" result="ansi" +print(bar.info_complete()) +``` + +```python exec="true" session="groups" source="above" result="ansi" +print(baz.info) ``` -Groups also have the `zarr.Group.tree` method, e.g.: +Groups also have the [`zarr.Group.tree`][] method, e.g.: -```python -root.tree() -# / -# └── foo -# ├── bar (1000000,) int64 -# └── baz (1000, 1000) float32 +```python exec="true" session="groups" source="above" result="ansi" +print(root.tree()) ``` !!! note - `zarr.Group.tree` requires the optional [rich](https://rich.readthedocs.io/en/stable/) - dependency. It can be installed with the `[tree]` extra. + [`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra. 
\ No newline at end of file From f38a6736eb8b1f96dfde48b250c9d2ec8df4c5f8 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 12:13:34 -0400 Subject: [PATCH 29/64] Fix links --- docs/api/storage.md | 2 +- docs/user-guide/arrays.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/storage.md b/docs/api/storage.md index 204ef64a9d..33580d1d8a 100644 --- a/docs/api/storage.md +++ b/docs/api/storage.md @@ -8,4 +8,4 @@ title: storage ## Classes -::: zarr.storage \ No newline at end of file +::: zarr.storage diff --git a/docs/user-guide/arrays.md b/docs/user-guide/arrays.md index 536455d509..643f0221ce 100644 --- a/docs/user-guide/arrays.md +++ b/docs/user-guide/arrays.md @@ -27,7 +27,7 @@ columns (and so there will be 100 chunks in total). The data is written to a and see [Data types](data_types.md) for an in-depth look at the data types supported by Zarr. -See the [creation API documentation][/api/create.md] for more detailed information about +See the [creation API documentation](../api/create.md) for more detailed information about creating arrays. ## Reading and writing data From 7cde0eeab0f9e339253682336d0711f199751c17 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 12:16:42 -0400 Subject: [PATCH 30/64] Executable code blocks in attrs user-guide --- docs/user-guide/attributes.md | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/docs/user-guide/attributes.md b/docs/user-guide/attributes.md index 216720c75f..44d2f9fa87 100644 --- a/docs/user-guide/attributes.md +++ b/docs/user-guide/attributes.md @@ -3,7 +3,7 @@ Zarr arrays and groups support custom key/value attributes, which can be useful for storing application-specific metadata. 
For example: -```python +```python exec="true" session="arrays" source="above" result="ansi" import zarr store = zarr.storage.MemoryStore() root = zarr.create_group(store=store) @@ -11,18 +11,26 @@ root.attrs['foo'] = 'bar' z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32') z.attrs['baz'] = 42 z.attrs['qux'] = [1, 4, 7, 12] -sorted(root.attrs) -# ['foo'] -'foo' in root.attrs -# True -root.attrs['foo'] -# 'bar' -sorted(z.attrs) -# ['baz', 'qux'] -z.attrs['baz'] -# 42 -z.attrs['qux'] -# [1, 4, 7, 12] +print(sorted(root.attrs)) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print('foo' in root.attrs) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(root.attrs['foo']) +``` +```python exec="true" session="arrays" source="above" result="ansi" +print(sorted(z.attrs)) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.attrs['baz']) +``` + +```python exec="true" session="arrays" source="above" result="ansi" +print(z.attrs['qux']) ``` Internally Zarr uses JSON to store array attributes, so attribute values must be From e005f29ac075636850f886fccf28b0d779def32e Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 16:11:07 -0400 Subject: [PATCH 31/64] Add external inventories --- mkdocs.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 89dcdc20c9..2c8c4b3737 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -131,6 +131,18 @@ plugins: inventories: - https://docs.python.org/3/objects.inv + - https://docs.xarray.dev/en/stable/objects.inv + - https://numpy.org/doc/stable/objects.inv + - https://numcodecs.readthedocs.io/en/stable/objects.inv + - https://developmentseed.org/obstore/latest/objects.inv + - https://filesystem-spec.readthedocs.io/en/latest/objects.inv + - https://requests.readthedocs.io/en/latest/objects.inv + - https://docs.aiohttp.org/en/stable/objects.inv 
+ - https://s3fs.readthedocs.io/en/latest/objects.inv + - https://docs.h5py.org/en/stable/objects.inv + - https://icechunk.io/en/stable/objects.inv + - https://lithops-cloud.github.io/docs/objects.inv + - https://docs.dask.org/en/stable/objects.inv - redirects: redirect_maps: 'spec/index.md': 'https://zarr-specs.readthedocs.io' From 901f04e5b20197d6cf2272db491749a26879e1c3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 16:17:19 -0400 Subject: [PATCH 32/64] Add cross-references to storage --- docs/user-guide/storage.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/user-guide/storage.md b/docs/user-guide/storage.md index eb247cec99..10c6b9846a 100644 --- a/docs/user-guide/storage.md +++ b/docs/user-guide/storage.md @@ -3,7 +3,7 @@ Zarr-Python supports multiple storage backends, including: local file systems, Zip files, remote stores via [fsspec](https://filesystem-spec.readthedocs.io) (S3, HTTP, etc.), and in-memory stores. In Zarr-Python 3, stores must implement the abstract store API from -`zarr.abc.store.Store`. +[`zarr.abc.store.Store`][]. !!! note Unlike Zarr-Python 2 where the store interface was built around a generic `MutableMapping` @@ -12,7 +12,7 @@ Zarr-Python 3, stores must implement the abstract store API from ## Implicit Store Creation In most cases, it is not required to create a `Store` object explicitly. Passing a string -to Zarr's top level API will result in the store being created automatically.: +to Zarr's top level API will result in the store being created automatically: ```python import zarr @@ -38,13 +38,13 @@ zarr.create_group(store=data) ## Explicit Store Creation In some cases, it may be helpful to create a store instance directly. Zarr-Python offers four -built-in store: `zarr.storage.LocalStore`, `zarr.storage.FsspecStore`, -`zarr.storage.ZipStore`, `zarr.storage.MemoryStore`, and `zarr.storage.ObjectStore`. 
+built-in stores: [`zarr.storage.LocalStore`][], [`zarr.storage.FsspecStore`][], +[`zarr.storage.ZipStore`][], [`zarr.storage.MemoryStore`][], and [`zarr.storage.ObjectStore`][]. ### Local Store -The `zarr.storage.LocalStore` stores data in a nested set of directories on a local -filesystem.: +The [`zarr.storage.LocalStore`][] stores data in a nested set of directories on a local +filesystem: ```python store = zarr.storage.LocalStore('data/foo/bar', read_only=True) @@ -54,8 +54,8 @@ zarr.open_group(store=store, mode='r') ### Zip Store -The `zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single -Zip file. The [Zip Store specification](https://github.com/zarr-developers/zarr-specs/pull/311) is currently in draft form.: +The [`zarr.storage.ZipStore`][] stores the contents of a Zarr hierarchy in a single +Zip file. The [Zip Store specification](https://github.com/zarr-developers/zarr-specs/pull/311) is currently in draft form: ```python store = zarr.storage.ZipStore('data.zip', mode='w') @@ -65,12 +65,12 @@ zarr.create_array(store=store, shape=(2,), dtype='float64') ### Remote Store -The `zarr.storage.FsspecStore` stores the contents of a Zarr hierarchy in following the same -logical layout as the `LocalStore`, except the store is assumed to be on a remote storage system +The [`zarr.storage.FsspecStore`][] stores the contents of a Zarr hierarchy following the same +logical layout as the [`LocalStore`][zarr.storage.LocalStore], except the store is assumed to be on a remote storage system such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Store). The -`zarr.storage.FsspecStore` is backed by [fsspec](https://filesystem-spec.readthedocs.io) and can support any backend +[`zarr.storage.FsspecStore`][] is backed by [fsspec](https://filesystem-spec.readthedocs.io) and can support any backend that implements the [AbstractFileSystem](https://filesystem-spec.readthedocs.io/en/stable/api.html#fsspec.spec.AbstractFileSystem) -API. 
`storage_options` can be used to configure the fsspec backend.: +API. `storage_options` can be used to configure the fsspec backend: ```python store = zarr.storage.FsspecStore.from_url( @@ -97,8 +97,8 @@ store = zarr.storage.FsspecStore(fs) ### Memory Store -The `zarr.storage.MemoryStore` a in-memory store that allows for serialization of -Zarr data (metadata and chunks) to a dictionary.: +The [`zarr.storage.MemoryStore`][] a in-memory store that allows for serialization of +Zarr data (metadata and chunks) to a dictionary: ```python data = {} @@ -110,8 +110,8 @@ zarr.create_array(store=store, shape=(2,), dtype='float64') ### Object Store -`zarr.storage.ObjectStore` stores the contents of the Zarr hierarchy using any ObjectStore -[storage implementation](https://developmentseed.org/obstore/latest/api/store/), including AWS S3 (`obstore.store.S3Store`), Google Cloud Storage (`obstore.store.GCSStore`), and Azure Blob Storage (`obstore.store.AzureStore`). This store is backed by [obstore](https://developmentseed.org/obstore/latest/), which +[`zarr.storage.ObjectStore`][] stores the contents of the Zarr hierarchy using any ObjectStore +[storage implementation](https://developmentseed.org/obstore/latest/api/store/), including AWS S3 ([`obstore.store.S3Store`][]), Google Cloud Storage ([`obstore.store.GCSStore`][]), and Azure Blob Storage ([`obstore.store.AzureStore`][]). This store is backed by [obstore](https://developmentseed.org/obstore/latest/), which builds on the production quality Rust library [object_store](https://docs.rs/object_store/latest/object_store/). ```python @@ -144,10 +144,10 @@ group.info ``` !!! warning - The `zarr.storage.ObjectStore` class is experimental. + The [`zarr.storage.ObjectStore`][] class is experimental. ## Developing custom stores -Zarr-Python `zarr.abc.store.Store` API is meant to be extended. The Store Abstract Base +Zarr-Python [`zarr.abc.store.Store`][] API is meant to be extended. 
The Store Abstract Base Class includes all of the methods needed to be a fully operational store in Zarr Python. -Zarr also provides a test harness for custom stores: `zarr.testing.store.StoreTests`. +Zarr also provides a test harness for custom stores: [`zarr.testing.store.StoreTests`][]. From b8737056f19314cea4b86807439328bdac688a92 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 16:24:55 -0400 Subject: [PATCH 33/64] Executable storage guide --- docs/user-guide/storage.md | 64 ++++++++++++++++++-------------------- pyproject.toml | 2 +- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/docs/user-guide/storage.md b/docs/user-guide/storage.md index 10c6b9846a..86dac188a5 100644 --- a/docs/user-guide/storage.md +++ b/docs/user-guide/storage.md @@ -14,25 +14,29 @@ Zarr-Python 3, stores must implement the abstract store API from In most cases, it is not required to create a `Store` object explicitly. Passing a string to Zarr's top level API will result in the store being created automatically: -```python +```python exec="true" session="storage" source="above" result="ansi" import zarr # Implicitly create a writable LocalStore -zarr.create_group(store='data/foo/bar') -# +group = zarr.create_group(store='data/foo/bar') +print(group) +``` +```python exec="true" session="storage" source="above" result="ansi" # Implicitly create a read-only FsspecStore -zarr.open_group( +group = zarr.open_group( store='s3://noaa-nwm-retro-v2-zarr-pds', mode='r', storage_options={'anon': True} ) -# > +print(group) +``` +```python exec="true" session="storage" source="above" result="ansi" # Implicitly creates a MemoryStore data = {} -zarr.create_group(store=data) -# +group = zarr.create_group(store=data) +print(group) ``` ## Explicit Store Creation @@ -46,10 +50,10 @@ built-in store: [`zarr.storage.LocalStore`][], [`zarr.storage.FsspecStore`][], The [`zarr.storage.LocalStore`][] stores data in a nested set of 
directories on a local filesystem: -```python +```python exec="true" session="storage" source="above" result="ansi" store = zarr.storage.LocalStore('data/foo/bar', read_only=True) -zarr.open_group(store=store, mode='r') -# +group = zarr.open_group(store=store, mode='r') +print(group) ``` ### Zip Store @@ -57,10 +61,10 @@ zarr.open_group(store=store, mode='r') The [`zarr.storage.ZipStore`][] stores the contents of a Zarr hierarchy in a single Zip file. The [Zip Store specification](https://github.com/zarr-developers/zarr-specs/pull/311) is currently in draft form: -```python +```python exec="true" session="storage" source="above" result="ansi" store = zarr.storage.ZipStore('data.zip', mode='w') -zarr.create_array(store=store, shape=(2,), dtype='float64') -# +array = zarr.create_array(store=store, shape=(2,), dtype='float64') +print(array) ``` ### Remote Store @@ -72,26 +76,27 @@ such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Stor that implements the [AbstractFileSystem](https://filesystem-spec.readthedocs.io/en/stable/api.html#fsspec.spec.AbstractFileSystem) API. `storage_options` can be used to configure the fsspec backend: -```python +```python exec="true" session="storage" source="above" result="ansi" store = zarr.storage.FsspecStore.from_url( 's3://noaa-nwm-retro-v2-zarr-pds', read_only=True, storage_options={'anon': True} ) -zarr.open_group(store=store, mode='r') -# > +group = zarr.open_group(store=store, mode='r') +print(group) ``` The type of filesystem (e.g. S3, https, etc..) is inferred from the scheme of the url (e.g. s3 for "**s3**://noaa-nwm-retro-v2-zarr-pds"). In case a specific filesystem is needed, one can explicitly create it. 
For example to create a S3 filesystem: -```python +```python exec="true" session="storage" source="above" result="ansi" import fsspec fs = fsspec.filesystem( 's3', anon=True, asynchronous=True, client_kwargs={'endpoint_url': "https://noaa-nwm-retro-v2-zarr-pds.s3.amazonaws.com"} ) store = zarr.storage.FsspecStore(fs) +print(store) ``` @@ -100,12 +105,11 @@ store = zarr.storage.FsspecStore(fs) The [`zarr.storage.MemoryStore`][] a in-memory store that allows for serialization of Zarr data (metadata and chunks) to a dictionary: -```python +```python exec="true" session="storage" source="above" result="ansi" data = {} store = zarr.storage.MemoryStore(data) -# TODO: replace with create_array after #2463 -zarr.create_array(store=store, shape=(2,), dtype='float64') -# +array = zarr.create_array(store=store, shape=(2,), dtype='float64') +print(array) ``` ### Object Store @@ -114,33 +118,25 @@ zarr.create_array(store=store, shape=(2,), dtype='float64') [storage implementation](https://developmentseed.org/obstore/latest/api/store/), including AWS S3 ([`obstore.store.S3Store`][]), Google Cloud Storage ([`obstore.store.GCSStore`][]), and Azure Blob Storage ([`obstore.store.AzureStore`][]). This store is backed by [obstore](https://developmentseed.org/obstore/latest/), which builds on the production quality Rust library [object_store](https://docs.rs/object_store/latest/object_store/). 
-```python +```python exec="true" session="storage" source="above" result="ansi" from zarr.storage import ObjectStore from obstore.store import MemoryStore store = ObjectStore(MemoryStore()) -zarr.create_array(store=store, shape=(2,), dtype='float64') -# +array = zarr.create_array(store=store, shape=(2,), dtype='float64') +print(array) ``` Here's an example of using ObjectStore for accessing remote data: -```python +```python exec="true" session="storage" source="above" result="ansi" from zarr.storage import ObjectStore from obstore.store import S3Store s3_store = S3Store('noaa-nwm-retro-v2-zarr-pds', skip_signature=True, region="us-west-2") store = zarr.storage.ObjectStore(store=s3_store, read_only=True) group = zarr.open_group(store=store, mode='r') -group.info -# Name : -# Type : Group -# Zarr format : 2 -# Read-only : True -# Store type : ObjectStore -# No. members : 12 -# No. arrays : 12 -# No. groups : 0 +print(group.info) ``` !!! warning diff --git a/pyproject.toml b/pyproject.toml index 0f3bfe5d0c..81dc6addf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -252,7 +252,7 @@ fix = "rm -r data/; pytest docs/user-guide --doctest-glob='*.rst' --accept" list-env = "pip list" [tool.hatch.envs.docs] -features = ['docs'] +features = ['docs', 'remote'] [tool.hatch.envs.docs.scripts] serve = "mkdocs serve" From 128a205b1ad31b5ea2a0b4ec44962b2e0d6cc9bf Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 16:53:38 -0400 Subject: [PATCH 34/64] Executable config code blocks --- docs/user-guide/config.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/user-guide/config.md b/docs/user-guide/config.md index d9b156c3e2..b0a0969c4f 100644 --- a/docs/user-guide/config.md +++ b/docs/user-guide/config.md @@ -1,19 +1,21 @@ # Runtime configuration -`zarr.config` is responsible for managing the configuration of zarr and +[`zarr.config`][] is responsible for managing the configuration 
of zarr and is based on the [donfig](https://github.com/pytroll/donfig) Python library. Configuration values can be set using code like the following: -```python +```python exec="true" session="config" source="above" result="ansi" + import zarr +print(zarr.config.get('array.order')) +``` + +```python exec="true" session="config" source="above" result="ansi" zarr.config.set({'array.order': 'F'}) -# -# revert this change so it doesn't impact the rest of the docs -zarr.config.set({'array.order': 'C'}) -# +print(zarr.config.get('array.order')) ``` Alternatively, configuration values can be set using environment variables, e.g. From d8e43788f8f568b8859dee8337262b0c3325b87f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Aug 2025 17:28:50 -0400 Subject: [PATCH 35/64] Update V3 migration links --- docs/user-guide/v3_migration.md | 62 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/docs/user-guide/v3_migration.md b/docs/user-guide/v3_migration.md index d5bfda7041..c3a7ddbafe 100644 --- a/docs/user-guide/v3_migration.md +++ b/docs/user-guide/v3_migration.md @@ -20,9 +20,9 @@ so we can improve this guide. The goals described above necessitated some breaking changes to the API (hence the major version update), but where possible we have maintained backwards compatibility -in the most widely used parts of the API. This in the `zarr.Array` and -`zarr.Group` classes and the "top-level API" (e.g. `zarr.open_array` and -`zarr.open_group`). +in the most widely used parts of the API. This includes the [`zarr.Array`][] and +[`zarr.Group`][] classes and the "top-level API" (e.g. [`zarr.open_array`][] and +[`zarr.open_group`][]). ## Getting ready for 3.0 @@ -92,8 +92,8 @@ The following sections provide details on breaking changes in Zarr-Python 3. ### The Array class 1. Disallow direct construction - the signature for initializing the `Array` class has changed - significantly. 
Please use `zarr.create_array` or `zarr.open_array` instead of - directly constructing the `zarr.Array` class. + significantly. Please use [`zarr.create_array`][] or [`zarr.open_array`][] instead of + directly constructing the [`zarr.Array`][] class. 2. Defaulting to `zarr_format=3` - newly created arrays will use the version 3 of the Zarr specification. To continue using version 2, set `zarr_format=2` when creating arrays @@ -101,13 +101,13 @@ The following sections provide details on breaking changes in Zarr-Python 3. ### The Group class -1. Disallow direct construction - use `zarr.open_group` or `zarr.create_group` +1. Disallow direct construction - use [`zarr.open_group`][] or [`zarr.create_group`][] instead of directly constructing the `zarr.Group` class. 2. Most of the h5py compatibility methods are deprecated and will issue warnings if used. The following functions are drop in replacements that have the same signature and functionality: - - Use `zarr.Group.create_array` in place of `zarr.Group.create_dataset` - - Use `zarr.Group.require_array` in place of `zarr.Group.require_dataset` + - Use [`zarr.Group.create_array`][] in place of `zarr.Group.create_dataset` + - Use [`zarr.Group.require_array`][] in place of `zarr.Group.require_dataset` 3. Disallow "." syntax for getting group members. To get a member of a group named `foo`, use `group["foo"]` in place of `group.foo`. 
@@ -129,14 +129,13 @@ Several store implementations have moved from the top-level module to `zarr.stor Common replacements: | v2 Import | v3 Import | -|=========================|====================================| -| `zarr.MemoryStore` | `zarr.storage.MemoryStore` | -| `zarr.DirectoryStore` | `zarr.storage.LocalStore` | -| `zarr.TempStore` | Use `tempfile.TemporaryDirectory` | -| | with `LocalStore` | +|-------------------------|------------------------------------| +| `zarr.MemoryStore` | [`zarr.storage.MemoryStore`][] | +| `zarr.DirectoryStore` | [`zarr.storage.LocalStore`][] | +| `zarr.TempStore` | Use [`tempfile.TemporaryDirectory`][] with [`LocalStore`][zarr.storage.LocalStore] | 1. Replaced the `MutableMapping` base class in favor of a custom abstract base class - (`zarr.abc.store.Store`). + ([`zarr.abc.store.Store`][]). 2. Switched to an asynchronous interface for all store methods that result in IO. This change ensures that all store methods are non-blocking and are as performant as possible. @@ -146,7 +145,7 @@ Zarr-Python 3. See issue #1274 for more details on the removal of these stores. - `N5Store` - see https://github.com/zarr-developers/n5py for an alternative interface to N5 formatted data. -- `ABSStore` - use the `zarr.storage.FsspecStore` instead along with fsspec's +- `ABSStore` - use the [`zarr.storage.FsspecStore`][] instead along with fsspec's [adlfs backend](https://github.com/fsspec/adlfs). The following stores have been removed altogether. Users who need these stores will have to @@ -160,7 +159,7 @@ implement their own version in zarr-python v3. At present, the latter five stores in this list do not have an equivalent in Zarr-Python 3. If you are interested in developing a custom store that targets these backends, see -developing custom stores or open an +[developing custom stores](storage.md#developing-custom-stores) or open an [issue](https://github.com/zarr-developers/zarr-python/issues) to discuss your use case. 
### Codecs @@ -187,7 +186,7 @@ When installing using `pip`: ### Miscellaneous - The keyword argument `zarr_version` available in most creation functions in `zarr` - (e.g. `zarr.create`, `zarr.open`, `zarr.group`, `zarr.array`) has + (e.g. [`zarr.create`][], [`zarr.open`][], [`zarr.group`][], [`zarr.array`][]) has been deprecated in favor of `zarr_format`. ## 🚧 Work in Progress 🚧 @@ -200,10 +199,10 @@ of Zarr-Python, please open (or comment on) a - The following functions / methods have not been ported to Zarr-Python 3 yet: - * `zarr.copy` (issue #2407) - * `zarr.copy_all` (issue #2407) - * `zarr.copy_store` (issue #2407) - * `zarr.Group.move` (issue #2108) + * `zarr.copy` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) + * `zarr.copy_all` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) + * `zarr.copy_store` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) + * `zarr.Group.move` ([issue #2108](https://github.com/zarr-developers/zarr-python/issues/2108)) - The following features (corresponding to function arguments to functions in `zarr`) have not been ported to Zarr-Python 3 yet. 
Using these features @@ -211,20 +210,19 @@ of Zarr-Python, please open (or comment on) a * `cache_attrs` * `cache_metadata` - * `chunk_store` (issue #2495) + * `chunk_store` ([issue #2495](https://github.com/zarr-developers/zarr-python/issues/2495)) * `meta_array` - * `object_codec` (issue #2617) - * `synchronizer` (issue #1596) + * `object_codec` ([issue #2617](https://github.com/zarr-developers/zarr-python/issues/2617)) + * `synchronizer` ([issue #1596](https://github.com/zarr-developers/zarr-python/issues/1596)) * `dimension_separator` - The following features that were supported by Zarr-Python 2 have not been ported to Zarr-Python 3 yet: - * Structured arrays / dtypes (issue #2134) - * Fixed-length string dtypes (issue #2347) - * Datetime and timedelta dtypes (issue #2616) - * Object dtypes (issue #2617) - * Ragged arrays (issue #2618) - * Groups and Arrays do not implement `__enter__` and `__exit__` protocols (issue #2619) - * Big Endian dtypes (issue #2324) - * Default filters for object dtypes for Zarr format 2 arrays (issue #2627) + * Structured arrays / dtypes ([issue #2134](https://github.com/zarr-developers/zarr-python/issues/2134)) + * Fixed-length string dtypes ([issue #2347](https://github.com/zarr-developers/zarr-python/issues/2347)) + * Datetime and timedelta dtypes ([issue #2616](https://github.com/zarr-developers/zarr-python/issues/2616)) + * Object dtypes ([issue #2616](https://github.com/zarr-developers/zarr-python/issues/2616)) + * Ragged arrays ([issue #2618](https://github.com/zarr-developers/zarr-python/issues/2618)) + * Groups and Arrays do not implement `__enter__` and `__exit__` protocols ([issue #2619](https://github.com/zarr-developers/zarr-python/issues/2619)) + * Default filters for object dtypes for Zarr format 2 arrays ([issue #2627](https://github.com/zarr-developers/zarr-python/issues/2627)) From cb2a205f6a7b7b6e26c39cbcd70724b5b06bd818 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: 
Tue, 12 Aug 2025 17:40:43 -0400 Subject: [PATCH 36/64] Executable code blocks in data types --- docs/user-guide/data_types.md | 180 +++++++++++++++++----------------- 1 file changed, 88 insertions(+), 92 deletions(-) diff --git a/docs/user-guide/data_types.md b/docs/user-guide/data_types.md index 6245fb5b3d..82b7c89809 100644 --- a/docs/user-guide/data_types.md +++ b/docs/user-guide/data_types.md @@ -45,19 +45,20 @@ Version 2 of the Zarr format defined its data types relative to and added a few non-NumPy data types as well. With one exception ([structured data types](#structured-data-type)), the Zarr V2 JSON identifier for a data type is just the NumPy `str` attribute of that data type: -```python ->>> import zarr ->>> import numpy as np ->>> import json ->>> ->>> store = {} ->>> np_dtype = np.dtype('int64') ->>> np_dtype.str -'>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) ->>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] ->>> dtype_meta -'>> store = {} ->>> np_dtype = np.dtype([('field_a', '>i2'), ('field_b', [('subfield_c', '>f4'), ('subfield_d', 'i2')])]) ->>> np_dtype.str -'|V8' ->>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) ->>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] ->>> dtype_meta -[['field_a', '>i2'], ['field_b', [['subfield_c', '>f4'], ['subfield_d', 'i2'), ('field_b', [('subfield_c', '>f4'), ('subfield_d', 'i2')])]) +print(np_dtype.str) +``` + +```python exec="true" session="data_types" source="above" result="ansi" +z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) +dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] +print(dtype_meta) ``` ### Object Data Type @@ -169,7 +171,7 @@ We do this with an abstract Zarr data type class: [ZDType][zarr.dtype.ZDType] which provides Zarr V2 and Zarr V3 compatibility routines for "native" data types. 
In this context, a "native" data type is a Python class, typically defined in another library, that -models an array's data type. For example, `np.dtypes.UInt8DType` is a native data type defined in NumPy. +models an array's data type. For example, [`numpy.dtypes.UInt8DType`][] is a native data type defined in NumPy. Zarr Python wraps the NumPy `uint8` with a [ZDType][zarr.dtype.ZDType] instance called [UInt8][zarr.dtype.UInt8]. @@ -233,31 +235,31 @@ This section will demonstrates the basic usage of Zarr data types. Create a `ZDType` from a native data type: -```python ->>> from zarr.core.dtype import Int8 ->>> import numpy as np ->>> int8 = Int8.from_native_dtype(np.dtype('int8')) +```python exec="true" session="data_types" source="above" +from zarr.core.dtype import Int8 +import numpy as np +int8 = Int8.from_native_dtype(np.dtype('int8')) ``` Convert back to a native data type: -```python ->>> native_dtype = int8.to_native_dtype() ->>> assert native_dtype == np.dtype('int8') +```python exec="true" session="data_types" source="above" +native_dtype = int8.to_native_dtype() +assert native_dtype == np.dtype('int8') ``` Get the default scalar value for the data type: -```python ->>> default_value = int8.default_scalar() ->>> assert default_value == np.int8(0) +```python exec="true" session="data_types" source="above" +default_value = int8.default_scalar() +assert default_value == np.int8(0) ``` Serialize to JSON for Zarr V2: -```python ->>> json_v2 = int8.to_json(zarr_format=2) ->>> json_v2 +```python exec="true" session="data_types" source="above" result="ansi" +json_v2 = int8.to_json(zarr_format=2) +print(json_v2) {'name': '|i1', 'object_codec_id': None} ``` @@ -272,25 +274,23 @@ Serialize to JSON for Zarr V2: And for V3: -```python ->>> json_v3 = int8.to_json(zarr_format=3) ->>> json_v3 -'int8' +```python exec="true" session="data_types" source="above" result="ansi" +json_v3 = int8.to_json(zarr_format=3) +print(json_v3) ``` Serialize a scalar value to JSON: 
-```python ->>> json_value = int8.to_json_scalar(42, zarr_format=3) ->>> json_value -42 +```python exec="true" session="data_types" source="above" result="ansi" +json_value = int8.to_json_scalar(42, zarr_format=3) +print(json_value) ``` Deserialize a scalar value from JSON: -```python ->>> scalar_value = int8.from_json_scalar(42, zarr_format=3) ->>> assert scalar_value == np.int8(42) +```python exec="true" session="data_types" source="above" +scalar_value = int8.from_json_scalar(42, zarr_format=3) +assert scalar_value == np.int8(42) ``` ### Adding New Data Types @@ -312,34 +312,31 @@ Python project directory. Although Zarr Python uses a different data type model from NumPy, you can still define a Zarr array with a NumPy data type object: -```python ->>> from zarr import create_array ->>> import numpy as np ->>> a = create_array({}, shape=(10,), dtype=np.dtype('int')) ->>> a - +```python exec="true" session="data_types" source="above" result="ansi" +from zarr import create_array +import numpy as np +a = create_array({}, shape=(10,), dtype=np.dtype('int')) +print(a) ``` Or a string representation of a NumPy data type: -```python ->>> a = create_array({}, shape=(10,), dtype='>> a - +```python exec="true" session="data_types" source="above" result="ansi" +a = create_array({}, shape=(10,), dtype='>> type(a.dtype) - +```python exec="true" session="data_types" source="above" result="ansi" +print(type(a.dtype)) ``` But if we inspect the metadata for the array, we can see the Zarr data type object: ```python ->>> type(a.metadata.data_type) +type(a.metadata.data_type) ``` @@ -353,16 +350,17 @@ For simple data types like `int`, the solution could be extremely simple: just maintain a lookup table that maps a NumPy data type to the Zarr data type equivalent. But not all data types are so simple. 
Consider this case: -```python ->>> from zarr import create_array ->>> import warnings ->>> import numpy as np ->>> warnings.simplefilter("ignore", category=FutureWarning) ->>> a = create_array({}, shape=(10,), dtype=[('a', 'f8'), ('b', 'i8')]) ->>> a.dtype # this is the NumPy data type -dtype([('a', '>> a.metadata.data_type # this is the Zarr data type -Structured(fields=(('a', Float64(endianness='little')), ('b', Int64(endianness='little')))) +```python exec="true" session="data_types" source="above" result="ansi" +from zarr import create_array +import warnings +import numpy as np +warnings.simplefilter("ignore", category=FutureWarning) +a = create_array({}, shape=(10,), dtype=[('a', 'f8'), ('b', 'i8')]) +print(a.dtype) # this is the NumPy data type +``` + +```python exec="true" session="data_types" source="above" result="ansi" +print(a.metadata.data_type) # this is the Zarr data type ``` In this example, we created a @@ -394,38 +392,36 @@ handles a range of input types: - NumPy data types: - ```python - >>> import numpy as np - >>> from zarr.dtype import parse_dtype - >>> my_dtype = np.dtype('>M8[10s]') - >>> parse_dtype(my_dtype, zarr_format=2) - DateTime64(endianness='big', scale_factor=10, unit='s') + ```python exec="true" session="data_types" source="above" result="ansi" + import numpy as np + from zarr.dtype import parse_dtype + my_dtype = np.dtype('>M8[10s]') + print(parse_dtype(my_dtype, zarr_format=2)) ``` - NumPy data type-compatible strings: - ```python - >>> dtype_str = '>M8[10s]' - >>> parse_dtype(dtype_str, zarr_format=2) - DateTime64(endianness='big', scale_factor=10, unit='s') + ```python exec="true" session="data_types" source="above" result="ansi" + dtype_str = '>M8[10s]' + print(parse_dtype(dtype_str, zarr_format=2)) ``` - `ZDType` instances: - ```python - >>> from zarr.dtype import DateTime64 - >>> zdt = DateTime64(endianness='big', scale_factor=10, unit='s') - >>> parse_dtype(zdt, zarr_format=2) # Use a ZDType (this is a no-op) - 
DateTime64(endianness='big', 
scale_factor=10, unit='s') + ```python exec="true" session="data_types" source="above" result="ansi" + from zarr.dtype import DateTime64 + zdt = DateTime64(endianness='big', scale_factor=10, unit='s') + print(parse_dtype(zdt, zarr_format=2)) # Use a ZDType (this is a no-op) ``` - Python dictionaries (requires `zarr_format=3`). These dictionaries must be consistent with the `JSON` form of the data type: - ```python - >>> dt_dict = {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}} - >>> parse_dtype(dt_dict, zarr_format=3) - DateTime64(endianness='little', scale_factor=10, unit='s') - >>> parse_dtype(dt_dict, zarr_format=3).to_json(zarr_format=3) - {'name': 'numpy.datetime64', 'configuration': {'unit': 's', 'scale_factor': 10}} + ```python exec="true" session="data_types" source="above" result="ansi" + dt_dict = {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}} + print(parse_dtype(dt_dict, zarr_format=3)) + ``` + + ```python exec="true" session="data_types" source="above" result="ansi" + print(parse_dtype(dt_dict, zarr_format=3).to_json(zarr_format=3)) ``` From 17ad05363b342f3b9714d3554fd4cdcfa2373313 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 13:34:50 -0400 Subject: [PATCH 37/64] Executable code blocks in performance --- docs/user-guide/performance.md | 103 ++++++++------------------------- 1 file changed, 23 insertions(+), 80 deletions(-) diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index aebee5b671..ad101319f1 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -13,49 +13,44 @@ dimension, then chunk across the second dimension. 
If you know you want to chunk across an entire dimension you can use the full size of that dimension within the `chunks` argument, e.g.: -```python +```python exec="true" session="performance" source="above" result="ansi" import zarr z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32') -z1.chunks -# (100, 10000) +print(z1.chunks) ``` Alternatively, if you only ever take slices along the second dimension, then chunk across the first dimension, e.g.: -```python +```python exec="true" session="performance" source="above" result="ansi" z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32') -z2.chunks -# (10000, 100) +print(z2.chunks) ``` If you require reasonable performance for both access patterns then you need to find a compromise, e.g.: -```python +```python exec="true" session="performance" source="above" result="ansi" z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') -z3.chunks -# (1000, 1000) +print(z3.chunks) ``` If you are feeling lazy, you can let Zarr guess a chunk shape for your data by providing `chunks='auto'`, although please note that the algorithm for guessing a chunk shape is based on simple heuristics and may be far from optimal. E.g.: -```python +```python exec="true" session="performance" source="above" result="ansi" z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') -z4.chunks -# (625, 625) +print(z4.chunks) ``` If you know you are always going to be loading the entire array into memory, you can turn off chunks by providing `chunks` equal to `shape`, in which case there will be one single chunk for the array: -```python +```python exec="true" session="performance" source="above" result="ansi" z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32') -z5.chunks -# (10000, 10000) +print(z5.chunks) ``` ### Sharding @@ -81,23 +76,9 @@ write your data in 1 GB increments. 
To use sharding, you need to specify the `shards` parameter when creating the array. -```python +```python exec="true" session="performance" source="above" result="ansi" z6 = zarr.create_array(store={}, shape=(10000, 10000, 1000), shards=(1000, 1000, 1000), chunks=(100, 100, 100), dtype='uint8') -z6.info -# Type : Array -# Zarr format : 3 -# Data type : UInt8() -# Fill value : 0 -# Shape : (10000, 10000, 1000) -# Shard shape : (1000, 1000, 1000) -# Chunk shape : (100, 100, 100) -# Order : C -# Read-only : False -# Store type : MemoryStore -# Filters : () -# Serializer : BytesCodec(endian=None) -# Compressors : (ZstdCodec(level=0, checksum=False),) -# No. bytes : 100000000000 (93.1G) +print(z6.info) ``` ### Chunk memory layout @@ -107,50 +88,21 @@ The order of bytes **within each chunk** of an array can be changed via the multi-dimensional arrays, these two layouts may provide different compression ratios, depending on the correlation structure within the data. E.g.: -```python +```python exec="true" session="performance" source="above" result="ansi" import numpy as np a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'}) c[:] = a -c.info_complete() -# Type : Array -# Zarr format : 3 -# Data type : Int32(endianness='little') -# Fill value : 0 -# Shape : (10000, 10000) -# Chunk shape : (1000, 1000) -# Order : C -# Read-only : False -# Store type : MemoryStore -# Filters : () -# Serializer : BytesCodec(endian=) -# Compressors : (ZstdCodec(level=0, checksum=False),) -# No. bytes : 400000000 (381.5M) -# No. 
bytes stored : 342588911 (326.7M) -# Storage ratio : 1.2 -# Chunks Initialized : 100 +print(c.info_complete()) +``` +```python exec="true" session="performance" source="above" result="ansi" with zarr.config.set({'array.order': 'F'}): f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype) f[:] = a -f.info_complete() -# Type : Array -# Zarr format : 3 -# Data type : Int32(endianness='little') -# Fill value : 0 -# Shape : (10000, 10000) -# Chunk shape : (1000, 1000) -# Order : F -# Read-only : False -# Store type : MemoryStore -# Filters : () -# Serializer : BytesCodec(endian=) -# Compressors : (ZstdCodec(level=0, checksum=False),) -# No. bytes : 400000000 (381.5M) -# No. bytes stored : 342588911 (326.7M) -# Storage ratio : 1.2 -# Chunks Initialized : 100 +print(f.info_complete()) + ``` In the above example, Fortran order gives a better compression ratio. This is an @@ -176,7 +128,7 @@ In this case, creating an array with `write_empty_chunks=True` (the default) wil The following example illustrates the effect of the `write_empty_chunks` flag on the time required to write an array with different values.: -```python +```python exec="true" session="performance" source="above" result="ansi" import zarr import numpy as np import time @@ -212,13 +164,6 @@ def timed_write(write_empty_chunks): for write_empty_chunks in (True, False): full, empty = timed_write(write_empty_chunks) print(f'\nwrite_empty_chunks={write_empty_chunks}:\n\tRandom Data: {full[0]:.4f}s, {full[1]} objects stored\n\t Empty Data: {empty[0]:.4f}s, {empty[1]} objects stored\n') -# write_empty_chunks=True: -# Random Data: ..., 1024 objects stored -# Empty Data: ...s, 1024 objects stored -# -# write_empty_chunks=False: -# Random Data: ...s, 1024 objects stored -# Empty Data: ...s, 0 objects stored ``` In this example, writing random data is slightly slower with `write_empty_chunks=True`, @@ -245,17 +190,15 @@ to re-open any underlying files or databases upon being unpickled. 
E.g., pickle/unpickle an local store array: -```python +```python exec="true" session="performance" source="above" result="ansi" import pickle data = np.arange(100000) -z1 = zarr.create_array(store='data/example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype) +z1 = zarr.create_array(store='data/perf-example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype) z1[:] = data s = pickle.dumps(z1) z2 = pickle.loads(s) -z1 == z2 -# True -np.all(z1[:] == z2[:]) -# np.True_ +assert z1 == z2 +print(np.all(z1[:] == z2[:])) ``` ## Configuring Blosc From ae4b08d10b271399b37e13fe2c60875f97c630ce Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 13:53:24 -0400 Subject: [PATCH 38/64] Use mkdocs build --strict rather than doctest --- .github/workflows/test.yml | 6 +++--- pyproject.toml | 16 +--------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d1921a8306..fc9dcfaa16 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -129,11 +129,11 @@ jobs: pip install hatch - name: Set Up Hatch Env run: | - hatch env create doctest - hatch env run -e doctest list-env + hatch env create docs + hatch env run -e docs list-env - name: Run Tests run: | - hatch env run --env doctest run + hatch env run --env docs check test-complete: name: Test complete diff --git a/pyproject.toml b/pyproject.toml index 81dc6addf4..cd733d5a57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -241,22 +241,13 @@ dependencies = [ 'zarr[remote_tests]', ] - -[tool.hatch.envs.doctest] -features = ["test", "optional", "remote", "remote_tests"] -description = "Test environment for doctests" - -[tool.hatch.envs.doctest.scripts] -run = "rm -r data/; pytest docs/user-guide --doctest-glob='*.rst'" -fix = "rm -r data/; pytest docs/user-guide --doctest-glob='*.rst' --accept" -list-env = "pip list" - [tool.hatch.envs.docs] features = 
['docs', 'remote'] [tool.hatch.envs.docs.scripts] serve = "mkdocs serve" build = "mkdocs build" +check = "mkdocs build --strict" readthedocs = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r site $READTHEDOCS_OUTPUT/html" [tool.ruff] @@ -387,11 +378,6 @@ testpaths = ["tests", "docs/user-guide"] log_cli_level = "INFO" xfail_strict = true asyncio_mode = "auto" -doctest_optionflags = [ - "NORMALIZE_WHITESPACE", - "ELLIPSIS", - "IGNORE_EXCEPTION_DETAIL", -] addopts = [ "--durations=10", "-ra", "--strict-config", "--strict-markers", ] From b662c081f4e79f6aa9be24e80302dce5ac2df53d Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:02:53 -0400 Subject: [PATCH 39/64] Executable code blocks in consolidated metadata --- docs/user-guide/consolidated_metadata.md | 106 +++++++++-------------- 1 file changed, 41 insertions(+), 65 deletions(-) diff --git a/docs/user-guide/consolidated_metadata.md b/docs/user-guide/consolidated_metadata.md index 107e99bc65..d4fc9d6bab 100644 --- a/docs/user-guide/consolidated_metadata.md +++ b/docs/user-guide/consolidated_metadata.md @@ -15,104 +15,80 @@ metadata of the root Group. If consolidated metadata is present in a Zarr Group's metadata then it is used by default. The initial read to open the group will need to communicate with -the store (reading from a file for a `zarr.storage.LocalStore`, making a -network request for a `zarr.storage.FsspecStore`). After that, any subsequent +the store (reading from a file for a [`zarr.storage.LocalStore`][], making a +network request for a [`zarr.storage.FsspecStore`][]). After that, any subsequent metadata reads get child Group or Array nodes will *not* require reads from the store. In Python, the consolidated metadata is available on the `.consolidated_metadata` attribute of the `GroupMetadata` object. 
-```python +```python exec="true" session="consolidated_metadata" source="above" result="ansi" import zarr import warnings warnings.filterwarnings("ignore", category=UserWarning) store = zarr.storage.MemoryStore() group = zarr.create_group(store=store) -group.create_array(shape=(1,), name='a', dtype='float64') -# -group.create_array(shape=(2, 2), name='b', dtype='float64') -# -group.create_array(shape=(3, 3, 3), name='c', dtype='float64') -# -zarr.consolidate_metadata(store) -# +print(group) +array = group.create_array(shape=(1,), name='a', dtype='float64') +print(array) +``` + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +array = group.create_array(shape=(2, 2), name='b', dtype='float64') +print(array) +``` + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +array = group.create_array(shape=(3, 3, 3), name='c', dtype='float64') +print(array) +``` + +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +result = zarr.consolidate_metadata(store) +print(result) ``` If we open that group, the Group's metadata has a `zarr.core.group.ConsolidatedMetadata` that can be used.: -```python +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +from pprint import pprint +import io + consolidated = zarr.open_group(store=store) consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata -from pprint import pprint -pprint(dict(consolidated_metadata.items())) -# {'a': ArrayV3Metadata(shape=(1,), -# data_type=Float64(endianness='little'), -# chunk_grid=RegularChunkGrid(chunk_shape=(1,)), -# chunk_key_encoding=DefaultChunkKeyEncoding(name='default', -# separator='/'), -# fill_value=np.float64(0.0), -# codecs=(BytesCodec(endian=), -# ZstdCodec(level=0, checksum=False)), -# attributes={}, -# dimension_names=None, -# zarr_format=3, -# node_type='array', -# storage_transformers=()), -# 'b': ArrayV3Metadata(shape=(2, 2), -# 
data_type=Float64(endianness='little'), -# chunk_grid=RegularChunkGrid(chunk_shape=(2, 2)), -# chunk_key_encoding=DefaultChunkKeyEncoding(name='default', -# separator='/'), -# fill_value=np.float64(0.0), -# codecs=(BytesCodec(endian=), -# ZstdCodec(level=0, checksum=False)), -# attributes={}, -# dimension_names=None, -# zarr_format=3, -# node_type='array', -# storage_transformers=()), -# 'c': ArrayV3Metadata(shape=(3, 3, 3), -# data_type=Float64(endianness='little'), -# chunk_grid=RegularChunkGrid(chunk_shape=(3, 3, 3)), -# chunk_key_encoding=DefaultChunkKeyEncoding(name='default', -# separator='/'), -# fill_value=np.float64(0.0), -# codecs=(BytesCodec(endian=), -# ZstdCodec(level=0, checksum=False)), -# attributes={}, -# dimension_names=None, -# zarr_format=3, -# node_type='array', -# storage_transformers=())} + +# Note: pprint can normally be used directly, without capturing the output +output = io.StringIO() +pprint(dict(sorted(consolidated_metadata.items())), stream=output, width=60) +print(output.getvalue()) ``` Operations on the group to get children automatically use the consolidated metadata.: -```python -consolidated['a'] # no read / HTTP request to the Store is required -# +```python exec="true" session="consolidated_metadata" source="above" result="ansi" +print(consolidated['a']) # no read / HTTP request to the Store is required ``` With nested groups, the consolidated metadata is available on the children, recursively.: -```python +```python exec="true" session="consolidated_metadata" source="above" result="ansi" child = group.create_group('child', attributes={'kind': 'child'}) grandchild = child.create_group('child', attributes={'kind': 'grandchild'}) consolidated = zarr.consolidate_metadata(store) -consolidated['child'].metadata.consolidated_metadata -# ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False),
node_type='group')}, kind='inline', must_understand=False) +output = io.StringIO() +pprint(consolidated['child'].metadata.consolidated_metadata, stream=output, width=60) +print(output.getvalue()) ``` !!! info "Added in version 3.1.1" - - The keys in the consolidated metadata are sorted prior to writing. Keys are - sorted in ascending order by path depth, where a path is defined as a sequence - of strings joined by `"/"`. For keys with the same path length, lexicographic - order is used to break the tie. This behaviour ensures deterministic metadata - output for a given group. + The keys in the consolidated metadata are sorted prior to writing. Keys are + sorted in ascending order by path depth, where a path is defined as a sequence + of strings joined by `"/"`. For keys with the same path length, lexicographic + order is used to break the tie. This behaviour ensures deterministic metadata + output for a given group. ## Synchronization and Concurrency From f8b71c3e6041e7426153d38e1d7a9fd222c8fcf3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:03:09 -0400 Subject: [PATCH 40/64] Better pprint --- docs/user-guide/config.md | 28 ++++++---------------------- docs/user-guide/groups.md | 5 ++--- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/docs/user-guide/config.md b/docs/user-guide/config.md index b0a0969c4f..965c8f23a4 100644 --- a/docs/user-guide/config.md +++ b/docs/user-guide/config.md @@ -42,26 +42,10 @@ requires the value of `codecs.bytes.name` to be `'custompackage.NewBytesCodec'`. 
This is the current default configuration: -```python -zarr.config.pprint() -# {'array': {'order': 'C', -# 'write_empty_chunks': False}, -# 'async': {'concurrency': 10, 'timeout': None}, -# 'buffer': 'zarr.buffer.cpu.Buffer', -# 'codec_pipeline': {'batch_size': 1, -# 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, -# 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', -# 'bytes': 'zarr.codecs.bytes.BytesCodec', -# 'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec', -# 'endian': 'zarr.codecs.bytes.BytesCodec', -# 'gzip': 'zarr.codecs.gzip.GzipCodec', -# 'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec', -# 'transpose': 'zarr.codecs.transpose.TransposeCodec', -# 'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec', -# 'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec', -# 'zstd': 'zarr.codecs.zstd.ZstdCodec'}, -# 'default_zarr_format': 3, -# 'json_indent': 2, -# 'ndbuffer': 'zarr.buffer.cpu.NDBuffer', -# 'threading': {'max_workers': None}} +```python exec="true" session="config" source="above" result="ansi" +from pprint import pprint +import io +output = io.StringIO() +zarr.config.pprint(stream=output, width=60) +print(output.getvalue()) ``` diff --git a/docs/user-guide/groups.md b/docs/user-guide/groups.md index 5ec5554675..8a3f9ff7bf 100644 --- a/docs/user-guide/groups.md +++ b/docs/user-guide/groups.md @@ -85,9 +85,8 @@ node_spec = {'a/b/c': GroupMetadata()} nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec)) # Report nodes (pprint is used for cleaner rendering in the docs) output = io.StringIO() -pprint(nodes_created, stream=output, width=60, depth=3) -result = output.getvalue() -print(result) +pprint(nodes_created, stream=output, width=60) +print(output.getvalue()) ``` Note that we only specified a single group named `a/b/c`, but 4 groups were created. 
These additional groups From dc0a759fb2f2542a80701de223d944964fedb90e Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:19:26 -0400 Subject: [PATCH 41/64] Add buffer protocol to API --- docs/api/abc/buffer.md | 5 +++++ mkdocs.yml | 1 + 2 files changed, 6 insertions(+) create mode 100644 docs/api/abc/buffer.md diff --git a/docs/api/abc/buffer.md b/docs/api/abc/buffer.md new file mode 100644 index 0000000000..ac814d20b6 --- /dev/null +++ b/docs/api/abc/buffer.md @@ -0,0 +1,5 @@ +--- +title: buffer +--- + +::: zarr.abc.buffer diff --git a/mkdocs.yml b/mkdocs.yml index 2c8c4b3737..d9c3b6eb24 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,6 +43,7 @@ nav: - Async API: api/api_async.md - Sync API: api/api_sync.md - ABC: + - api/abc/buffer.md - api/abc/codec.md - api/abc/metadata.md - api/abc/store.md From 89f7920bd9036f067aae35ba05e43062c5764fea Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:19:41 -0400 Subject: [PATCH 42/64] Cross-references in extending --- docs/user-guide/extending.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/user-guide/extending.md b/docs/user-guide/extending.md index 8a951d50a9..d857fa3356 100644 --- a/docs/user-guide/extending.md +++ b/docs/user-guide/extending.md @@ -21,15 +21,15 @@ There are three types of codecs in Zarr: Array-to-array codecs are used to transform the array data before serializing to bytes. Examples include delta encoding or scaling codecs. Array-to-bytes codecs are used for serializing the array data to bytes. In Zarr, the main codec to use for numeric arrays -is the `zarr.codecs.BytesCodec`. Bytes-to-bytes codecs transform the serialized bytestreams +is the [`zarr.codecs.BytesCodec`][]. Bytes-to-bytes codecs transform the serialized bytestreams of the array data. 
Examples include compression codecs, such as -`zarr.codecs.GzipCodec`, `zarr.codecs.BloscCodec` or -`zarr.codecs.ZstdCodec`, and codecs that add a checksum to the bytestream, such as -`zarr.codecs.Crc32cCodec`. +[`zarr.codecs.GzipCodec`][], [`zarr.codecs.BloscCodec`][] or +[`zarr.codecs.ZstdCodec`][], and codecs that add a checksum to the bytestream, such as +[`zarr.codecs.Crc32cCodec`][]. Custom codecs for Zarr are implemented by subclassing the relevant base class, see -`zarr.abc.codec.ArrayArrayCodec`, `zarr.abc.codec.ArrayBytesCodec` and -`zarr.abc.codec.BytesBytesCodec`. Most custom codecs should implemented the +[`zarr.abc.codec.ArrayArrayCodec`][], [`zarr.abc.codec.ArrayBytesCodec`][] and +[`zarr.abc.codec.BytesBytesCodec`][]. Most custom codecs should implement the `_encode_single` and `_decode_single` methods. These methods operate on single chunks of the array data. Alternatively, custom codecs can implement the `encode` and `decode` methods, which operate on batches of chunks, in case the codec is intended to implement @@ -69,7 +69,7 @@ the codecs from `numcodecs` are prefixed with `numcodecs.`, e.g. `numcodecs.delt It is also possible to register codecs as replacements for existing codecs. This might be useful for providing specialized implementations, such as GPU-based codecs. In case of -multiple codecs, the `zarr.core.config` mechanism can be used to select the preferred +multiple codecs, the [`zarr.config`][] mechanism can be used to select the preferred implementation. ## Custom stores @@ -79,9 +79,9 @@ Coming soon. ## Custom array buffers Zarr-python provides control over where and how arrays stored in memory through -:mod:`zarr.buffer`. Currently both CPU (the default) and GPU implementations are -provided (see :ref:`user-guide-gpu` for more). You can implement your own buffer -classes by implementing the interface defined in :mod:`zarr.abc.buffer`. +[`zarr.abc.buffer.Buffer`][].
Currently both CPU (the default) and GPU implementations are +provided (see [Using GPUs with Zarr](gpu.md) for more information). You can implement your own buffer +classes by implementing the interface defined in [`zarr.abc.buffer.BufferPrototype`][]. ## Other extensions From 069e4b9efb17e2ea947dec814d0bb331c9e58ae1 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:20:07 -0400 Subject: [PATCH 43/64] References in gpu guide --- docs/user-guide/gpu.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md index 994667b515..e2e297bc3a 100644 --- a/docs/user-guide/gpu.md +++ b/docs/user-guide/gpu.md @@ -1,7 +1,7 @@ # Using GPUs with Zarr Zarr can use GPUs to accelerate your workload by running -`zarr.config.enable_gpu`. +[`zarr.config.enable_gpu`][]. !!! note `zarr-python` currently supports reading the ndarray data into device (GPU) @@ -14,18 +14,18 @@ Zarr can use GPUs to accelerate your workload by running ## Reading data into device memory -`zarr.config.enable_gpu` configures Zarr to use GPU memory for the data -buffers used internally by Zarr. +[`zarr.config`][] configures Zarr to use GPU memory for the data +buffers used internally by Zarr via the `enable_gpu`. 
```python import zarr -import cupy as cp # doctest: +SKIP -zarr.config.enable_gpu() # doctest: +SKIP -store = zarr.storage.MemoryStore() # doctest: +SKIP -z = zarr.create_array( # doctest: +SKIP +import cupy as cp +zarr.config.enable_gpu() +store = zarr.storage.MemoryStore() +z = zarr.create_array( store=store, shape=(100, 100), chunks=(10, 10), dtype="float32", ) -type(z[:10, :10]) # doctest: +SKIP +type(z[:10, :10]) # cupy.ndarray ``` From 2ee63fef46c49ec7cde37ac8ccbef1eaf8325402 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:20:14 -0400 Subject: [PATCH 44/64] References in gpu guide --- docs/user-guide/gpu.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md index e2e297bc3a..2701aa329f 100644 --- a/docs/user-guide/gpu.md +++ b/docs/user-guide/gpu.md @@ -15,7 +15,7 @@ Zarr can use GPUs to accelerate your workload by running ## Reading data into device memory [`zarr.config`][] configures Zarr to use GPU memory for the data -buffers used internally by Zarr via the `enable_gpu`. +buffers used internally by Zarr via `enable_gpu()`. 
```python import zarr From 2257aa1c491d8999594eadb680ba16fd4aa4f629 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:20:32 -0400 Subject: [PATCH 45/64] Fix indentation --- src/zarr/core/buffer/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 07bdb8c26e..381c318176 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -218,7 +218,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: Parameters ---------- bytes_like - bytes-like object + bytes-like object Returns ------- From 60459604aa74f35a44482ae8caa816a4a09ec1cd Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:54:55 -0400 Subject: [PATCH 46/64] Formatting --- docs/contributing.md | 35 +++++++++++++++++++---------------- docs/user-guide/gpu.md | 3 +-- src/zarr/core/buffer/cpu.py | 2 +- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 756b5401b5..7bfa6f6a18 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -22,9 +22,12 @@ g = zarr.group() 3. Information about the version of Zarr, along with versions of dependencies and the Python interpreter, and installation information. The version of Zarr can be obtained from the `zarr.__version__` property. Please also state how Zarr was installed, e.g., "installed via pip into a virtual environment", or "installed using conda". Information about other packages installed can be obtained by executing `pip freeze` (if using pip to install packages) or `conda env export` (if using conda to install packages) from the operating system command prompt. 
The version of the Python interpreter can be obtained by running a Python interactive session, e.g.: +```console +python ``` -$ python - Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin + +```ansi +Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin ``` ## Enhancement proposals @@ -44,9 +47,9 @@ The Zarr source code is hosted on GitHub at the following location: You will need your own fork to work on the code. Go to the link above and hit the ["Fork"](https://github.com/zarr-developers/zarr-python/fork) button. Then clone your fork to your local machine: ```bash -$ git clone git@github.com:your-user-name/zarr-python.git -$ cd zarr-python -$ git remote add upstream git@github.com:zarr-developers/zarr-python.git +git clone git@github.com:your-user-name/zarr-python.git +cd zarr-python +git remote add upstream git@github.com:zarr-developers/zarr-python.git ``` ### Creating a development environment @@ -54,14 +57,14 @@ $ git remote add upstream git@github.com:zarr-developers/zarr-python.git To work with the Zarr source code, it is recommended to use [hatch](https://hatch.pypa.io/latest/index.html) to create and manage development environments. Hatch will automatically install all Zarr dependencies using the same versions as are used by the core developers and continuous integration services. 
Assuming you have a Python 3 interpreter already installed, and you have cloned the Zarr source code and your current working directory is the root of the repository, you can do something like the following: ```bash -$ pip install hatch -$ hatch env show # list all available environments +pip install hatch +hatch env show # list all available environments ``` To verify that your development environment is working, you can run the unit tests for one of the test environments, e.g.: ```bash -$ hatch env run --env test.py3.12-2.2-optional run-pytest +hatch env run --env test.py3.12-2.2-optional run-pytest ``` ### Creating a branch @@ -99,7 +102,7 @@ Again, any conflicts need to be resolved before submitting a pull request. Zarr includes a suite of unit tests. The simplest way to run the unit tests is to activate your development environment (see [creating a development environment](#creating-a-development-environment) above) and invoke: ```bash -$ hatch env run --env test.py3.12-2.2-optional run-pytest +hatch env run --env test.py3.12-2.2-optional run-pytest ``` All tests are automatically run via GitHub Actions for every pull request and must pass before code can be accepted. Test coverage is also collected automatically via the Codecov service. @@ -113,19 +116,19 @@ All code must conform to the PEP8 standard. Regarding line length, lines up to 1 `Zarr` uses a set of `pre-commit` hooks and the `pre-commit` bot to format, type-check, and prettify the codebase. `pre-commit` can be installed locally by running: ```bash -$ python -m pip install pre-commit +python -m pip install pre-commit ``` The hooks can be installed locally by running: ```bash -$ pre-commit install +pre-commit install ``` This would run the checks every time a commit is created locally. These checks will also run on every commit pushed to an open PR, resulting in some automatic styling fixes by the `pre-commit` bot. 
The checks will by default only run on the files modified by a commit, but the checks can be triggered for all the files by running: ```bash -$ pre-commit run --all-files +pre-commit run --all-files ``` If you would like to skip the failing checks and push the code for further discussion, use the `--no-verify` option with `git commit`. @@ -137,7 +140,7 @@ If you would like to skip the failing checks and push the code for further discu Zarr strives to maintain 100% test coverage under the latest Python stable release. Both unit tests and docstring doctests are included when computing coverage. Running: ```bash -$ hatch env run --env test.py3.12-2.2-optional run-coverage +hatch env run --env test.py3.12-2.2-optional run-coverage ``` will automatically run the test suite with coverage and produce a XML coverage report. This should be 100% before code can be accepted into the main code base. @@ -145,7 +148,7 @@ will automatically run the test suite with coverage and produce a XML coverage r You can also generate an HTML coverage report by running: ```bash -$ hatch env run --env test.py3.12-2.2-optional run-coverage-html +hatch env run --env test.py3.12-2.2-optional run-coverage-html ``` When submitting a pull request, coverage will also be collected across all supported Python versions via the Codecov service, and will be reported back within the pull request. Codecov coverage must also be 100% before code can be accepted. @@ -159,7 +162,7 @@ Zarr uses mkdocs for documentation, hosted on readthedocs.org. Documentation is The documentation can be built locally by running: ```bash -$ hatch --env docs run build +hatch --env docs run build ``` The resulting built documentation will be available in the `docs/_build/html` folder. 
@@ -167,7 +170,7 @@ The resulting built documentation will be available in the `docs/_build/html` fo Hatch can also be used to serve continuously updating version of the documentation during development at [http://0.0.0.0:8000/](http://0.0.0.0:8000/). This can be done by running: ```bash -$ hatch --env docs run serve +hatch --env docs run serve ``` ### Changelog diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md index 2701aa329f..3317bdf065 100644 --- a/docs/user-guide/gpu.md +++ b/docs/user-guide/gpu.md @@ -1,7 +1,6 @@ # Using GPUs with Zarr -Zarr can use GPUs to accelerate your workload by running -[`zarr.config.enable_gpu`][]. +Zarr can use GPUs to accelerate your workload by running `zarr.Config.enable_gpu`. !!! note `zarr-python` currently supports reading the ndarray data into device (GPU) diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 9da0059d0b..bb12c7699e 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -86,7 +86,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: Parameters ---------- bytes_like - bytes-like object + bytes-like object Returns ------- From bffedcf3b95dd1876ffd586ef9332e6c57add7c2 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 17:33:27 -0400 Subject: [PATCH 47/64] Add new release notes --- docs/release-notes.md | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/docs/release-notes.md b/docs/release-notes.md index 34f5a7fe65..48187e4308 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -2,15 +2,44 @@ +## 3.1.1 (2025-07-28) + +### Features + +- Add lightweight implementations of `.getsize()` and `.getsize_prefix()` for ObjectStore. ([#3227](https://github.com/zarr-developers/zarr-python/issues/3227)) + +### Bugfixes + +- Creating a Zarr format 2 array with the `order` keyword argument no longer raises a warning. 
([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- Fixed the error message when passing both `config` and `write_empty_chunks` arguments to reflect the current behaviour (`write_empty_chunks` takes precedence). ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- Creating a Zarr format 3 array with the `order` argument now consistently ignores this argument and raises a warning. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- When using [`from_array`][zarr.api.asynchronous.from_array] to copy a Zarr format 2 array to a Zarr format 3 array, if the memory order of the input array is `"F"` a warning is raised and the order ignored. This is because Zarr format 3 arrays are always stored in "C" order. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- The `config` argument to [`zarr.create`][zarr.create] (and functions that create arrays) is now used - previously it had no effect. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) +- Ensure that all abstract methods of [`ZDType`][zarr.core.dtype.ZDType] raise a `NotImplementedError` when invoked. ([#3251](https://github.com/zarr-developers/zarr-python/issues/3251)) +- Register 'gpu' marker with pytest for downstream StoreTests. ([#3258](https://github.com/zarr-developers/zarr-python/issues/3258)) +- Expand the range of types accepted by `parse_data_type` to include strings and Sequences. +- Move the functionality of `zarr.core.dtype.parse_data_type` to a new function called `zarr.dtype.parse_dtype`. This change ensures that nomenclature is consistent across the codebase. `zarr.core.dtype.parse_data_type` remains, so this change is not breaking. ([#3264](https://github.com/zarr-developers/zarr-python/issues/3264)) +- Fix a regression introduced in 3.1.0 that prevented `inf`, `-inf`, and `nan` values from being stored in `attributes`. 
([#3280](https://github.com/zarr-developers/zarr-python/issues/3280)) +- Fixes [`Group.nmembers()`][zarr.Group.nmembers] ignoring depth when using consolidated metadata. ([#3287](https://github.com/zarr-developers/zarr-python/issues/3287)) + +### Improved Documentation + +- Expand the data type docs to include a demonstration of the `parse_data_type` function. Expand the docstring for the `parse_data_type` function. ([#3249](https://github.com/zarr-developers/zarr-python/issues/3249)) +- Add a section on codecs to the migration guide. ([#3273](https://github.com/zarr-developers/zarr-python/issues/3273)) + +### Misc + +- Remove warnings about vlen-utf8 and vlen-bytes codecs ([#3268](https://github.com/zarr-developers/zarr-python/issues/3268)) + ## 3.1.0 (2025-07-14) ### Features - Ensure that invocations of `create_array` use consistent keyword arguments, with consistent defaults. - `zarr.api.synchronous.create_array` now takes a `write_data` keyword argument + [`zarr.api.synchronous.create_array`][] now takes a `write_data` keyword argument The `Group.create_array` method takes `data` and `write_data` keyword arguments. - The functions `api.asynchronous.create`, `api.asynchronous.create_array` + The functions [`zarr.api.asynchronous.create`][], [`zarr.api.asynchronous.create_array`][] and the methods `Group.create_array`, `Group.array`, had the default `fill_value` changed from `0` to the `DEFAULT_FILL_VALUE` value, which instructs Zarr to use the default scalar value associated with the array's data type as the fill value.
These are From 4d19f2c701fb18a71f150beb3e5c5a8f2662258d Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 22:45:39 -0400 Subject: [PATCH 48/64] Inherit docstrings --- mkdocs.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index d9c3b6eb24..85a5aae646 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -33,6 +33,7 @@ nav: - api/open.md - api/load.md - api/save.md + - api/buffer.md - api/convenience.md - api/config.md - api/codecs.md @@ -117,11 +118,10 @@ plugins: python: paths: [src/zarr] options: - # We set allow_inspection: false to ensure that all docstrings come - # from the pyi files, not the Rust-facing doc comments. - allow_inspection: false + allow_inspection: true docstring_section_style: list docstring_style: numpy + inherited_members: true line_length: 60 separate_signature: true show_root_heading: true @@ -129,6 +129,8 @@ plugins: show_source: false show_symbol_type_toc: true signature_crossrefs: true + extensions: + - griffe_inherited_docstrings inventories: - https://docs.python.org/3/objects.inv From b7ca3b458ec0f928907ac08bb180306eee269d52 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 22:45:54 -0400 Subject: [PATCH 49/64] Add buffer API page --- docs/api/buffer.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/api/buffer.md diff --git a/docs/api/buffer.md b/docs/api/buffer.md new file mode 100644 index 0000000000..aa089957cf --- /dev/null +++ b/docs/api/buffer.md @@ -0,0 +1,3 @@ +::: zarr.buffer +::: zarr.buffer.cpu +::: zarr.buffer.gpu From cf33263ec56a793e08faaf8e5f35a7c9a7d347f9 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 22:50:27 -0400 Subject: [PATCH 50/64] Fix some links --- src/zarr/core/array.py | 307 ++++++++++++++++++++++++++--------------- src/zarr/core/group.py | 16 +-- 2 files changed, 206 
insertions(+), 117 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 311a0eb986..f828d5268b 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -476,7 +476,7 @@ async def create( """Method to create a new asynchronous array instance. .. deprecated:: 3.0.0 - Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. + Deprecated in favor of [`zarr.api.asynchronous.create_array`][]. Parameters ---------- @@ -526,7 +526,7 @@ async def create( order : Literal["C", "F"], optional The memory of the array (default is "C"). If ``zarr_format`` is 2, this parameter sets the memory order of the array. - If `zarr_format`` is 3, then this parameter is deprecated, because memory order + If ``zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. filters : list[dict[str, JSON]], optional @@ -615,8 +615,7 @@ async def _create( config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Method to create a new asynchronous array instance. - See :func:`AsyncArray.create` for more details. - Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. + Deprecated in favor of [`zarr.api.asynchronous.create_array`][]. 
""" dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) @@ -1610,15 +1609,15 @@ async def setitem( @property def oindex(self) -> AsyncOIndex[T_ArrayMetadata]: - """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and - :func:`set_orthogonal_selection` for documentation and examples.""" + """Shortcut for orthogonal (outer) indexing, see [get_orthogonal_selection][zarr.Array.get_orthogonal_selection] and + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection] for documentation and examples.""" return AsyncOIndex(self) @property def vindex(self) -> AsyncVIndex[T_ArrayMetadata]: - """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, - :func:`set_coordinate_selection`, :func:`get_mask_selection` and - :func:`set_mask_selection` for documentation and examples.""" + """Shortcut for vectorized (inner) indexing, see [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_mask_selection][zarr.Array.get_mask_selection] and + [set_mask_selection][zarr.Array.set_mask_selection] for documentation and examples.""" return AsyncVIndex(self) async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: @@ -1774,10 +1773,9 @@ def info(self) -> Any: ------- ArrayInfo - See Also + Related -------- - AsyncArray.info_complete - All information about a group, including dynamic information + [zarr.AsyncArray.info_complete][] - All information about a group, including dynamic information like the number of bytes and chunks written. Examples @@ -1813,10 +1811,9 @@ async def info_complete(self) -> Any: ------- ArrayInfo - See Also + Related -------- - AsyncArray.info - A property giving just the statically known information about an array. + [zarr.AsyncArray.info][] - A property giving just the statically known information about an array. 
""" return self._info( await self.nchunks_initialized(), @@ -1889,7 +1886,7 @@ def create( """Creates a new Array instance from an initialized store. .. deprecated:: 3.0.0 - Deprecated in favor of :func:`zarr.create_array`. + Deprecated in favor of [`zarr.create_array`][]. Parameters ---------- @@ -2016,8 +2013,7 @@ def _create( config: ArrayConfigLike | None = None, ) -> Array: """Creates a new Array instance from an initialized store. - See :func:`Array.create` for more details. - Deprecated in favor of :func:`zarr.create_array`. + Deprecated in favor of [`zarr.create_array`][]. """ async_array = sync( AsyncArray._create( @@ -2538,9 +2534,9 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: fields Currently the implementation for __getitem__ is provided by - :func:`vindex` if the indexing is pure fancy indexing (ie a + [`vindex`][zarr.Array.vindex] if the indexing is pure fancy indexing (ie a broadcast-compatible tuple of integer array indices), or by - :func:`set_basic_selection` otherwise. + [`set_basic_selection`][zarr.Array.set_basic_selection] otherwise. Effectively, this means that the following indexing modes are supported: @@ -2551,14 +2547,16 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: - fancy indexing (vectorized list of integers) For specific indexing options including outer indexing, see the - methods listed under See Also. + methods listed under Related. 
- See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection] + [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__setitem__][zarr.Array.__setitem__] """ fields, pure_selection = pop_fields(selection) @@ -2637,27 +2635,35 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: fields Currently the implementation for __setitem__ is provided by - :func:`vindex` if the indexing is pure fancy indexing (ie a + [`vindex`][zarr.Array.vindex] if the indexing is pure fancy indexing (ie a broadcast-compatible tuple of integer array indices), or by - :func:`set_basic_selection` otherwise. + [`set_basic_selection`][zarr.Array.set_basic_selection] otherwise. 
Effectively, this means that the following indexing modes are supported: - - integer indexing - - slice indexing - - mixed slice and integer indexing - - boolean indexing - - fancy indexing (vectorized list of integers) + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) For specific indexing options including outer indexing, see the - methods listed under See Also. + methods listed under Related. - See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__] """ fields, pure_selection = pop_fields(selection) @@ -2768,15 +2774,23 @@ def get_basic_selection( the `fields` parameter. This method provides the implementation for accessing data via the - square bracket notation (__getitem__). See :func:`__getitem__` for examples + square bracket notation (__getitem__). See [`__getitem__`][zarr.Array.__getitem__] for examples using the alternative notation. 
- See Also + Related -------- - set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ @@ -2870,15 +2884,23 @@ def set_basic_selection( the `fields` parameter. This method provides the underlying implementation for modifying data via square - bracket notation, see :func:`__setitem__` for equivalent examples using the + bracket notation, see [`__setitem__`][zarr.Array.__setitem__] for equivalent examples using the alternative notation. 
- See Also + Related -------- - get_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -2993,12 +3015,20 @@ def get_orthogonal_selection( Slices with step > 1 are supported, but slices with negative step are not. 
- See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3105,13 +3135,20 @@ def set_orthogonal_selection( Slices with step > 1 are supported, but slices with negative step are not. 
- See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() @@ -3185,12 +3222,20 @@ def get_mask_selection( coordinate indexing. Internally the mask array is converted to coordinate arrays by calling `np.nonzero`. 
- See Also + Related -------- - get_basic_selection, set_basic_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3267,12 +3312,20 @@ def set_mask_selection( coordinate indexing. Internally the mask array is converted to coordinate arrays by calling `np.nonzero`. 
- See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3347,12 +3400,20 @@ def get_coordinate_selection( before being applied. The shape of the output will be the same as the shape of each coordinate array after broadcasting. 
- See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3431,12 +3492,20 @@ def set_coordinate_selection( Slices are not supported. Coordinate arrays must be provided for all dimensions of the array. 
- See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3547,13 +3616,20 @@ def get_block_selection( [13, 14, 15, 16, 17, 18], [23, 24, 25, 26, 27, 28]]) - See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [set_block_selection][zarr.Array.set_block_selection], + 
[vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() @@ -3640,12 +3716,20 @@ def set_block_selection( Slices are supported. However, only with a step size of one. - See Also + Related -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ + [get_basic_selection][zarr.Array.get_basic_selection], + [set_basic_selection][zarr.Array.set_basic_selection], + [get_mask_selection][zarr.Array.get_mask_selection], + [set_mask_selection][zarr.Array.set_mask_selection], + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [get_block_selection][zarr.Array.get_block_selection], + [set_block_selection][zarr.Array.set_block_selection], + [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], + [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], + [__setitem__][zarr.Array.__setitem__] """ if prototype is None: @@ -3655,21 +3739,28 @@ def set_block_selection( @property def vindex(self) -> VIndex: - """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, - :func:`set_coordinate_selection`, :func:`get_mask_selection` and - :func:`set_mask_selection` for documentation and examples.""" + """Shortcut for vectorized (inner) indexing, see + [get_coordinate_selection][zarr.Array.get_coordinate_selection], + [set_coordinate_selection][zarr.Array.set_coordinate_selection], + [get_mask_selection][zarr.Array.get_mask_selection] and + [set_mask_selection][zarr.Array.set_mask_selection] for documentation and + 
examples.""" return VIndex(self) @property def oindex(self) -> OIndex: - """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and - :func:`set_orthogonal_selection` for documentation and examples.""" + """Shortcut for orthogonal (outer) indexing, see + [get_orthogonal_selection][zarr.Array.get_orthogonal_selection] and + [set_orthogonal_selection][zarr.Array.set_orthogonal_selection] for + documentation and examples.""" return OIndex(self) @property def blocks(self) -> BlockIndex: - """Shortcut for blocked chunked indexing, see :func:`get_block_selection` and - :func:`set_block_selection` for documentation and examples.""" + """Shortcut for blocked chunked indexing, see + [get_block_selection][zarr.Array.get_block_selection] and + [set_block_selection][zarr.Array.set_block_selection] for documentation and + examples.""" return BlockIndex(self) def resize(self, new_shape: ShapeLike) -> None: @@ -3789,11 +3880,10 @@ def info(self) -> Any: ------- ArrayInfo - See Also + Related -------- - Array.info_complete - All information about a group, including dynamic information - like the number of bytes and chunks written. + [zarr.Array.info_complete][] - All information about a group, + including dynamic information like the number of bytes and chunks written. Examples -------- @@ -3826,10 +3916,9 @@ def info_complete(self) -> Any: ------- ArrayInfo - See Also + Related -------- - Array.info - The statically known subset of metadata about an array. + [zarr.Array.info][] - The statically known subset of metadata about an array. """ return sync(self._async_array.info_complete()) @@ -3850,9 +3939,9 @@ async def chunks_initialized( chunks_initialized : tuple[str, ...] The keys of the chunks that have been initialized. 
- See Also + Related -------- - nchunks_initialized + [nchunks_initialized][zarr.Array.nchunks_initialized] """ store_contents = [ diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 4bdc7b549f..6e55812d31 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -863,9 +863,9 @@ def info(self) -> Any: ------- GroupInfo - See Also + Related -------- - AsyncGroup.info_complete + [zarr.AsyncGroup.info_complete][] All information about a group, including dynamic information """ @@ -887,9 +887,9 @@ async def info_complete(self) -> Any: ------- GroupInfo - See Also + Related -------- - AsyncGroup.info + [zarr.AsyncGroup.info][] """ members = [x[1].metadata async for x in self.members(max_depth=None)] return self._info(members=members) @@ -2072,9 +2072,9 @@ def info(self) -> Any: ------- GroupInfo - See Also + Related -------- - Group.info_complete + [zarr.Group.info_complete][] All information about a group, including dynamic information like the children members. """ @@ -2091,9 +2091,9 @@ def info_complete(self) -> Any: ------- GroupInfo - See Also + Related -------- - Group.info + [zarr.Group.info][] """ return self._sync(self._async_group.info_complete()) From 47bbaa5157e48a1d790f7d6b3aca4099e7170846 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 23:13:51 -0400 Subject: [PATCH 51/64] Update changelog check --- ci/check_changelog_entries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/check_changelog_entries.py b/ci/check_changelog_entries.py index 9f883f0be4..da2700e32a 100644 --- a/ci/check_changelog_entries.py +++ b/ci/check_changelog_entries.py @@ -25,13 +25,13 @@ def is_int(s: str) -> bool: print(f"Found {len(entries)} entries") print() - bad_suffix = [e for e in entries if e.suffix != ".rst"] + bad_suffix = [e for e in entries if e.suffix != ".md"] bad_issue_no = [e for e in entries if not is_int(e.name.split(".")[0])] bad_type = [e for e in 
entries if e.name.split(".")[1] not in VALID_CHANGELOG_TYPES] if len(bad_suffix) or len(bad_issue_no) or len(bad_type): if len(bad_suffix): - print("Changelog entries without .rst suffix") + print("Changelog entries without .md suffix") print("-------------------------------------") print("\n".join([p.name for p in bad_suffix])) print() From 38fc403c91e1ccf1e833db46a6d2e9af16a04753 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 13 Aug 2025 23:14:11 -0400 Subject: [PATCH 52/64] Update dependency --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index cd733d5a57..124b9d6d19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ docs = [ "mike>=2.1.3", "mkdocs-redirects>=1.2.0", "markdown-exec[ansi]", + "griffe-inherited-docstrings", "ruff", # Changelog generation 'towncrier', @@ -249,6 +250,7 @@ serve = "mkdocs serve" build = "mkdocs build" check = "mkdocs build --strict" readthedocs = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r site $READTHEDOCS_OUTPUT/html" +list-env = "pip list" [tool.ruff] line-length = 100 From 7f3e224c3df4890a55cf82413f465c917a5a88c9 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 12:56:13 -0400 Subject: [PATCH 53/64] Update cross-references --- src/zarr/abc/store.py | 2 +- src/zarr/api/asynchronous.py | 42 ++++++------ src/zarr/api/synchronous.py | 50 +++++++------- src/zarr/codecs/numcodecs/_codecs.py | 8 +-- src/zarr/core/array.py | 46 ++++++------- src/zarr/core/group.py | 98 ++++++++++++++-------------- src/zarr/storage/_memory.py | 4 +- 7 files changed, 125 insertions(+), 125 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index e8d1329b17..4b3edf78d1 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -450,7 +450,7 @@ async def getsize_prefix(self, prefix: str) -> int: Notes ----- ``getsize_prefix`` is just provided as a 
potentially faster alternative to - listing all the keys under a prefix calling :meth:`Store.getsize` on each. + listing all the keys under a prefix calling [`Store.getsize`][zarr.abc.store.Store.getsize] on each. In general, ``prefix`` should be the path of an Array or Group in the Store. Implementations may differ on the behavior when some other ``prefix`` diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index dcfadf6a3f..afd8931638 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -341,8 +341,8 @@ async def open( If the store is backed by an fsspec-based implementation, then this dict will be passed to the Store constructor for that implementation. Ignored otherwise. **kwargs - Additional parameters are passed through to :func:`zarr.creation.open_array` or - :func:`zarr.hierarchy.open_group`. + Additional parameters are passed through to [zarr.creation.open_array][] or + [zarr.api.asynchronous.open_group][]. Returns ------- @@ -387,7 +387,7 @@ async def open_consolidated( *args: Any, use_consolidated: Literal[True] = True, **kwargs: Any ) -> AsyncGroup: """ - Alias for :func:`open_group` with ``use_consolidated=True``. + Alias for [`open_group`][zarr.api.asynchronous.open_group] with ``use_consolidated=True``. """ if use_consolidated is not True: raise TypeError( @@ -457,7 +457,7 @@ async def save_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Passed through to :func:`create`, e.g., compressor. + Passed through to [`create`][zarr.api.asynchronous.create], e.g., compressor. """ zarr_format = ( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) @@ -587,7 +587,7 @@ async def array( data : array_like The data to fill the array with. **kwargs - Passed through to :func:`create`. + Passed through to [`create`][zarr.api.asynchronous.create]. 
Returns ------- @@ -723,7 +723,7 @@ async def create_group( The zarr format to use when saving. If no ``zarr_format`` is provided, the default format will be used. This default can be changed by modifying the value of ``default_zarr_format`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. @@ -772,12 +772,12 @@ async def open_group( Store or path to directory in file system or name of zip file. Strings are interpreted as paths on the local file system - and used as the ``root`` argument to :class:`zarr.storage.LocalStore`. + and used as the ``root`` argument to [zarr.storage.LocalStore][]. Dictionaries are used as the ``store_dict`` argument in - :class:`zarr.storage.MemoryStore``. + [zarr.storage.MemoryStore][]. - By default (``store=None``) a new :class:`zarr.storage.MemoryStore` + By default (``store=None``) a new [zarr.storage.MemoryStore][] is created. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional @@ -930,7 +930,7 @@ async def create( - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``. These defaults can be changed by modifying the value of ``array.v3_default_filters``, - ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`. + ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. compressor : Codec, optional Primary compressor to compress chunk data. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. @@ -941,7 +941,7 @@ async def create( - For Unicode strings, the default is ``VLenUTF8Codec``. - For bytes or objects, the default is ``VLenBytesCodec``. - These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`. 
+ These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. fill_value : object Default value to use for uninitialized portions of the array. order : {'C', 'F'}, optional @@ -964,7 +964,7 @@ async def create( filters : sequence of Codecs, optional Sequence of filters to use to encode chunk data prior to compression. Zarr format 2 only. If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`. + These defaults can be changed by modifying the value of ``array.v2_default_filters`` in [`zarr.config`][zarr.config]. cache_metadata : bool, optional If True, array configuration metadata will be cached for the lifetime of the object. If False, array metadata will be reloaded @@ -1081,7 +1081,7 @@ async def empty( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Notes ----- @@ -1104,7 +1104,7 @@ async def empty_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1135,7 +1135,7 @@ async def full( fill_value : scalar Fill value. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1156,7 +1156,7 @@ async def full_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1180,7 +1180,7 @@ async def ones( shape : int or tuple of int Shape of the empty array. 
**kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1200,7 +1200,7 @@ async def ones_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1236,7 +1236,7 @@ async def open_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Any keyword arguments to pass to :func:`create`. + Any keyword arguments to pass to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1304,7 +1304,7 @@ async def zeros( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1324,7 +1324,7 @@ async def zeros_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 1e47208dcc..d29a60e09a 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -198,8 +198,8 @@ def open( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Additional parameters are passed through to :func:`zarr.api.asynchronous.open_array` or - :func:`zarr.api.asynchronous.open_group`. + Additional parameters are passed through to [zarr.api.asynchronous.open_array][] or + [zarr.api.asynchronous.open_group][]. 
Returns ------- @@ -225,7 +225,7 @@ def open( def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group: """ - Alias for :func:`open_group` with ``use_consolidated=True``. + Alias for [`open_group`][zarr.api.synchronous.open_group] with ``use_consolidated=True``. """ return Group( sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs)) @@ -290,7 +290,7 @@ def save_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Passed through to :func:`create`, e.g., compressor. + Passed through to [`create`][zarr.api.synchronous.create], e.g., compressor. """ return sync( async_api.save_array( @@ -382,7 +382,7 @@ def array(data: npt.ArrayLike | Array, **kwargs: Any) -> Array: data : array_like The data to fill the array with. **kwargs - Passed through to :func:`create`. + Passed through to [`create`][zarr.api.synchronous.create]. Returns ------- @@ -483,12 +483,12 @@ def open_group( Store or path to directory in file system or name of zip file. Strings are interpreted as paths on the local file system - and used as the ``root`` argument to :class:`zarr.storage.LocalStore`. + and used as the ``root`` argument to [zarr.storage.LocalStore][]. Dictionaries are used as the ``store_dict`` argument in - :class:`zarr.storage.MemoryStore``. + [zarr.storage.MemoryStore][]. - By default (``store=None``) a new :class:`zarr.storage.MemoryStore` + By default (``store=None``) a new [zarr.storage.MemoryStore][] is created. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional @@ -580,7 +580,7 @@ def create_group( The zarr format to use when saving. If no ``zarr_format`` is provided, the default format will be used. This default can be changed by modifying the value of ``default_zarr_format`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. 
storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. @@ -781,7 +781,7 @@ def create_array( ) -> Array: """Create an array. - This function wraps :func:`zarr.core.array.create_array`. + This function wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -811,14 +811,14 @@ def create_array( of ``ArrayArrayCodec``. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any @@ -828,20 +828,20 @@ def create_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. 
serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -851,7 +851,7 @@ def create_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. attributes : dict, optional @@ -1021,7 +1021,7 @@ def from_array( - dict[str, JSON]: A dict representation of an ``ArrayBytesCodec``. - ArrayBytesCodec: An instance of ``ArrayBytesCodec``. - "auto": a default serializer will be used. These defaults can be changed by modifying the value of - ``array.v3_default_serializer`` in :mod:`zarr.core.config`. + ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. - "keep": Retain the serializer of the input array if it is a zarr Array. fill_value : Any, optional @@ -1145,7 +1145,7 @@ def empty(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1172,7 +1172,7 @@ def empty_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. 
**kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1199,7 +1199,7 @@ def full(shape: tuple[int, ...], fill_value: Any, **kwargs: Any) -> Array: fill_value : scalar Fill value. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1219,7 +1219,7 @@ def full_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1238,7 +1238,7 @@ def ones(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1257,7 +1257,7 @@ def ones_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1340,7 +1340,7 @@ def zeros(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1359,7 +1359,7 @@ def zeros_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. 
Returns ------- diff --git a/src/zarr/codecs/numcodecs/_codecs.py b/src/zarr/codecs/numcodecs/_codecs.py index c7884700c7..3d86aee24e 100644 --- a/src/zarr/codecs/numcodecs/_codecs.py +++ b/src/zarr/codecs/numcodecs/_codecs.py @@ -1,7 +1,7 @@ """ -This module provides compatibility for :py:mod:`numcodecs` in Zarr version 3. +This module provides compatibility for [numcodecs][] in Zarr version 3. -These codecs were previously defined in :py:mod:`numcodecs`, and have now been moved to `zarr`. +These codecs were previously defined in [numcodecs][], and have now been moved to `zarr`. >>> import zarr >>> import zarr.codecs.numcodecs as numcodecs @@ -17,7 +17,7 @@ .. note:: - Please note that the codecs in :py:mod:`zarr.codecs.numcodecs` are not part of the Zarr version + Please note that the codecs in [zarr.codecs.numcodecs][] are not part of the Zarr version 3 specification. Using these codecs might cause interoperability issues with other Zarr implementations. """ @@ -81,7 +81,7 @@ def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs: Any) -> N cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}" cls.codec_name = f"{CODEC_PREFIX}{namespace}" cls.__doc__ = f""" - See :class:`{cls_name}` for more details and parameters. + See [{cls_name}][] for more details and parameters. """ def __init__(self, **codec_config: JSON) -> None: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9c27a2998a..aabfdfe250 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -529,7 +529,7 @@ async def create( - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``. These defaults can be changed by modifying the value of ``array.v3_default_filters``, - ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`. + ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. 
dimension_names : Iterable[str | None], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. @@ -550,7 +550,7 @@ async def create( Sequence of filters to use to encode chunk data prior to compression. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`. + These defaults can be changed by modifying the value of ``array.v2_default_filters`` in [`zarr.config`][zarr.config]. compressor : dict[str, JSON], optional The compressor used to compress the data (default is None). Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. @@ -561,7 +561,7 @@ async def create( - For Unicode strings, the default is ``VLenUTF8Codec``. - For bytes or objects, the default is ``VLenBytesCodec``. - These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`. + These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. overwrite : bool, optional Whether to raise an error if the store already exists (default is False). data : npt.ArrayLike, optional @@ -1283,8 +1283,8 @@ async def nchunks_initialized(self) -> int: Notes ----- - On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous) - property :attr:`Array.nchunks_initialized`. + On [`AsyncArray`][zarr.AsyncArray] this is an asynchronous method, unlike the (synchronous) + property [`Array.nchunks_initialized`][zarr.Array.nchunks_initialized]. Examples -------- @@ -1316,8 +1316,8 @@ async def _nshards_initialized(self) -> int: Notes ----- - On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous) - property :attr:`Array._nshards_initialized`. 
+ On [`AsyncArray`][zarr.AsyncArray] this is an asynchronous method, unlike the (synchronous) + property [`Array._nshards_initialized`][zarr.Array._nshards_initialized]. Examples -------- @@ -2066,7 +2066,7 @@ def create( - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``. These defaults can be changed by modifying the value of ``array.v3_default_filters``, - ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`. + ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. dimension_names : Iterable[str | None], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. @@ -2087,7 +2087,7 @@ def create( Sequence of filters to use to encode chunk data prior to compression. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`. + These defaults can be changed by modifying the value of ``array.v2_default_filters`` in [`zarr.config`][zarr.config]. compressor : dict[str, JSON], optional Primary compressor to compress chunk data. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. @@ -2098,7 +2098,7 @@ def create( - For Unicode strings, the default is ``VLenUTF8Codec``. - For bytes or objects, the default is ``VLenBytesCodec``. - These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`. + These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. overwrite : bool, optional Whether to raise an error if the store already exists (default is False). @@ -4351,7 +4351,7 @@ async def from_array( - dict[str, JSON]: A dict representation of an ``ArrayBytesCodec``. 
- ArrayBytesCodec: An instance of ``ArrayBytesCodec``. - "auto": a default serializer will be used. These defaults can be changed by modifying the value of - ``array.v3_default_serializer`` in :mod:`zarr.core.config`. + ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. - "keep": Retain the serializer of the input array if it is a zarr Array. fill_value : Any, optional @@ -4560,14 +4560,14 @@ async def init_array( of ``ArrayArrayCodec``. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any @@ -4577,20 +4577,20 @@ async def init_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. 
serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -4600,7 +4600,7 @@ async def init_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. attributes : dict, optional @@ -4789,14 +4789,14 @@ async def create_array( of ``ArrayArrayCodec``. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. compressors : Iterable[Codec], optional List of compressors to apply to the array. 
Compressors are applied in order, and after any @@ -4806,20 +4806,20 @@ async def create_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -4829,7 +4829,7 @@ async def create_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. 
attributes : dict, optional diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index d532418eab..e129a997c8 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1038,7 +1038,7 @@ async def create_array( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array within this group. - This method lightly wraps :func:`zarr.core.array.create_array`. + This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -1063,14 +1063,14 @@ async def create_array( of ``ArrayArrayCodec``. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any @@ -1080,13 +1080,13 @@ async def create_array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. 
+ in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. @@ -1095,7 +1095,7 @@ async def create_array( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -1105,7 +1105,7 @@ async def create_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional @@ -1169,14 +1169,14 @@ async def create_dataset( The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.require_dataset` method. + with h5py, Zarr groups also implement the [zarr.AsyncGroup.require_dataset][] method. Parameters ---------- name : str Array name. **kwargs : dict - Additional arguments passed to :func:`zarr.AsyncGroup.create_array`. + Additional arguments passed to [zarr.AsyncGroup.create_array][]. Returns ------- @@ -1209,9 +1209,9 @@ async def require_dataset( The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.require_dataset` instead. Arrays are known as "datasets" in HDF5 terminology. 
For compatibility - with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.create_dataset` method. + with h5py, Zarr groups also implement the [zarr.AsyncGroup.create_dataset][] method. - Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + Other `kwargs` are as per [zarr.AsyncGroup.create_dataset][]. Parameters ---------- @@ -1242,7 +1242,7 @@ async def require_array( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Obtain an array, creating if it doesn't exist. - Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + Other `kwargs` are as per [zarr.AsyncGroup.create_dataset][]. Parameters ---------- @@ -1629,7 +1629,7 @@ async def empty( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Notes ----- @@ -1651,7 +1651,7 @@ async def zeros( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1672,7 +1672,7 @@ async def ones( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1695,7 +1695,7 @@ async def full( fill_value : scalar Value to fill the array with. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1723,7 +1723,7 @@ async def empty_like( data : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. 
Returns ------- @@ -1744,7 +1744,7 @@ async def zeros_like( data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1765,7 +1765,7 @@ async def ones_like( data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -1786,7 +1786,7 @@ async def full_like( data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2469,7 +2469,7 @@ def create_array( ) -> Array: """Create an array within this group. - This method lightly wraps :func:`zarr.core.array.create_array`. + This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -2496,14 +2496,14 @@ def create_array( of ``ArrayArrayCodec``. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any @@ -2513,13 +2513,13 @@ def create_array( returns another bytestream. 
Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. @@ -2528,7 +2528,7 @@ def create_array( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -2538,7 +2538,7 @@ def create_array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional @@ -2602,14 +2602,14 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array: Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the :func:`zarr.Group.require_dataset` method. 
+ with h5py, Zarr groups also implement the [zarr.Group.require_dataset][] method. Parameters ---------- name : str Array name. **kwargs : dict - Additional arguments passed to :func:`zarr.Group.create_array` + Additional arguments passed to [zarr.Group.create_array][] Returns ------- @@ -2625,16 +2625,16 @@ def require_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Arra The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the :func:`zarr.Group.create_dataset` method. + with h5py, Zarr groups also implement the [zarr.Group.create_dataset][] method. - Other `kwargs` are as per :func:`zarr.Group.create_dataset`. + Other `kwargs` are as per [zarr.Group.create_dataset][]. Parameters ---------- name : str Array name. **kwargs : - See :func:`zarr.Group.create_dataset`. + See [zarr.Group.create_dataset][]. Returns ------- @@ -2645,14 +2645,14 @@ def require_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Arra def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. - Other `kwargs` are as per :func:`zarr.Group.create_array`. + Other `kwargs` are as per [zarr.Group.create_array][]. Parameters ---------- name : str Array name. **kwargs : - See :func:`zarr.Group.create_array`. + See [zarr.Group.create_array][]. Returns ------- @@ -2671,7 +2671,7 @@ def empty(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Notes ----- @@ -2691,7 +2691,7 @@ def zeros(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. 
**kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2710,7 +2710,7 @@ def ones(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2733,7 +2733,7 @@ def full( fill_value : scalar Value to fill the array with. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2757,7 +2757,7 @@ def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> data : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2782,7 +2782,7 @@ def zeros_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2802,7 +2802,7 @@ def ones_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> A data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- @@ -2821,7 +2821,7 @@ def full_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> A data : array-like The array to create the new array like. **kwargs - Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + Keyword arguments passed to [zarr.api.asynchronous.create][]. 
Returns ------- @@ -2867,7 +2867,7 @@ def array( .. deprecated:: 3.0.0 Use `Group.create_array` instead. - This method lightly wraps :func:`zarr.core.array.create_array`. + This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- @@ -2892,14 +2892,14 @@ def array( of ``ArrayArrayCodec``. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. If no ``filters`` are provided, a default set of filters will be used. These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default filters. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any @@ -2909,13 +2909,13 @@ def array( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. @@ -2924,7 +2924,7 @@ def array( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. 
If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -2934,7 +2934,7 @@ def array( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional diff --git a/src/zarr/storage/_memory.py b/src/zarr/storage/_memory.py index e6076d9669..a3fd058680 100644 --- a/src/zarr/storage/_memory.py +++ b/src/zarr/storage/_memory.py @@ -188,7 +188,7 @@ class GpuMemoryStore(MemoryStore): Parameters ---------- store_dict : MutableMapping, optional - A mutable mapping with string keys and :class:`zarr.core.buffer.gpu.Buffer` + A mutable mapping with string keys and [zarr.core.buffer.gpu.Buffer][] values. read_only : bool Whether to open the store in read-only mode. @@ -222,7 +222,7 @@ def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: ---------- store_dict : mapping A mapping of strings keys to arbitrary Buffers. The buffer data - will be moved into a :class:`gpu.Buffer`. + will be moved into a [`gpu.Buffer`][zarr.core.buffer.gpu.Buffer]. 
Returns ------- From ba782a893659474e340fa35c58bec957d541e091 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 13:10:30 -0400 Subject: [PATCH 54/64] Add cross-reference --- src/zarr/core/array.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index aabfdfe250..64184fb018 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2324,12 +2324,12 @@ def dtype(self) -> np.dtype[Any]: @property def attrs(self) -> Attributes: - """Returns a MutableMapping containing user-defined attributes. + """Returns a [MutableMapping][collections.abc.MutableMapping] containing user-defined attributes. Returns ------- - attrs : MutableMapping - A MutableMapping object containing user-defined attributes. + attrs + A [MutableMapping][collections.abc.MutableMapping] object containing user-defined attributes. Notes ----- From d2bc7d673474c68c6ddba9b0993eed1aaf524b7f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 13:57:30 -0400 Subject: [PATCH 55/64] Convert references and admonitions --- src/zarr/api/asynchronous.py | 4 +- src/zarr/api/synchronous.py | 4 +- src/zarr/codecs/numcodecs/_codecs.py | 3 +- src/zarr/core/array.py | 24 ++++--- src/zarr/core/attributes.py | 14 ++-- src/zarr/core/config.py | 18 ++--- src/zarr/core/dtype/npy/bool.py | 2 +- src/zarr/core/dtype/npy/bytes.py | 71 +++++++++---------- src/zarr/core/dtype/npy/float.py | 6 +- src/zarr/core/dtype/npy/int.py | 16 ++--- src/zarr/core/dtype/npy/string.py | 44 ++++++------ src/zarr/core/dtype/npy/structured.py | 42 ++++++------ src/zarr/core/dtype/npy/time.py | 98 +++++++++++++-------------- src/zarr/core/group.py | 23 ++++--- 14 files changed, 189 insertions(+), 180 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index afd8931638..9b2f3d4f96 100644 --- a/src/zarr/api/asynchronous.py 
+++ b/src/zarr/api/asynchronous.py @@ -556,8 +556,8 @@ async def save_group( async def tree(grp: AsyncGroup, expand: bool | None = None, level: int | None = None) -> Any: """Provide a rich display of the hierarchy. - .. deprecated:: 3.0.0 - `zarr.tree()` is deprecated and will be removed in a future release. + !!! warning "Deprecated" + `zarr.tree()` is deprecated since v3.0.0 and will be removed in a future release. Use `group.tree()` instead. Parameters diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index d29a60e09a..b9a4929b5e 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -352,8 +352,8 @@ def save_group( def tree(grp: Group, expand: bool | None = None, level: int | None = None) -> Any: """Provide a rich display of the hierarchy. - .. deprecated:: 3.0.0 - `zarr.tree()` is deprecated and will be removed in a future release. + !!! warning "Deprecated" + `zarr.tree()` is deprecated since v3.0.0 and will be removed in a future release. Use `group.tree()` instead. Parameters diff --git a/src/zarr/codecs/numcodecs/_codecs.py b/src/zarr/codecs/numcodecs/_codecs.py index 3d86aee24e..03b5ea9a2d 100644 --- a/src/zarr/codecs/numcodecs/_codecs.py +++ b/src/zarr/codecs/numcodecs/_codecs.py @@ -15,8 +15,7 @@ ... compressors=[numcodecs.zarr3.BZ2(level=5)]) >>> array[:] = np.arange(*array.shape).astype(array.dtype) -.. note:: - +!!! note Please note that the codecs in [zarr.codecs.numcodecs][] are not part of the Zarr version 3 specification. Using these codecs might cause interoperability issues with other Zarr implementations. diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 64184fb018..a070f02ad1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -492,8 +492,9 @@ async def create( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Method to create a new asynchronous array instance. - .. deprecated:: 3.0.0 - Deprecated in favor of [`zarr.api.asynchronous.create_array`][]. 
+ !!! warning "Deprecated" + `AsyncArray.create()` is deprecated since v3.0.0 and will be removed in a future release. + Use [`zarr.api.asynchronous.create_array`][] instead. Parameters ---------- @@ -1082,9 +1083,9 @@ def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. - .. deprecated:: 3.0.0 - `array.compressor` is deprecated and will be removed in a future release. - Use `array.compressors` instead. + !!! warning "Deprecated" + `Array.compressor` is deprecated since v3.0.0 and will be removed in a future release. + Use [`Array.compressors`][zarr.AsyncArray.compressors] instead. """ if self.metadata.zarr_format == 2: return self.metadata.compressor @@ -2035,8 +2036,9 @@ def create( ) -> Array: """Creates a new Array instance from an initialized store. - .. deprecated:: 3.0.0 - Deprecated in favor of [`zarr.create_array`][]. + !!! warning "Deprecated" + `Array.create()` is deprecated since v3.0.0 and will be removed in a future release. + Use [`zarr.create_array`][] instead. Parameters ---------- @@ -2080,7 +2082,7 @@ def create( order : Literal["C", "F"], optional The memory of the array (default is "C"). If ``zarr_format`` is 2, this parameter sets the memory order of the array. - If `zarr_format`` is 3, then this parameter is deprecated, because memory order + If ``zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``. filters : list[dict[str, JSON]], optional @@ -2393,9 +2395,9 @@ def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. - .. deprecated:: 3.0.0 - `array.compressor` is deprecated and will be removed in a future release. - Use `array.compressors` instead. + !!! warning "Deprecated" + `array.compressor` is deprecated since v3.0.0 and will be removed in a future release. 
+ Use [`array.compressors`][zarr.Array.compressors] instead. """ return self._async_array.compressor diff --git a/src/zarr/core/attributes.py b/src/zarr/core/attributes.py index e699c4f66d..e000839436 100644 --- a/src/zarr/core/attributes.py +++ b/src/zarr/core/attributes.py @@ -42,13 +42,13 @@ def put(self, d: dict[str, JSON]) -> None: Equivalent to the following pseudo-code, but performed atomically. - .. code-block:: python - - >>> attrs = {"a": 1, "b": 2} - >>> attrs.clear() - >>> attrs.update({"a": 3", "c": 4}) - >>> attrs - {'a': 3, 'c': 4} + ```python + >>> attrs = {"a": 1, "b": 2} + >>> attrs.clear() + >>> attrs.update({"a": 3, "c": 4}) + >>> attrs + {'a': 3, 'c': 4} + ``` """ self._obj.metadata.attributes.clear() self._obj = self._obj.update_attributes(d) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 2b7fbbe0c6..0479d38803 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -8,21 +8,21 @@ to be ``your.module.NewBytesCodec``. Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations. - .. code-block:: python + ```python + from your.module import NewBytesCodec + from zarr.core.config import register_codec, config - from your.module import NewBytesCodec - from zarr.core.config import register_codec, config - - register_codec("bytes", NewBytesCodec) - config.set({"codecs.bytes": "your.module.NewBytesCodec"}) + register_codec("bytes", NewBytesCodec) + config.set({"codecs.bytes": "your.module.NewBytesCodec"}) + ``` Instead of setting the value programmatically with ``config.set``, you can also set the value with an environment variable. The environment variable ``ZARR_CODECS__BYTES`` can be set to ``your.module.NewBytesCodec``. The double underscore ``__`` is used to indicate nested access. - .. 
code-block:: bash - - export ZARR_CODECS__BYTES="your.module.NewBytesCodec" + ```bash + export ZARR_CODECS__BYTES="your.module.NewBytesCodec" + ``` For more information, see the Donfig documentation at https://github.com/pytroll/donfig. """ diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index 37371cd0cd..5c99eae3a3 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -41,7 +41,7 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): ---------- This class implements the boolean data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ _zarr_v3_name: ClassVar[Literal["bool"]] = "bool" diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index b7c764dcd9..50d4974253 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -36,11 +36,11 @@ class FixedLengthBytesConfig(TypedDict): Examples -------- - .. code-block:: python - - { - "length_bytes": 12 - } + ```python + { + "length_bytes": 12 + } + ``` """ length_bytes: int @@ -56,17 +56,17 @@ class NullterminatedBytesJSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "|S10", - "object_codec_id": None - } + ```python + { + "name": "|S10", + "object_codec_id": None + } + ``` """ @@ -83,14 +83,14 @@ class NullTerminatedBytesJSON_V3( Examples -------- - .. 
code-block:: python - - { - "name": "null_terminated_bytes", - "configuration": { - "length_bytes": 12 - } + ```python + { + "name": "null_terminated_bytes", + "configuration": { + "length_bytes": 12 } + } + ``` """ @@ -105,17 +105,18 @@ class RawBytesJSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python + ```python { "name": "|V10", "object_codec_id": None } + ``` """ @@ -130,12 +131,14 @@ class RawBytesJSON_V3(NamedConfig[Literal["raw_bytes"], FixedLengthBytesConfig]) Examples -------- - .. code-block:: python - - { - "name": "raw_bytes", - "configuration": { - "length_bytes": 12 + ```python + { + "name": "raw_bytes", + "configuration": { + "length_bytes": 12 + } + } + ``` """ @@ -149,16 +152,16 @@ class VariableLengthBytesJSON_V2(DTypeConfig_V2[Literal["|O"], Literal["vlen-byt References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "|O", - "object_codec_id": "vlen-bytes" - } + ```python + { + "name": "|O", + "object_codec_id": "vlen-bytes" + } + ``` """ diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index 3113bc5b61..ae64ba3f43 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -326,7 +326,7 @@ class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): ---------- This class implements the float16 data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. 
+ See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float16DType @@ -363,7 +363,7 @@ class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): ---------- This class implements the float32 data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float32DType @@ -400,7 +400,7 @@ class Float64(BaseFloat[np.dtypes.Float64DType, np.float64]): ---------- This class implements the float64 data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float64DType diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index ac04d4469a..a8b4f1ad87 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -248,7 +248,7 @@ class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): ---------- This class implements the 8-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. 
+ See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int8DType @@ -404,7 +404,7 @@ class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): ---------- This class implements the 8-bit unsigned integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt8DType @@ -551,7 +551,7 @@ class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): ---------- This class implements the 16-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int16DType @@ -713,7 +713,7 @@ class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): ---------- This class implements the unsigned 16-bit unsigned integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. 
""" dtype_cls = np.dtypes.UInt16DType @@ -875,7 +875,7 @@ class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): ---------- This class implements the 32-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int32DType @@ -1058,7 +1058,7 @@ class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): ---------- This class implements the 32-bit unsigned integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt32DType @@ -1216,7 +1216,7 @@ class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): ---------- This class implements the 64-bit signed integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. + See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int64DType @@ -1374,7 +1374,7 @@ class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): ---------- This class implements the unsigned 64-bit integer data type defined in Zarr V2 and V3. - See the `Zarr V2 `__ and `Zarr V3 `__ specification documents for details. 
+ See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt64DType diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 32375a1c71..ee8cc71aaf 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -70,17 +70,17 @@ class FixedLengthUTF32JSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "`__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "|O", - "object_codec_id": "vlen-utf8" - } + ```python + { + "name": "|O", + "object_codec_id": "vlen-utf8" + } + ``` """ @@ -467,7 +467,7 @@ class UTF8Base(ZDType[TDType_co, str], HasObjectCodec): ---------- This data type does not have a Zarr V3 specification. - The Zarr V2 data type specification can be found `here `__. + The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). """ _zarr_v3_name: ClassVar[Literal["string"]] = "string" diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index a0e3b0fbd4..7aa546ea9c 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -41,19 +41,19 @@ class StructuredJSON_V2(DTypeConfig_V2[StructuredName_V2, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. 
+ [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": [ - ["f0", "`__. + The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). """ _zarr_v3_name: ClassVar[Literal["structured"]] = "structured" diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index d523e16940..402a140321 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -113,9 +113,9 @@ class TimeConfig(TypedDict): Examples -------- - .. code-block:: python - - {"unit": "ms", "scale_factor": 1} + ```python + {"unit": "ms", "scale_factor": 1} + ``` """ unit: ReadOnly[DateTimeUnit] @@ -129,19 +129,19 @@ class DateTime64JSON_V3(NamedConfig[Literal["numpy.datetime64"], TimeConfig]): References ---------- This representation is defined in the ``numpy.datetime64`` - `specification document `__. + [specification document](https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-datetime64). Examples -------- - .. code-block:: python - - { - "name": "numpy.datetime64", - "configuration": { - "unit": "ms", - "scale_factor": 1 - } - } + ```python + { + "name": "numpy.datetime64", + "configuration": { + "unit": "ms", + "scale_factor": 1 + } + } + ``` """ @@ -152,19 +152,19 @@ class TimeDelta64JSON_V3(NamedConfig[Literal["numpy.timedelta64"], TimeConfig]): References ---------- This representation is defined in the numpy.timedelta64 - `specification document `__. + [specification document](https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-timedelta64). Examples -------- - .. 
code-block:: python - - { - "name": "numpy.timedelta64", - "configuration": { - "unit": "ms", - "scale_factor": 1 - } - } + ```python + { + "name": "numpy.timedelta64", + "configuration": { + "unit": "ms", + "scale_factor": 1 + } + } + ``` """ @@ -178,17 +178,17 @@ class TimeDelta64JSON_V2(DTypeConfig_V2[str, None]): References ---------- The structure of the ``name`` field is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "`__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- - .. code-block:: python - - { - "name": "`__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). The Zarr V3 representation of this data type is defined in the ``numpy.timedelta64`` - `specification document `__ + [specification document](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.timedelta64) """ # mypy infers the type of np.dtypes.TimeDelta64DType to be @@ -452,15 +452,15 @@ def _from_json_v3(cls, data: DTypeJSON) -> Self: For example: - .. code-block:: json - - { - "name": "numpy.timedelta64", - "configuration": { - "unit": "generic", - "scale_factor": 1 - } + ```json + { + "name": "numpy.timedelta64", + "configuration": { + "unit": "generic", + "scale_factor": 1 } + } + ``` """ if cls._check_json_v3(data): @@ -615,10 +615,10 @@ class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEnd References ---------- The Zarr V2 representation of this data type is defined in the Zarr V2 - `specification document `__. + [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). 
The Zarr V3 representation of this data type is defined in the ``numpy.datetime64`` - `specification document `__ + [specification document](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.datetime64) """ dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index e129a997c8..f4d44864ce 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1165,8 +1165,9 @@ async def create_dataset( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead. + !!! warning "Deprecated" + `AsyncGroup.create_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. + Use `AsyncGroup.create_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the [zarr.AsyncGroup.require_dataset][] method. @@ -1205,8 +1206,9 @@ async def require_dataset( ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Obtain an array, creating if it doesn't exist. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.require_dataset` instead. + !!! warning "Deprecated" + `AsyncGroup.require_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. + Use `AsyncGroup.require_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the [zarr.AsyncGroup.create_dataset][] method. @@ -2597,8 +2599,9 @@ def create_array( def create_dataset(self, name: str, **kwargs: Any) -> Array: """Create an array. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `Group.create_array` instead. + !!! warning "Deprecated" + `Group.create_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0.
+ Use `Group.create_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility @@ -2621,8 +2624,9 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array: def require_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. - .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead. + !!! warning "Deprecated" + `Group.require_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. + Use `Group.require_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the [zarr.Group.create_dataset][] method. @@ -2864,7 +2868,8 @@ def array( ) -> Array: """Create an array within this group. - .. deprecated:: 3.0.0 + !!! warning "Deprecated" + `Group.array()` is deprecated since v3.0.0 and will be removed in a future release. Use `Group.create_array` instead. This method lightly wraps [zarr.core.array.create_array][]. From defe9c259df7a1a34dad6b78740caf603863361d Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 14:59:47 -0400 Subject: [PATCH 56/64] Improve numpy dtype cross-references --- src/zarr/core/dtype/npy/bool.py | 10 +++++----- src/zarr/core/dtype/npy/bytes.py | 18 +++++++++--------- src/zarr/core/dtype/npy/complex.py | 8 ++++---- src/zarr/core/dtype/npy/float.py | 12 ++++++------ src/zarr/core/dtype/npy/int.py | 30 +++++++++++++++--------------- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index 5c99eae3a3..3e7f5b72f0 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -23,8 +23,8 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): """ A Zarr data type for arrays containing booleans. - Wraps the ``np.dtypes.BoolDType`` data type. 
Scalars for this data type are instances of - ``np.bool_``. + Wraps the [`np.dtypes.BoolDType`][numpy.dtypes.BoolDType] data type. Scalars for this data type are instances of + [`np.bool_`][numpy.bool_]. Attributes ---------- @@ -236,7 +236,7 @@ def cast_scalar(self, data: object) -> np.bool_: Returns ------- - ``np.bool_`` + bool : np.bool_ The numpy boolean scalar. Raises @@ -258,7 +258,7 @@ def default_scalar(self) -> np.bool_: Returns ------- - ``np.bool_`` + bool : np.bool_ The default value. """ return np.False_ @@ -294,7 +294,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: Returns ------- - ``np.bool_`` + bool : np.bool_ The numpy boolean scalar. Raises diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index 50d4974253..16c3326f63 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -170,8 +170,8 @@ class NullTerminatedBytes(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLengt """ A Zarr data type for arrays containing fixed-length null-terminated byte sequences. - Wraps the ``np.dtypes.BytesDType`` data type. Scalars for this data type are instances of - ``np.bytes_``. + Wraps the [`np.dtypes.BytesDType`][numpy.dtypes.BytesDType] data type. Scalars for this data type are instances of + [`np.bytes_`][numpy.bytes_]. This data type is parametrized by an integral length which specifies size in bytes of each scalar. Because this data type uses null-terminated semantics, indexing into @@ -413,7 +413,7 @@ def _check_scalar(self, data: object) -> TypeGuard[BytesLike]: def _cast_scalar_unchecked(self, data: BytesLike) -> np.bytes_: """ - Cast the provided scalar data to ``np.bytes_``, truncating if necessary. + Cast the provided scalar data to [`np.bytes_`][numpy.bytes_], truncating if necessary. 
Parameters ---------- @@ -422,7 +422,7 @@ def _cast_scalar_unchecked(self, data: BytesLike) -> np.bytes_: Returns ------- - np.bytes_ + bytes : [`np.bytes_`][numpy.bytes_] The casted data as a NumPy bytes scalar. Notes @@ -450,7 +450,7 @@ def cast_scalar(self, data: object) -> np.bytes_: Returns ------- - ``np.bytes_`` + bytes : [`np.bytes_`][numpy.bytes_] The data cast as a NumPy bytes scalar. Raises @@ -473,7 +473,7 @@ def default_scalar(self) -> np.bytes_: Returns ------- - ``np.bytes_`` + bytes : [`np.bytes_`][numpy.bytes_] The default scalar value. """ return np.bytes_(b"") @@ -502,7 +502,7 @@ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: """ - Read a JSON-serializable value as ``np.bytes_``. + Read a JSON-serializable value as [`np.bytes_`][numpy.bytes_]. Parameters ---------- @@ -513,7 +513,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: Returns ------- - ``np.bytes_`` + bytes : [`np.bytes_`][numpy.bytes_] The NumPy bytes scalar obtained from decoding the base64 string. Raises @@ -546,7 +546,7 @@ class RawBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize """ A Zarr data type for arrays containing fixed-length sequences of raw bytes. - Wraps the NumPy ``void`` data type. Scalars for this data type are instances of ``np.void``. + Wraps the NumPy ``void`` data type. Scalars for this data type are instances of [`np.void`][numpy.void]. This data type is parametrized by an integral length which specifies size in bytes of each scalar belonging to this data type. 
diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index 2f432a9e0a..99abee5e24 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -353,8 +353,8 @@ class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): """ A Zarr data type for arrays containing 64 bit complex floats. - Wraps the ``np.dtypes.Complex64DType`` data type. Scalars for this data type - are instances of ``np.complex64``. + Wraps the [`np.dtypes.Complex64DType`][numpy.dtypes.Complex64DType] data type. Scalars for this data type + are instances of [`np.complex64`][numpy.complex64]. Attributes ---------- @@ -388,8 +388,8 @@ class Complex128(BaseComplex[np.dtypes.Complex128DType, np.complex128], HasEndia """ A Zarr data type for arrays containing 64 bit complex floats. - Wraps the ``np.dtypes.Complex128DType`` data type. Scalars for this data type - are instances of ``np.complex128``. + Wraps the [`np.dtypes.Complex128DType`][numpy.dtypes.Complex128DType] data type. Scalars for this data type + are instances of [`np.complex128`][numpy.complex128]. Attributes ---------- diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index ae64ba3f43..bedb44b52d 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -314,8 +314,8 @@ class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): """ A Zarr data type for arrays containing 16-bit floating point numbers. - Wraps the ``np.dtypes.Float16DType`` data type. Scalars for this data type are instances - of ``np.float16``. + Wraps the [`np.dtypes.Float16DType`][numpy.dtypes.Float16DType] data type. Scalars for this data type are instances + of [`np.float16`][numpy.float16]. Attributes ---------- @@ -351,8 +351,8 @@ class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): """ A Zarr data type for arrays containing 32-bit floating point numbers. - Wraps the ``np.dtypes.Float32DType`` data type. 
Scalars for this data type are instances - of ``np.float32``. + Wraps the [`np.dtypes.Float32DType`][numpy.dtypes.Float32DType] data type. Scalars for this data type are instances + of [`np.float32`][numpy.float32]. Attributes ---------- @@ -388,8 +388,8 @@ class Float64(BaseFloat[np.dtypes.Float64DType, np.float64]): """ A Zarr data type for arrays containing 64-bit floating point numbers. - Wraps the ``np.dtypes.Float64DType`` data type. Scalars for this data type are instances - of ``np.float64``. + Wraps the [`np.dtypes.Float64DType`][numpy.dtypes.Float64DType] data type. Scalars for this data type are instances + of [`np.float64`][numpy.float64]. Attributes ---------- diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index a8b4f1ad87..6f7ebc2f55 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -236,8 +236,8 @@ class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): """ A Zarr data type for arrays containing 8-bit signed integers. - Wraps the ``np.dtypes.Int8DType`` data type. Scalars for this data type are - instances of ``np.int8``. + Wraps the [`np.dtypes.Int8DType`][numpy.dtypes.Int8DType] data type. Scalars for this data type are + instances of [`np.int8`][numpy.int8]. Attributes ---------- @@ -393,7 +393,7 @@ class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): """ A Zarr data type for arrays containing 8-bit unsigned integers. - Wraps the ``np.dtypes.UInt8DType`` data type. Scalars for this data type are instances of ``np.uint8``. + Wraps the [`np.dtypes.UInt8DType`][numpy.dtypes.UInt8DType] data type. Scalars for this data type are instances of [`np.uint8`][numpy.uint8]. Attributes ---------- @@ -539,8 +539,8 @@ class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): """ A Zarr data type for arrays containing 16-bit signed integers. - Wraps the ``np.dtypes.Int16DType`` data type. Scalars for this data type are instances of - ``np.int16``. 
+ Wraps the [`np.dtypes.Int16DType`][numpy.dtypes.Int16DType] data type. Scalars for this data type are instances of + [`np.int16`][numpy.int16]. Attributes ---------- @@ -701,8 +701,8 @@ class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): """ A Zarr data type for arrays containing 16-bit unsigned integers. - Wraps the ``np.dtypes.UInt16DType`` data type. Scalars for this data type are instances of - ``np.uint16``. + Wraps the [`np.dtypes.UInt16DType`][numpy.dtypes.UInt16DType] data type. Scalars for this data type are instances of + [`np.uint16`][numpy.uint16]. Attributes ---------- @@ -863,8 +863,8 @@ class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): """ A Zarr data type for arrays containing 32-bit signed integers. - Wraps the ``np.dtypes.Int32DType`` data type. Scalars for this data type are instances of - ``np.int32``. + Wraps the [`np.dtypes.Int32DType`][numpy.dtypes.Int32DType] data type. Scalars for this data type are instances of + [`np.int32`][numpy.int32]. Attributes ---------- @@ -1046,8 +1046,8 @@ class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): """ A Zarr data type for arrays containing 32-bit unsigned integers. - Wraps the ``np.dtypes.UInt32DType`` data type. Scalars for this data type are instances of - ``np.uint32``. + Wraps the [`np.dtypes.UInt32DType`][numpy.dtypes.UInt32DType] data type. Scalars for this data type are instances of + [`np.uint32`][numpy.uint32]. Attributes ---------- @@ -1204,8 +1204,8 @@ class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): """ A Zarr data type for arrays containing 64-bit signed integers. - Wraps the ``np.dtypes.Int64DType`` data type. Scalars for this data type are instances of - ``np.int64``. + Wraps the [`np.dtypes.Int64DType`][numpy.dtypes.Int64DType] data type. Scalars for this data type are instances of + [`np.int64`][numpy.int64]. 
Attributes ---------- @@ -1362,8 +1362,8 @@ class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): """ A Zarr data type for arrays containing 64-bit unsigned integers. - Wraps the ``np.dtypes.UInt64DType`` data type. Scalars for this data type - are instances of ``np.uint64``. + Wraps the [`np.dtypes.UInt64DType`][numpy.dtypes.UInt64DType] data type. Scalars for this data type + are instances of [`np.uint64`][numpy.uint64]. Attributes ---------- From 8c7f82994282e21c59d781e5a8b28118a303f909 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 15:00:02 -0400 Subject: [PATCH 57/64] Show source --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 85a5aae646..0259fd5eec 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -126,7 +126,7 @@ plugins: separate_signature: true show_root_heading: true show_signature_annotations: true - show_source: false + show_source: true show_symbol_type_toc: true signature_crossrefs: true extensions: From 0aaf48f109a545f491e8b99d3f715014dfde222a Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 15:08:29 -0400 Subject: [PATCH 58/64] Update changelog entries to markdown --- changes/{2859.removal.rst => 2859.removal.md} | 0 changes/3130.feature.md | 1 - changes/{3310.feature.rst => 3310.feature.md} | 0 changes/{3376.misc.rst => 3376.misc.md} | 0 changes/{3403.misc.rst => 3403.misc.md} | 0 changes/{3411.bugfix.rst => 3411.bugfix.md} | 0 changes/{3422.bugfix.rst => 3422.bugfix.md} | 0 changes/{3425.bugfix.rst => 3425.bugfix.md} | 0 changes/{3428.bugfix.rst => 3428.bugfix.md} | 0 changes/{3431.bugfix.rst => 3431.bugfix.md} | 0 changes/{3448.bugfix.rst => 3448.bugfix.md} | 0 changes/{3449.misc.rst => 3449.misc.md} | 0 12 files changed, 1 deletion(-) rename changes/{2859.removal.rst => 2859.removal.md} (100%) delete mode 100644 changes/3130.feature.md rename 
changes/{3310.feature.rst => 3310.feature.md} (100%) rename changes/{3376.misc.rst => 3376.misc.md} (100%) rename changes/{3403.misc.rst => 3403.misc.md} (100%) rename changes/{3411.bugfix.rst => 3411.bugfix.md} (100%) rename changes/{3422.bugfix.rst => 3422.bugfix.md} (100%) rename changes/{3425.bugfix.rst => 3425.bugfix.md} (100%) rename changes/{3428.bugfix.rst => 3428.bugfix.md} (100%) rename changes/{3431.bugfix.rst => 3431.bugfix.md} (100%) rename changes/{3448.bugfix.rst => 3448.bugfix.md} (100%) rename changes/{3449.misc.rst => 3449.misc.md} (100%) diff --git a/changes/2859.removal.rst b/changes/2859.removal.md similarity index 100% rename from changes/2859.removal.rst rename to changes/2859.removal.md diff --git a/changes/3130.feature.md b/changes/3130.feature.md deleted file mode 100644 index 81e43ab2da..0000000000 --- a/changes/3130.feature.md +++ /dev/null @@ -1 +0,0 @@ -Port more stateful testing actions from [Icechunk](https://icechunk.io). diff --git a/changes/3310.feature.rst b/changes/3310.feature.md similarity index 100% rename from changes/3310.feature.rst rename to changes/3310.feature.md diff --git a/changes/3376.misc.rst b/changes/3376.misc.md similarity index 100% rename from changes/3376.misc.rst rename to changes/3376.misc.md diff --git a/changes/3403.misc.rst b/changes/3403.misc.md similarity index 100% rename from changes/3403.misc.rst rename to changes/3403.misc.md diff --git a/changes/3411.bugfix.rst b/changes/3411.bugfix.md similarity index 100% rename from changes/3411.bugfix.rst rename to changes/3411.bugfix.md diff --git a/changes/3422.bugfix.rst b/changes/3422.bugfix.md similarity index 100% rename from changes/3422.bugfix.rst rename to changes/3422.bugfix.md diff --git a/changes/3425.bugfix.rst b/changes/3425.bugfix.md similarity index 100% rename from changes/3425.bugfix.rst rename to changes/3425.bugfix.md diff --git a/changes/3428.bugfix.rst b/changes/3428.bugfix.md similarity index 100% rename from changes/3428.bugfix.rst 
rename to changes/3428.bugfix.md diff --git a/changes/3431.bugfix.rst b/changes/3431.bugfix.md similarity index 100% rename from changes/3431.bugfix.rst rename to changes/3431.bugfix.md diff --git a/changes/3448.bugfix.rst b/changes/3448.bugfix.md similarity index 100% rename from changes/3448.bugfix.rst rename to changes/3448.bugfix.md diff --git a/changes/3449.misc.rst b/changes/3449.misc.md similarity index 100% rename from changes/3449.misc.rst rename to changes/3449.misc.md From e6b629ff165ba43ca8c359332193d39631466769 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 15:16:15 -0400 Subject: [PATCH 59/64] Strict RTD build --- .readthedocs.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 96bc537bd3..894778c5a4 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,7 +11,9 @@ build: then towncrier build --version Unreleased --yes; fi - + build: + html: + - mkdocs build --strict --site-dir $READTHEDOCS_OUTPUT/html mkdocs: configuration: mkdocs.yml @@ -21,3 +23,4 @@ python: path: . 
extra_requirements: - docs + - remote From 14db861b9372fe77768f047ee326cee76eb081a5 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 15:45:07 -0400 Subject: [PATCH 60/64] Fix numpydoc errors --- src/zarr/api/asynchronous.py | 2 +- src/zarr/core/array.py | 34 +++++++++++++++++----------------- src/zarr/core/group.py | 8 ++++---- src/zarr/core/indexing.py | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 9b2f3d4f96..7761921584 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -295,7 +295,7 @@ async def load( See Also -------- - save, savez + save Notes ----- diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index a070f02ad1..b1300b55cb 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1925,7 +1925,7 @@ def info(self) -> Any: ArrayInfo Related - -------- + ------- [zarr.AsyncArray.info_complete][] - All information about a group, including dynamic information like the number of bytes and chunks written. @@ -1963,7 +1963,7 @@ async def info_complete(self) -> Any: ArrayInfo Related - -------- + ------- [zarr.AsyncArray.info][] - A property giving just the statically known information about an array. """ return self._info( @@ -2788,7 +2788,7 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: methods listed under Related. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection] [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], @@ -2889,7 +2889,7 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: methods listed under Related. 
Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -3016,7 +3016,7 @@ def get_basic_selection( using the alternative notation. Related - -------- + ------- [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], @@ -3126,7 +3126,7 @@ def set_basic_selection( alternative notation. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], @@ -3254,7 +3254,7 @@ def get_orthogonal_selection( Slices with step > 1 are supported, but slices with negative step are not. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -3374,7 +3374,7 @@ def set_orthogonal_selection( Slices with step > 1 are supported, but slices with negative step are not. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -3461,7 +3461,7 @@ def get_mask_selection( arrays by calling `np.nonzero`. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [set_mask_selection][zarr.Array.set_mask_selection], @@ -3551,7 +3551,7 @@ def set_mask_selection( arrays by calling `np.nonzero`. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -3639,7 +3639,7 @@ def get_coordinate_selection( each coordinate array after broadcasting. 
Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -3731,7 +3731,7 @@ def set_coordinate_selection( of the array. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -3855,7 +3855,7 @@ def get_block_selection( [23, 24, 25, 26, 27, 28]]) Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -3955,7 +3955,7 @@ def set_block_selection( Slices are supported. However, only with a step size of one. Related - -------- + ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], @@ -4119,7 +4119,7 @@ def info(self) -> Any: ArrayInfo Related - -------- + ------- [zarr.Array.info_complete][] - All information about a group, including dynamic information like the number of bytes and chunks written. @@ -4155,7 +4155,7 @@ def info_complete(self) -> Any: ArrayInfo Related - -------- + ------- [zarr.Array.info][] - The statically known subset of metadata about an array. """ return sync(self._async_array.info_complete()) @@ -4178,7 +4178,7 @@ async def _shards_initialized( The keys of the chunks that have been initialized. 
Related - -------- + ------- [nchunks_initialized][zarr.Array.nchunks_initialized] """ diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index f4d44864ce..b7c46d3370 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -865,7 +865,7 @@ def info(self) -> Any: GroupInfo Related - -------- + ------- [zarr.AsyncGroup.info_complete][] All information about a group, including dynamic information """ @@ -889,7 +889,7 @@ async def info_complete(self) -> Any: GroupInfo Related - -------- + ------- [zarr.AsyncGroup.info][] """ members = [x[1].metadata async for x in self.members(max_depth=None)] @@ -2076,7 +2076,7 @@ def info(self) -> Any: GroupInfo Related - -------- + ------- [zarr.Group.info_complete][] All information about a group, including dynamic information like the children members. @@ -2095,7 +2095,7 @@ def info_complete(self) -> Any: GroupInfo Related - -------- + ------- [zarr.Group.info][] """ return self._sync(self._async_group.info_complete()) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index be60f4208f..243096b029 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -183,7 +183,7 @@ def _iter_regions( The linear indexing order to use. 
Yields - ------- + ------ Iterator[tuple[slice, ...]] An iterator over tuples of slices, where each slice spans a separate contiguous region From d70be1fb76c1912063e6d0685074532caef936ed Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 15:45:40 -0400 Subject: [PATCH 61/64] Allow related section --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7940dfa51c..d0f9efc3e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -407,7 +407,6 @@ ignore = [ [tool.numpydoc_validation] # See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for list of checks checks = [ - "GL06", "GL07", # Currently broken; see https://github.com/numpy/numpydoc/issues/573 # "GL09", From 091ff559b8c86e8535104e5bf50e43a956b24c1a Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 14 Sep 2025 17:07:38 -0400 Subject: [PATCH 62/64] Update numpydoc validation config --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d0f9efc3e5..046758052d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -407,7 +407,9 @@ ignore = [ [tool.numpydoc_validation] # See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for list of checks checks = [ - "GL07", + # Requires third-party support; see https://github.com/numpy/numpydoc/issues/463 + # "GL06", + # "GL07", # Currently broken; see https://github.com/numpy/numpydoc/issues/573 # "GL09", "GL10", From aef04d644a75bcd173e88116ec11be579239c33d Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 19 Sep 2025 10:48:14 -0400 Subject: [PATCH 63/64] Fix docstring failures --- src/zarr/api/asynchronous.py | 20 ++++++++++---------- src/zarr/api/synchronous.py | 24 ++++++++++++------------ src/zarr/core/array.py | 1 + 3 files changed, 
23 insertions(+), 22 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index e5f1f2f2e3..881341ace2 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -352,8 +352,8 @@ async def open( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Additional parameters are passed through to [zarr.creation.open_array][] or - [zarr.api.asynchronous.open_group][]. + Additional parameters are passed through to [`zarr.creation.open_array`][] or + [`open_group`][zarr.api.asynchronous.open_group]. Returns ------- @@ -1105,7 +1105,7 @@ async def empty( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Notes ----- @@ -1127,7 +1127,7 @@ async def empty_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1160,7 +1160,7 @@ async def full( fill_value : scalar Fill value. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1181,7 +1181,7 @@ async def full_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1205,7 +1205,7 @@ async def ones( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1225,7 +1225,7 @@ async def ones_like( a : array-like The array to create an empty array like. 
**kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1329,7 +1329,7 @@ async def zeros( shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1349,7 +1349,7 @@ async def zeros_like( a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 59e1d892f0..728822a326 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -199,7 +199,7 @@ def open( the backend implementation. Ignored otherwise. **kwargs Additional parameters are passed through to [`zarr.creation.open_array`][] or - [`zarr.api.synchronous.open_group`][]. + [`open_group`][zarr.api.asynchronous.open_group]. Returns ------- @@ -291,7 +291,7 @@ def save_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Passed through to [`create`][zarr.api.synchronous.create], e.g., compressor. + Passed through to [`create`][zarr.api.asynchronous.create], e.g., compressor. """ return sync( async_api.save_array( @@ -383,7 +383,7 @@ def array(data: npt.ArrayLike | Array, **kwargs: Any) -> Array: data : array_like The data to fill the array with. **kwargs - Passed through to [`create`][zarr.api.synchronous.create]. + Passed through to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1196,7 +1196,7 @@ def empty(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. 
+ Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1223,7 +1223,7 @@ def empty_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1250,7 +1250,7 @@ def full(shape: tuple[int, ...], fill_value: Any, **kwargs: Any) -> Array: fill_value : scalar Fill value. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- @@ -1270,7 +1270,7 @@ def full_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1289,7 +1289,7 @@ def ones(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1308,7 +1308,7 @@ def ones_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1344,7 +1344,7 @@ def open_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Any keyword arguments to pass to [zarr.api.synchronous.create][]. + Any keyword arguments to pass to [`create`][zarr.api.asynchronous.create]. Returns @@ -1396,7 +1396,7 @@ def zeros(shape: tuple[int, ...], **kwargs: Any) -> Array: shape : int or tuple of int Shape of the empty array. 
**kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- @@ -1415,7 +1415,7 @@ def zeros_like(a: ArrayLike, **kwargs: Any) -> Array: a : array-like The array to create an empty array like. **kwargs - Keyword arguments passed to [zarr.api.asynchronous.create][]. + Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 69e6ac529e..6aefc38031 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4756,6 +4756,7 @@ async def create_array( chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, + and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. From 9404483641ebefcd8e03ffa42e7752686966b460 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 19 Sep 2025 11:52:18 -0400 Subject: [PATCH 64/64] Update config ref --- src/zarr/core/group.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 98406100d8..e71c55c10f 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -2506,13 +2506,13 @@ def create( returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. + in [`zarr.config`][]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. + in [`zarr.config`][]. 
Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. @@ -2521,7 +2521,7 @@ def create( Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + in [`zarr.config`][]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -2531,7 +2531,7 @@ def create( is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. - This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional